Python 查看与改变文件的编码格式

查看文件的编码格式

import chardet  # third-party encoding detector; must be imported before use

# Example path: replace it with the file you want to inspect.
# NOTE(review): the backslashes were missing from this path in the source text
# and have been reconstructed; confirm the directory layout.
# Read-only binary mode is sufficient here: the bytes are only inspected,
# nothing is written back.
with open(r"C:\Users\Administrator\Desktop\111\2.1 (1)\smile.txt", 'rb') as fp:
    content = fp.read()
    # detect() takes raw bytes and returns a dict describing its best guess.
    print(chardet.detect(content))  # e.g. {'encoding': 'UTF-8-SIG', 'confidence': 1.0, 'language': ''}

修改文件的编码格式

import chardet  # third-party encoding detector; must be imported before use

# Example path: replace it with the file you want to convert.
# NOTE(review): the backslashes were missing from this path in the source text
# and have been reconstructed; confirm the directory layout.
with open(r"C:\Users\Administrator\Desktop\111\2.1 (1)\smile.txt", 'rb+') as fp:
    content = fp.read()
    encoding = chardet.detect(content)['encoding']
    if encoding is None:
        # chardet reports None for empty or undetectable input; decoding with
        # None would raise TypeError, so leave the file untouched instead.
        print("could not detect the encoding; file left unchanged")
    else:
        content = content.decode(encoding).encode('utf8')  # re-encode as UTF-8
        fp.seek(0)
        fp.write(content)
        # The converted data can be shorter than the original (for example the
        # BOM of UTF-8-SIG is dropped), so cut off the stale bytes left behind
        # the new end of the data.
        fp.truncate()
        print(chardet.detect(content))  # e.g. {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}

案例:修改编码格式并且把所有的txt都整合成一个txt

import os
import chardet

def saveFile(filename, content):
    """Write *content* to *filename* as UTF-8 text, replacing any existing file.

    Args:
        filename: Path of the file to create or overwrite.
        content: Text to write.
    """
    # The context manager guarantees the handle is flushed and closed, even
    # if write() raises; the previous version never closed the file.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(content)
def readFile(filename):
    """Return the whole content of *filename*, decoded as UTF-8 text.

    Args:
        filename: Path of the file to read.

    Returns:
        The file content as a ``str``.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The context manager closes the handle deterministically; the previous
    # version left it open until garbage collection.
    with open(filename, "r", encoding="utf-8") as f:
        return f.read()
def file_extension(path):
    """Return the extension of *path* including the leading dot, or '' if none."""
    _stem, suffix = os.path.splitext(path)
    return suffix
def combinetxt(rootdir, filename):
    """Concatenate every ``.txt`` file directly inside *rootdir* into *filename*.

    Only regular files whose extension is exactly ``.txt`` are merged;
    sub-directories are not descended into. The inputs are read as UTF-8 and
    appended in sorted name order with no separator between them.

    Args:
        rootdir: Directory that holds the text files to merge.
        filename: Path of the merged output file (created or overwritten).

    Raises:
        UnicodeDecodeError: If one of the input files is not valid UTF-8.
    """
    print(rootdir, filename)
    # If the output file lives inside rootdir it must not be merged into
    # itself, so remember its normalised absolute path for comparison.
    target = os.path.normcase(os.path.abspath(filename))
    with open(filename, "w", encoding="utf-8") as merged:
        # os.listdir() order is arbitrary; sorting makes the result reproducible.
        for entry in sorted(os.listdir(rootdir)):
            path = os.path.join(rootdir, entry)
            if not os.path.isfile(path):
                continue
            if os.path.splitext(path)[1] != ".txt":
                continue
            if os.path.normcase(os.path.abspath(path)) == target:
                continue
            with open(path, "r", encoding="utf-8") as source:
                merged.write(source.read())


if __name__ == '__main__':
    # Directory whose .txt files are converted to UTF-8 and then merged.
    # NOTE(review): the backslashes were missing from this path in the source
    # text and have been reconstructed; confirm the directory layout.
    path = r"C:\Users\Administrator\Desktop\111\弄好的"
    for file in os.listdir(path):
        # os.path.join supplies the separator; the hand-built "\" literal
        # was an unterminated string and did not even compile.
        filepath = os.path.join(path, file)
        # Only regular .txt files are converted: sub-directories cannot be
        # opened, and re-encoding other file types in place could corrupt
        # them (only .txt files are merged afterwards anyway).
        if not os.path.isfile(filepath) or file_extension(filepath) != ".txt":
            continue
        with open(filepath, 'rb+') as fp:
            content = fp.read()
            encoding = chardet.detect(content)['encoding']
            if encoding is None:
                # Empty or undetectable file: nothing sensible to convert.
                continue
            content = content.decode(encoding).encode('utf8')
            fp.seek(0)
            fp.write(content)
            # The UTF-8 form may be shorter than the original bytes; drop
            # whatever is left over past the new end of the data.
            fp.truncate()
    # Merge all converted files into one output file in the working directory.
    outfile = "rest.txt"
    combinetxt(path, outfile)
原文地址:https://www.cnblogs.com/zhenghuiwen/p/14377704.html