一. python进阶(文件的读写编码)

一. 读取文件

过程:
   打开文件
   读文件内容
   关闭文件

打开文件:
       open(path,flag,[encoding [ERRORS]])
        path:要打开文件的路径
        flag :打开方式
            * r      以只读的方式打开文件 文件的描述符放在开头     
            * rb     以二进制格式打开一个文件用于只读 文件的描述符放在开头  (二进制用来加密)
              r+     打开一个文件用于读写 文件的描述符放在开头
            * w       打开一个文件只用于写入 如果该文件已经存在会覆盖 如果不存在则创建新文件
            * wb     打开一个文件只用于写入二进制 如果该文件已经存在会覆盖 如果不存在则创建新文件
            * w+     打开一个文件用于读写
             a      打开一个文件用于追加 如果文件存在 文件描述符将会放到文件末尾
             a+      打开一个文件用于读写(追加) 如果文件存在 文件描述符将会放到文件末尾 如果不存在则创建新文件
        encoding    编码格式   常用的是utf-8
        ERRORS      错误处理

1.read() 与read(参数)

# Step 1: open the file for reading as UTF-8 text.
path=r"D:Studypythonpy211.txt"
# Passing errors="ignore" to open() would skip undecodable bytes instead of
# raising UnicodeDecodeError.
f=open(path,"r",encoding="utf-8")
# Step 2: read() with no argument returns everything up to end of file as one str.
str1=f.read()
print(str1)

# Example output:
# my name is 哈哈哈
# i lover you to
# 哈哈哈哈啦啦啦

# Step 3: close the file so the handle is released.
f.close()

# Step 1: open the file for reading as UTF-8 text.
path=r"D:Studypythonpy21.txt"

# Passing errors="ignore" to open() would skip undecodable bytes instead of
# raising UnicodeDecodeError.
f=open(path,"r",encoding="utf-8")

# Step 2: read(n) returns at most n characters (not bytes) in text mode.
str1=f.read(10)
print(str1)   # my name is

# Step 3: close the file so the handle is released.
f.close()

2.readline() 与 readline(参数) readlines()

# Step 1: open the file for reading as UTF-8 text.
path=r"D:Studypythonpy21.txt"
# Passing errors="ignore" to open() would skip undecodable bytes instead of
# raising UnicodeDecodeError.
f=open(path,"r",encoding="utf-8")

# Step 2: readline() returns one whole line, including its trailing "\n".
str1=f.readline()
print(str1)   # my name is 哈哈哈

# Step 3: close the file so the handle is released.
f.close()




path=r"E:Studypythonpy21.txt"
# Passing errors="ignore" to open() would skip undecodable bytes instead of
# raising UnicodeDecodeError.
f=open(path,"r",encoding="utf-8")

# readline(n) returns at most n characters of the current line; it stops
# earlier if the line ends first.
str1=f.readline(10)
print(str1)   # my name is

# Close the file so the handle is released.
f.close()

# Step 1: open the file for reading as UTF-8 text.
path=r"D:Studypythonpy21.txt"
# Passing errors="ignore" to open() would skip undecodable bytes instead of
# raising UnicodeDecodeError.
f=open(path,"r",encoding="utf-8")

# Step 2: readlines() reads every remaining line and returns them as a list
# of str; each element keeps its trailing "\n".
str1=f.readlines()
print(str1)
# Example output:
# ['my name is 哈哈哈\n', '\n', 'i lover you to\n', '\n', '哈哈哈哈啦啦啦']

# Step 3: close the file so the handle is released.
f.close()

# Step 1: open the file for reading as UTF-8 text.
path=r"D:Studypythonpy21.txt"
# Passing errors="ignore" to open() would skip undecodable bytes instead of
# raising UnicodeDecodeError.
f=open(path,"r",encoding="utf-8")
# Step 2: readlines(hint) stops reading further lines once the total size of
# the lines read so far exceeds hint, so it returns whole lines only.
str1=f.readlines(25)
print(str1)
# Example output (the third line pushes the total past 25, so reading stops):
# ['my name is 哈哈哈\n', '\n', 'i lover you to\n']

# Step 3: close the file so the handle is released.
f.close()

3. seek(offset) 表示把文件指针移动到第 offset 个字节处，之后从该位置开始读取文件内容

# fileObject.seek(offset[, whence]) moves the file's current position.
#   offset -- how far to move, counted in bytes.
#   whence -- optional, default 0; the reference point for offset:
#             0 = start of file, 1 = current position, 2 = end of file.
#   Returns the new absolute position.
# NOTE: the Python docs only guarantee text-mode seeks to 0 or to a value
# returned by tell(); seek(10) works here because the first ten characters of
# the sample file are single-byte ASCII.

# Open the sample file as UTF-8 text. Passing errors="ignore" to open() would
# skip undecodable bytes instead of raising.
path=r"D:Studypythonpy21.txt"
f=open(path,mode="r",encoding="utf-8")

# Jump to byte offset 10 from the start, then read everything after it.
f.seek(10,0)
str1=f.read()
print(str1)
# Example output:
#  哈哈哈
# i lover you to
# 哈哈哈哈啦啦啦

# Close the file so the handle is released.
f.close()

# Complete open / read / close flow, approach 1: try/finally.
path=r"E:Studypythonpy211.txt"

# open() stays outside the try block: if it raises there is nothing to close,
# and the finally clause must never touch an unbound (or stale) `f`.
f=open(path,"r",encoding="utf-8")
try:
    str1=f.read()
    print(str1)
finally:
    # Runs whether or not read()/print() raised, so the handle is always released.
    f.close()

# Example output:
# my name is 哈哈哈
#
# i lover you to
#
# 哈哈哈哈啦啦啦

# Complete open / read / close flow, approach 2: the with-statement.
# The file is closed automatically when the block exits, even on error.

with open(path,mode="r",encoding="utf-8") as f2:
    whole_text=f2.read()
    print(whole_text)

# Example output:
# my name is 哈哈哈
# i lover you to
# 哈哈哈哈啦啦啦

二 .写文件

# Write demo: show that flush() pushes buffered data to disk while the file
# is still open.
import time

path=r"E:Studypythonpy212.txt"
f=open(path,"w",encoding="utf-8")
try:
    # 1. write() only places the text in the in-memory buffer.
    f.write("my name is hao do you do")
    # 2. flush() writes the buffer to the file immediately instead of waiting
    #    for it to fill up or for close().
    f.flush()
    # Keep the process alive so the file can be inspected while still open.
    # Sleeping avoids spinning a CPU core; stop the demo with Ctrl+C.
    while True:
        time.sleep(1)
finally:
    # Reached on Ctrl+C (KeyboardInterrupt), so the handle is always released.
    f.close()

# Write demo: append to the buffer and flush every 0.1 s so the file can be
# watched growing while the script runs. Stop the demo with Ctrl+C.
import time
path=r"E:Studypythonpy213.txt"
f=open(path,"w",encoding="utf-8")
try:
    while 1:
        # 1. write() places the text in the in-memory buffer.
        f.write("my name is hao do you doLLLLLLLLLLLLLLLLLLLLLLLLLLLL")
        # 2. flush() writes the buffer to the file immediately instead of
        #    waiting for it to fill up or for close().
        f.flush()
        time.sleep(0.1)
finally:
    # Reached on Ctrl+C (KeyboardInterrupt), so the handle is always released.
    f.close()

# 写文件
import time

# Shorthand for writing: the with-statement flushes and closes the file when
# the block exits, so no explicit flush()/close() is needed.
path=r"D:Studypythonpy214.txt"
# Mode "a" appends to the end of the file, creating it if it does not exist.
with open(path,mode="a",encoding="utf-8") as f2:
    f2.write("哈哈哈哈哈啊哈哈哈哈啊哈哈哈哈哈哈哈哈")

1.案例

path=r"D:Studypythonpy215.txt"
# The charset used to decode must match the one used to encode.
# Write: encode the text to UTF-8 bytes and store them in binary mode.
with open(path,"wb")as f2:
    # Named `text` rather than `str` so the builtin str type is not shadowed.
    text="my name is haha  heee1s 张三丰"
    f2.write(text.encode("utf-8"))

# Read: load the raw bytes back and decode them with the same charset.
with open(path,"rb") as f3:
   data=f3.read()
   print(data)               #  b'my name is haha  heee1s \xe5\xbc\xa0\xe4\xb8\x89\xe4\xb8\xb0'  (b prefix = bytes)
   print(type(data))         #  <class 'bytes'>

   newData=data.decode("utf-8")

   print(newData)            #  my name is haha  heee1s 张三丰

import pickle  #数据持久性模块


# Write: serialize a Python object to a binary file with pickle.
path=r"E:Studypythonpy221.txt"
mylist=[1,2,3,4,5,6,"sumk is a good man fffffffffffffffffffffffffffffffffffffffffffff"]

# The with-statement closes the file even if dump() raises.
with open(path,"wb") as f:
    pickle.dump(mylist,f)

# Two standard modules for serialization:
#   json   -- converts between JSON text and basic Python data types.
#   pickle -- converts between Python objects and a Python-specific byte stream.
#   Both provide four functions: dumps, dump, loads, load.
#
# What can pickle store?
#   - Built-in values: booleans, integers, floats, complex numbers, strings,
#     bytes, None.
#   - Lists, tuples, dicts and sets made up of picklable objects.
#   - Functions and classes defined at module top level (stored by name), and
#     instances of such classes.
#
# WARNING: only unpickle data you trust; loading a malicious pickle can run
# arbitrary code.

# Read: load the object back from the file.
with open(path,"rb") as f2:
    timelist=pickle.load(f2)
print(timelist)
# [1, 2, 3, 4, 5, 6, 'sumk is a good man fffffffffffffffffffffffffffffffffffffffffffff']

三. 编码

Python 2 中默认的编码格式是 ASCII 格式，在没修改编码格式时无法正确打印汉字，所以在读取中文时会报错。（Python 3 的源文件默认编码已经是 UTF-8。）

解决方法为只要在文件开头加入 # -*- coding: UTF-8 -*- 或者 #coding=utf-8 就行了

UTF-8 --> decode 解码 --> Unicode

Unicode --> encode 编码 --> GBK / UTF-8 等

使用type可以查看类型：在 Python 3 中，unicode 字符串是 'str'，gbk 和 utf-8 编码后的数据是 'bytes'。

# Units: 1 byte = 8 bits.
# Storage is usually measured in bytes (B) and transmission mostly in bits (b).
# A bit is a single 0 or 1; eight bits make up one byte.

a = '编码'                          # str: Unicode text
b = a.encode(encoding='utf-8')      # bytes: the text encoded as UTF-8
c = a.encode(encoding='gbk')        # bytes: the text encoded as GBK
print(a, b, c)
print(*(type(value) for value in (a, b, c)))
# In Python 3 a string literal is Unicode text (str).
# Output:
# 编码 b'\xe7\xbc\x96\xe7\xa0\x81' b'\xb1\xe0\xc2\xeb'
# <class 'str'> <class 'bytes'> <class 'bytes'>


print("**********************************")

# Encode: bytes(text, encoding=...) turns Unicode text into GBK bytes.
aa=bytes('你好',encoding="GBK")
print(aa)                    # b'\xc4\xe3\xba\xc3'
# Decode: the same charset turns the bytes back into text.
print(aa.decode('GBK'))      # 你好


print("**********************************")

# Same round trip with UTF-8.
bb=bytes('你好你是谁',encoding="utf-8")
print(bb)                    # b'\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe6\x98\xaf\xe8\xb0\x81'
print(bb.decode("utf-8"))    # 你好你是谁

print("**********************************")

# Author's notes: network programming works with binary data only; photos
# and videos are also stored as binary; HTML pages are encoded too.
# ord() gives a character's code point; chr() is the inverse.
for ch in ("好","A","5"):
    print(ord(ch))           # 22909, then 65, then 53
print(chr(68))               # D

print("**********************************")
# repr() returns a string form of an object that the interpreter can read back.

print(repr('1'))    # '1'
print(repr(1))   # 1


s = 'RUNOOB'
a=repr(s)
print(a)    # 'RUNOOB'

print("****************************************************")

# Named `site_dict` rather than `dict` so the builtin dict type is not shadowed.
site_dict = {'runoob': 'runoob.com', 'google': 'google.com'}
print(type(site_dict))   # <class 'dict'>
cc=repr(site_dict)  # {'runoob': 'runoob.com', 'google': 'google.com'}
print(cc)
print(type(cc))  # <class 'str'>


print("****************************************************")
        decode                   encode
bytes ---------> str(Unicode) ---------> bytes
>>> u = '中文'                 # 指定字符串类型对象u
>>> str1 = u.encode('gb2312')  # 以gb2312编码对u进行编码，获得bytes类型对象
>>> print(str1)
b'\xd6\xd0\xce\xc4'
>>> str2 = u.encode('gbk')     # 以gbk编码对u进行编码，获得bytes类型对象
>>> print(str2)
b'\xd6\xd0\xce\xc4'
>>> str3 = u.encode('utf-8')   # 以utf-8编码对u进行编码，获得bytes类型对象
>>> print(str3)
b'\xe4\xb8\xad\xe6\x96\x87'
>>> u1 = str1.decode('gb2312') # 以gb2312编码对bytes对象str1进行解码，获得字符串类型对象
>>> print(u1)
中文
>>> u2 = str1.decode('utf-8')  # 报错，因为str1是gb2312编码的
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd6 in position 0: invalid continuation byte

# Turning text into bytes and back. The charset used to decode must match
# the one used to encode.
name="你好"
# For each charset: encode with bytes(), then decode the result again.
# utf-8 prints b'\xe4\xbd\xa0\xe5\xa5\xbd' then 你好
# gbk   prints b'\xc4\xe3\xba\xc3'         then 你好
for codec in ('utf-8','gbk'):
    print(bytes(name,encoding=codec))
    print(bytes(name,encoding=codec).decode(codec))

# bytes(text, encoding) and text.encode(encoding) produce the same bytes.
s="张三来了哈哈哈"
print(type(s))
a=bytes(s,encoding="utf-8")
print(a)
C=s.encode(encoding="utf-8")
print(C)
# Output:
# <class 'str'>
# b'\xe5\xbc\xa0\xe4\xb8\x89\xe6\x9d\xa5\xe4\xba\x86\xe5\x93\x88\xe5\x93\x88\xe5\x93\x88'
# b'\xe5\xbc\xa0\xe4\xb8\x89\xe6\x9d\xa5\xe4\xba\x86\xe5\x93\x88\xe5\x93\x88\xe5\x93\x88'