python 大文件读写

  1. 使用

import smbclient  # pip install smbprotocol
import zlib
import re
import os

# Authenticate against the SMB server once; later smbclient calls to this
# host reuse the registered session.
smbclient.register_session("10.x.x.x", username="123", password="123")

# A UNC path needs two leading backslashes and a separator before the share
# name (\\server\share).
share_dir = r"\\10.x.x.x\img"
# listdir() yields the entry names inside the directory, not a single path.
img_path = smbclient.listdir(share_dir)

# The expected checksum is embedded in the archive file name as a hex literal.
# Option 1 (name shaped like "<prefix>_<checksum>.<ext>"):
# res = file_name.rsplit('.', 1)[0].split('_')[1]
# Option 2: capture the "0x..." token that directly precedes ".tar".
checksum_pattern = re.compile(r'(0x\w+)\.tar')

# Pick the first entry whose name carries a checksum.
# NOTE(review): "first match wins" is an assumption - adjust the selection
# if the share can hold several archives.
path = file_name = file_checksum = None
for entry_name in img_path:
    res = checksum_pattern.search(entry_name)
    if res:
        file_name = entry_name
        path = share_dir + "\\" + entry_name  # full remote path of the archive
        file_checksum = res.group(1)
        break

if path is None:
    # Fail with a clear message instead of an AttributeError on `None.group`.
    raise FileNotFoundError(
        "no '<...0x<checksum>.tar>' entry found in " + share_dir
    )


def read_in_chunks(filePath, chunk_size=1024*1024):
    """
    Lazily read a remote SMB file piece by piece (generator).

    Args:
        filePath: path of the file on the SMB share, passed unchanged to
            ``smbclient.open_file``.
        chunk_size: maximum number of bytes per chunk. Default: 1 MiB.

    Yields:
        Consecutive ``bytes`` chunks of the file; the last one may be
        shorter than ``chunk_size``. Nothing is yielded for an empty file.
    """
    # The context manager closes the remote handle when the file is
    # exhausted, when the consumer stops early and the generator is
    # closed, or when reading raises.
    with smbclient.open_file(filePath, mode='rb') as file_object:
        while True:
            chunk_data = file_object.read(chunk_size)
            if not chunk_data:
                # An empty read means end of file.
                break
            yield chunk_data


def verify_file_checksum(file, checksum, chunk_size=1024*1024):
    """
    Check a local file against an expected Adler-32 checksum.

    The file is hashed incrementally so that arbitrarily large files can be
    verified without loading them into memory in one piece.

    Args:
        file: path of the local file to verify.
        checksum: expected Adler-32 value as a hexadecimal string; an
            optional ``0x`` prefix is accepted.
        chunk_size: number of bytes read per iteration. Default: 1 MiB.

    Returns:
        True when the computed checksum equals ``checksum``, False otherwise.
        The outcome is also printed.

    Raises:
        ValueError: if ``checksum`` is not a valid hexadecimal string.
    """
    expected = int(checksum, 16)

    # 1 is the Adler-32 start value (zlib.adler32's own default), so the
    # running result equals a one-shot adler32 over the whole file.
    running = 1
    with open(file, 'rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            running = zlib.adler32(chunk, running)

    if (running & 0xffffffff) == expected:
        print('checksum verification pass')
        return True
    print('checksum verification fail')
    return False

if __name__ == "__main__":
    # Stream the remote file into a local copy chunk by chunk.
    # Mode 'wb' creates the file or truncates a stale copy, so no prior
    # os.remove() is needed (it raised FileNotFoundError on a first run),
    # and the file is opened once instead of once per chunk.
    with open('1.img', mode='wb') as fw:
        for chunk in read_in_chunks(path):
            fw.write(chunk)

    # Compare the downloaded copy with the checksum taken from the file name.
    verify_file_checksum('1.img', file_checksum)

  2. 模板
def read_in_chunks(filePath, chunk_size=1024*1024):
    """
    Lazily read a local file piece by piece (generator).

    Args:
        filePath: path of the file to read. It is opened in text mode with
            the platform default encoding.
        chunk_size: maximum number of characters per chunk. Default: 1M.

    Yields:
        Consecutive string chunks of the file; the last one may be shorter
        than ``chunk_size``. Nothing is yielded for an empty file.
    """
    # The context manager closes the file when it is exhausted, when the
    # consumer stops early and the generator is closed, or when reading
    # raises.
    with open(filePath) as file_object:
        while True:
            chunk_data = file_object.read(chunk_size)
            if not chunk_data:
                # An empty read means end of file.
                break
            yield chunk_data

if __name__ == "__main__":
    # Template entry point: replace 'filename' with the real path and
    # process() with the actual per-chunk handler.
    filePath = 'filename'
    chunk_stream = read_in_chunks(filePath)
    for chunk in chunk_stream:
        process(chunk)  # <do something with chunk>

  3. fileinput 模块

fileinput 模块可以对一个或多个文件中的内容进行迭代、遍历等操作。
该模块的 input() 函数有点类似文件对象的 readlines() 方法，但它返回的是一个可迭代对象，每次只生成一行，需要用 for 循环迭代，而不是一次性把所有行读入内存。因此在读取大文件时，内存占用更低，效率也更高。
用 fileinput 对文件进行循环遍历、格式化输出、查找、替换等操作，非常方便。

import fileinput
# Iterate over the listed files one line at a time; the context manager
# closes the input when the loop ends.
with fileinput.input(files=['sum.log']) as log_lines:
    for line in log_lines:
        # Each line keeps its trailing newline, so suppress the one print()
        # would add. (`print line` was Python 2 syntax.)
        print(line, end='')


原文地址:https://www.cnblogs.com/amize/p/14266472.html