65.Python读取大文件

方式一

import os
# Sample log file used by the examples below.  The raw string keeps the
# Windows backslashes literal; without the separators the value would be a
# drive-relative path that does not point at the intended file.
path = r"C:\Users\yzt\Desktop\work\InfosecTestPlatform\libs\Log\mainline1-nohup.log"

#####################
# file.seek(offset, whence=0):
# Moves the file pointer by ``offset``.  A positive offset moves towards the
# end of the file, a negative offset towards the beginning.
# ``whence`` selects the reference point the offset is relative to:
# 0 = start of the file (default), 1 = current position, 2 = end of the file.
#####################
# Text-mode demo.  The ``with`` block closes the handle once the demo is done
# instead of leaving it open until the name ``f`` is rebound.
with open(path, "r", encoding='utf-8') as f:
    print(f.tell())  # a freshly opened file starts at offset 0
    # NOTE: for text-mode files the Python documentation only defines seek()
    # for offset 0 or a value previously returned by tell(); other offsets
    # may land inside a multi-byte character.  Use binary mode for
    # byte-exact positioning.
    f.seek(10)  # whence defaults to 0: absolute position from the start
    print(f.tell())
    f.seek(3)  # absolute again, so the pointer moves back to offset 3
    print(f.tell())

#####################
# Binary-mode demo: relative seeks (whence=1 or 2) with a non-zero offset
# require a file opened in binary mode, and all offsets are counted in bytes.
with open(path, "rb") as f:
    size = os.path.getsize(path)
    print(size)
    print(f.tell())  # current position of the file pointer (0 right after open)
    f.seek(10, 1)  # whence=1: move 10 bytes forward from the current position
    print(f.tell())
    # whence=2: position relative to the end of the file; a negative offset
    # moves backwards.  Clamp to the file size so that files shorter than
    # 15 bytes do not raise OSError for seeking before the start.
    f.seek(-min(15, size), 2)
    print(f.tell())
    # Everything from here to EOF is at most 15 bytes; show its last line.
    # The seek may have landed inside a multi-byte character, so undecodable
    # bytes are replaced rather than raising UnicodeDecodeError.
    tail_lines = f.readlines()
    if tail_lines:  # an empty file yields no lines at all
        print(tail_lines[-1].decode(errors="replace"))


# Approach 1 wrapped in a function: return the tail of a log file
# (150 KiB, roughly 0.15 MB, by default) so it can be sent to the front end.
def read_log(path, log_read_size=150 * 1024):
    """Return the last ``log_read_size`` bytes of a file as text.

    The file is read in binary mode and decoded as UTF-8.  When the file is
    larger than ``log_read_size`` only its tail is read, so the cost does not
    grow with the file size.

    Args:
        path: Path of the file to read.
        log_read_size: Maximum number of bytes to read from the end of the
            file.  Defaults to 150 KiB.

    Returns:
        The decoded text.  Bytes that are not valid UTF-8 are replaced with
        U+FFFD instead of raising ``UnicodeDecodeError``.

    Raises:
        ValueError: If ``log_read_size`` is negative.
        OSError: If the file cannot be opened or read.
    """
    if log_read_size < 0:
        raise ValueError("log_read_size must be non-negative")

    with open(path, 'rb') as log_file:
        # seek() returns the new absolute position, i.e. the current size.
        # Measuring on the open handle avoids a second lookup by path.
        size = log_file.seek(0, 2)
        start = max(size - log_read_size, 0)
        log_file.seek(start)
        data = log_file.read()

    if start > 0:
        # The cut may fall inside a multi-byte UTF-8 character.  Drop the
        # orphaned continuation bytes (0b10xxxxxx, at most three) so the
        # result starts on a character boundary.
        skip = 0
        while skip < len(data) and skip < 3 and (data[skip] & 0xC0) == 0x80:
            skip += 1
        data = data[skip:]

    return data.decode(errors="replace")


# Demo call: read the tail of the sample log file.  The returned text is
# discarded here.
read_log(path)

方式二

import os

# Sample log file used by the example below.  The raw string keeps the
# Windows backslashes literal; without the separators the value would be a
# drive-relative path that does not point at the intended file.
path = r"C:\Users\yzt\Desktop\work\InfosecTestPlatform\libs\Log\mainline1-nohup.log"


def tail(inputfile, n=2, block_size=1024, encoding=None):
    """Return the last ``n`` lines of a text file without reading all of it.

    The file is read backwards in binary blocks of ``block_size`` bytes until
    more than ``n`` newline bytes have been collected (or the start of the
    file is reached), so complete lines are returned even when they are
    longer than one block.  The collected bytes are then decoded with the
    same default encoding and newline translation that ``open(..., 'r')``
    uses.

    Args:
        inputfile: Path of the file to read.
        n: Number of trailing lines to return.  Defaults to 2.
        block_size: Number of bytes fetched per backwards read.
        encoding: Text encoding; ``None`` uses the platform default, as
            ``open()`` does.  The encoding is assumed to represent a line
            feed as the single byte ``0x0A`` (true for UTF-8, GBK, Latin-1).

    Returns:
        A list with up to ``n`` lines (line endings kept, translated to
        ``"\\n"``), or the empty string ``""`` when the file is empty.
        Undecodable bytes are replaced with U+FFFD instead of raising.

    Raises:
        ValueError: If ``n`` or ``block_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    import io  # local import: only this function needs it

    if n < 1:
        raise ValueError("n must be at least 1")
    if block_size < 1:
        raise ValueError("block_size must be at least 1")

    chunks = []
    with open(inputfile, 'rb') as dat_file:
        # Binary mode makes arbitrary byte offsets well defined; seek()
        # returns the new position, which is the file size here.
        pos = dat_file.seek(0, os.SEEK_END)
        newline_count = 0
        # Require MORE than n newlines: the first line in the buffer may be
        # a fragment cut by the block boundary, so one extra newline
        # guarantees the last n lines are complete.
        while pos > 0 and newline_count <= n:
            step = min(block_size, pos)
            pos -= step
            dat_file.seek(pos)
            chunk = dat_file.read(step)
            chunks.append(chunk)
            newline_count += chunk.count(b"\n")

    if not chunks:
        # Empty file: keep the historical return value.
        return ""

    data = b"".join(reversed(chunks))
    # Decode in one go so that "\r\n" pairs split across blocks are still
    # translated correctly; errors="replace" tolerates a buffer that starts
    # in the middle of a multi-byte character.
    reader = io.TextIOWrapper(io.BytesIO(data), encoding=encoding, errors="replace")
    lines = reader.readlines()
    return lines[-n:]

错误方式（其实一开始是好的，然后过了一周就用不了了，我很懵逼）

1G文件读取后五千行

import os
from InfosecTestPlatform import settings
from collections import deque  # needed only by this snippet

file_url = os.path.join(settings.BASE_DIR, "libs/Log/mainline1-jmeter.log")
print(os.path.exists(file_url))

# Stream the file line by line.  A deque with maxlen=5000 discards the oldest
# line whenever a new one arrives, so at most 5000 lines are held in memory
# at any time.  The original built a list of every line of the 1 GB file and
# then made two more reversed copies of the tail.
with open(file_url, 'r', encoding='utf-8') as fp:
    file_last = list(deque(fp, maxlen=5000))

# The lines are already in their original (forward) order; join them into
# the text that is returned to the caller.
str1 = "".join(file_last)


# 下面是我用循环写了一个1G的日志文件
# fp_w = open(file_url, 'w', encoding='utf-8')
# for i in range(500):
#     fp_w.write("")