使用带 batch_size 的生成器按批读取文件,完美解决大数据处理时内存不足的问题。

https://github.com/zhangbo2008/perfect_batch_generator_for_pyton

核心代码如下:

def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in batches.

    The file is opened with ``open(fimename)`` (default text mode and
    encoding) and consumed line by line, so at most one batch of lines is
    held by this generator at any time.

    Args:
        fimename: Path of the file to read.
        batchsize: Number of lines per yielded batch. If no batch ever
            reaches exactly this length (e.g. a value below 1), every line
            is accumulated and yielded as one single batch.

    Yields:
        Lists of lines, each line keeping its line terminator as returned
        by the file object. Every batch has ``batchsize`` lines except
        possibly the last one, which holds the remaining lines. An empty
        batch is never yielded, so an empty file produces no batches.
    """
    with open(fimename) as f:
        batch = []
        for line in f:
            batch.append(line)
            if len(batch) == batchsize:
                yield batch
                # Start a fresh list so the batch already handed to the
                # consumer is not mutated by later reads.
                batch = []
        # Flush the leftover lines that did not fill a whole batch; skip the
        # flush when nothing is left to avoid emitting a spurious empty list.
        if batch:
            yield batch

# `read` is a generator object: batches are produced lazily as it is iterated.
read = bylineread('1', batchsize=2)
# A for loop stops cleanly when the generator is exhausted, while real errors
# (e.g. a missing input file) propagate instead of being reported as 'over'.
for batch in read:
    print(batch)
print('over')