Python生成文件列表

 

改进

# coding=utf-8
import os

def makeFileLists(imgPath, fileName='list.txt', withLabel=False, ext=('jpg', 'bmp', 'png')):
    """Append the paths of all images found under ``imgPath`` to a list file.

    Directories are walked recursively (entries are visited in sorted order so
    the resulting list is deterministic). Every regular file whose extension
    is in ``ext`` produces one line in ``fileName``.

    Params:
        imgPath   : top-level directory (or a single file) to scan.
        fileName  : list file to write; it is opened in append mode, so
                    calling the function twice adds the entries twice.
        withLabel : when True, each line is "<path> <label>", where the label
                    is the name of the directory that directly contains the
                    image; otherwise each line is just "<path>".
        ext       : accepted image extensions, with or without a leading dot;
                    compared case-insensitively.

    If ``imgPath`` does not exist a message is printed and nothing is written.

    Usage:
        makeFileLists('imagesRootPath', 'imageList.txt', False)
    """
    # Missing path: report it and stop (the message text is kept as-is).
    if not os.path.exists(imgPath):
        print('%s IS NOT EXIST, PLEASE CHECK IT!' % imgPath)
        return

    # Directory: recurse into every entry, forwarding *all* options so a
    # custom ``ext`` is honoured at every depth.
    if os.path.isdir(imgPath):
        for name in sorted(os.listdir(imgPath)):
            makeFileLists(os.path.join(imgPath, name), fileName, withLabel, ext)
        return

    # Regular file: keep it only if its real extension is one we want.
    # splitext handles extensions of any length (e.g. 'jpeg') and does not
    # match names that merely end in the letters 'jpg' without a dot.
    wanted = set(e.lower().lstrip('.') for e in ext)
    suffix = os.path.splitext(imgPath)[1][1:].lower()
    if suffix not in wanted:
        return

    line = imgPath
    if withLabel:
        # Parent directory name is the label; os.path works with whatever
        # separator os.path.join produced and never raises on a bare name.
        line += ' ' + os.path.basename(os.path.dirname(imgPath))

    with open(fileName, 'a') as f:
        f.write(line + '\n')

# Script entry point: scan the 'val' directory and append one line per image
# to 'val.txt'; the third argument (withLabel=True) asks for a label on each line.
if __name__ == "__main__":
    imagesPath = 'val'
    fileName = 'val.txt'
    makeFileLists(imagesPath, fileName, True)

  

使用递归方式生成包含子目录的文件列表

#coding=utf-8
import os
import shutil
import random

def getFileList(filePath='./images'):
    """Recursively build ``train_<group>.txt`` list files for a fixed layout.

    The path is split on '/'. When it has exactly four components, e.g.
    ``./images/-45_45/x9``, every entry of that directory is appended to
    ``train_<component 2>.txt`` (created in the current working directory) as

        <component 2>/<component 3>/<entry name> <label>

    where the label is the *second character* of component 3 ('9' for 'x9').
    Adapt the path/label composition to your own directory layout.
    A component 3 shorter than two characters raises IndexError.

    Regardless of depth, the function then recurses into every sub-directory.
    The path and its components are printed as progress output.

    Params:
        filePath : directory to scan, using '/' as separator.
    """
    print(filePath)
    # One listing serves both the list file and the recursion; sorting makes
    # the generated file deterministic.
    fileNames = sorted(os.listdir(filePath))
    paths = filePath.split('/')
    print(paths)

    if len(paths) == 4:
        # e.g. ./images/-45_45/x9/9_18_-1.29865.jpg
        saveRootPath = paths[2] + '/' + paths[3]
        label = paths[3][1]  # do not forget the label
        lines = [saveRootPath + '/' + name + ' ' + label + '\n' for name in fileNames]
        # Append mode: repeated runs add to the existing list file.
        with open('train_' + paths[2] + '.txt', 'a') as f:
            f.writelines(lines)

    for fn in fileNames:
        subPath = filePath + '/' + fn
        if os.path.isdir(subPath):
            getFileList(subPath)

# Executed whenever this script runs: scans the default './images' tree.
getFileList()

  


#coding=utf-8
#对一批训练数据,里面包含多个文件夹,每个文件夹下面存放的是相同类别的物体
# 根据这些文件夹生成列表、切分验证、训练集数据
import os
import shutil
import  random
# Caffe does not allow spaces in file names, so files must be renamed to remove them
def stdrename(imgfiles):
    """Rename files on disk so that their names contain no spaces.

    Only the final path component is changed; the directory part is left
    untouched, so a file inside a directory whose name contains a space is
    renamed in place instead of being moved to a non-existent directory.
    Prints 'rename' once for every file that is actually renamed.

    Params:
        imgfiles : iterable of file paths to normalise.
    """
    for path in imgfiles:
        folder, name = os.path.split(path)
        cleaned = name.replace(' ', '')
        if cleaned != name:
            print('rename')
            os.rename(path, os.path.join(folder, cleaned))

def GetFileList(FindPath,FlagStr=[]):
    """Return the sorted full paths of the entries directly inside FindPath.

    Params:
        FindPath : directory to list (not recursive).
        FlagStr  : optional filter. When it is non-empty, an entry is kept
                   only if IsSubString(FlagStr, entry_name) is true.
                   NOTE(review): IsSubString is not defined in the visible
                   part of this file -- confirm it is provided elsewhere
                   before passing a non-empty FlagStr.
    Returns:
        A list of os.path.join(FindPath, name) strings in ascending order;
        an empty list when nothing matches.
    """
    matched = []
    for entry in os.listdir(FindPath):
        # No filter means every entry qualifies; otherwise defer to the helper.
        if len(FlagStr) == 0 or IsSubString(FlagStr, entry):
            matched.append(os.path.join(FindPath, entry))
    return sorted(matched)

def spiltdata(path_root,valratio=0.15):
    """Split per-class image folders into ``train`` and ``val`` subsets.

    ``path_root`` is expected to contain one sub-directory per class. For
    each class, spaces are first removed from the file names (via
    ``stdrename``), then the first ``int(count * valratio)`` entries of the
    directory listing are moved to ``<path_root>/val/<class>`` and the rest
    to ``<path_root>/train/<class>``. The number of validation files is
    printed for every class.

    Notes:
        * The split follows the order returned by ``os.listdir``; files are
          not shuffled.
        * Entries named ``train`` or ``val`` and entries that are not
          directories are ignored, so the function can be run again on a
          root that already holds its own output (or stray list files)
          without treating them as classes.
        * A destination directory is only created when at least one file is
          moved into it; the emptied class directories are left in place.

    Params:
        path_root : directory holding the class folders.
        valratio  : fraction of each class to move to the validation set.
    """
    reserved = ('train', 'val')
    classes = [
        name for name in os.listdir(path_root)
        if name not in reserved and os.path.isdir(os.path.join(path_root, name))
    ]

    # Caffe list files use a space as separator, so file names must not
    # contain any; normalise every class before splitting.
    for name in classes:
        stdrename(GetFileList(os.path.join(path_root, name)))

    for name in classes:
        src_dir = os.path.join(path_root, name)
        images = os.listdir(src_dir)
        nval = int(len(images) * valratio)
        print(nval)

        # Validation files first, then training files: same move logic.
        for subset, members in (('val', images[:nval]), ('train', images[nval:])):
            dst_dir = os.path.join(path_root, subset, name)
            if members and not os.path.exists(dst_dir):
                os.makedirs(dst_dir)
            for fname in members:
                shutil.move(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))



def writetrainlist(path_root):
    """Write ``<path_root>.txt`` listing every file with its class index.

    Each sub-directory of ``path_root`` is one class (for example
    'cropblack', 'cropbrown', 'cropwhite', 'cropyellow'). Class directories
    are sorted by name and numbered from 0, so the same set of class names
    always maps to the same labels -- e.g. for a train and a val folder.
    Entries of ``path_root`` that are not directories are ignored.

    Spaces are removed from file names first (via ``stdrename``). Then one
    line "<file path> <class index>" is written per file, and each path is
    also printed. An existing list file is overwritten.

    Params:
        path_root : directory holding the class folders; the list file is
                    created next to it as path_root + '.txt'.
    """
    class_dirs = sorted(
        os.path.join(path_root, name)
        for name in os.listdir(path_root)
        if os.path.isdir(os.path.join(path_root, name))
    )

    # Caffe list files use a space as separator, so file names must not
    # contain any.
    for class_dir in class_dirs:
        stdrename(GetFileList(class_dir))

    lines = []
    for label, class_dir in enumerate(class_dirs):
        for image in GetFileList(class_dir):
            print(image)
            lines.append(image + ' ' + str(label) + '\n')

    with open(path_root + '.txt', 'w') as txtlist:
        txtlist.writelines(lines)



# Driver section: the calls below run whenever this script is executed.
# The triple-quoted string is a bare expression statement, so the spiltdata
# call inside it is never executed; the '#' lines are disabled example calls
# for other datasets.
'''spiltdata('../headangle/data')'''
# writetrainlist('../faceshape/data/train')
# writetrainlist('../faceshape/data/val')


#spiltdata('../hair/data')
#writetrainlist('../hair/data/train')
#writetrainlist('../hair/data/val')




# Active calls: write '../data/train.txt' and '../data/val.txt'.
writetrainlist('../data/train')
writetrainlist('../data/val')

  

原文地址:https://www.cnblogs.com/sddai/p/10220014.html