制作训练集和验证集、测试集

  1 ##深度学习过程中,需要制作训练集和验证集、测试集。
  2 
  3 import os, random, shutil
  4 from config import *
  5 import re
  6 
  7 #用于清空并生成文件夹
  8 def test_train_dir():
  9     # 清空文件夹里面的所有文件,然后创建,解决重复占用问题
 10     # shutil.rmtree('要清空的文件夹名')
 11     # os.mkdir('要清空的文件夹名')
 12 
 13     if  os.path.exists(os.path.join(abs_path, "data")):
 14 
 15 
 16         shutil.rmtree(os.path.join(abs_path, "data"))
 17         # 通过makedirs创建多级目录
 18     os.makedirs(os.path.join(abs_path, "data", "test"))
 19     os.makedirs(os.path.join(abs_path, "data", "train"))
 20     os.makedirs(os.path.join(abs_path, "data", "samples"))
 21 
 22 def tet_data(fileDir):
 23     pathDir=os.listdir(fileDir)
 24     for i in pathDir:
 25         print(os.path.join(abs_path,"data","train",i))
 26         # shutil.move(fileDir + "\" + i, tarDir_test + "\" + i)
 27         if  os.path.exists(os.path.join(abs_path,"data","train",i)):
 28             continue
 29 
 30         else:
 31             shutil.copy(fileDir + "\" + i, tarDir_test + "\" + i)
 32 
 33 
 34 
 35 def moveFile(fileDir):
 36     pathDir = os.listdir(fileDir)  # 取图片的原始路径
 37     # filenumber = len(pathDir)
 38     # rate = 0.1  # 自定义抽取图片的比例,比方说100张抽10张,那就是0.1
 39     # picknumber = int(filenumber * rate)  # 按照rate比例从文件夹中取一定数量图片
 40     sample = random.sample(pathDir, 30)  # 随机选取picknumber数量的样本图片
 41     print(sample)
 42     for name in sample:
 43         # shutil.move(fileDir +"\"+ name, tarDir +"\"+name)
 44         shutil.copy(fileDir +"\"+ name, tarDir +"\"+name)
 45     # return
 46 
 47 def train_and_labels():
 48 
 49 
 50     # file1=open(os.path.join("D:Jonie_Project_sss15data","train_dir.txt"),"w")
 51     file2=open(os.path.join(abs_path,"data","samples","train_dir.txt"),"w")
 52     file3=open(os.path.join(abs_path,"data","samples","train_label.txt"),"w")
 53     dir_sam=os.listdir(os.path.join(abs_path,"data","train"))
 54     for i in dir_sam:
 55         # print(i.split("_")[-2])
 56         # print(os.path.join(abs_path,"data")+ i + "	" +i.split("_")[-2]+"
")
 57         print(os.path.join(abs_path,"data")+ i + "	" +re.split('_d+.',i)[0]+"
")
 58         # file2.write(os.path.join(abs_path,"dataset","scene_categories",i.split("_")[-2],i) +"
")#解决a_b_12.jpg.jpg截取结果为b的问题
 59         file2.write(os.path.join(abs_path,"dataset","scene_categories",re.split('_d+.',i)[0],i) +"
")
 60         # file3.write(i.split("_")[-2]+"
")
 61         file3.write(re.split('_d+.',i)[0]+"
")
 62     file2.close()
 63     file3.close()
 64 def tet_and_labels():
 65     # file1 = open(os.path.join("D:Jonie_Project_sss15data", "labels.txt"), "w")
 66     file2 = open(os.path.join(abs_path,"data","samples", "test_dir.txt"), "w")
 67     file3 = open(os.path.join(abs_path,"data","samples", "test_label.txt"), "w")
 68     dir_sam = os.listdir(os.path.join(abs_path,"data","test"))
 69     for i in dir_sam:
 70         # print(i.split("_")[-2])
 71         # print(os.path.join(abs_path,"data",i)+ i.split("_")[-2] + "
")
 72         print(os.path.join(abs_path,"data",i)+ re.split('_d+.',i)[0] + "
")
 73         # file2.write(os.path.join(abs_path,"dataset","scene_categories")+"\" +i.split("_")[-2]+"\"+i +"
")
 74         file2.write(os.path.join(abs_path,"dataset","scene_categories")+"\" +re.split('_d+.',i)[0]+"\"+i +"
")
 75         # file3.write( i.split("_")[-2] + "
")
 76         file3.write( re.split('_d+.',i)[0] + "
")
 77     file2.close()
 78     file3.close()
 79 
 80 if __name__ == '__main__':
 81 
 82 
 83     for num1 in range(4):
 84         test_train_dir()
 85 
 86         dir_name=os.listdir(os.path.join(abs_path,"dataset","scene_categories"))
 87         print("-------------------------",os.path.join(abs_path,"dataset","scene_categories"))
 88         for i in dir_name:
 89             # print(i)
 90             fileDir =os.path.join(abs_path,"dataset","scene_categories",i)   # 源图片文件夹路径
 91             tarDir = os.path.join(abs_path,"data","train") # 移动到新的文件夹路径
 92             tarDir_test = os.path.join(abs_path,"data","test") # 移动到新的文件夹路径
 93             moveFile(fileDir)#train
 94             # tarDir_test = r"D:Jonie_Project_sss15data	est"  # 移动到新的文件夹路径
 95 
 96             tet_data(fileDir)#test
 97         train_and_labels()
 98         tet_and_labels()
 99         if not os.path.exists(os.path.join(abs_path,"data1",str(num1))):
100             os.makedirs(os.path.join(abs_path,"data1",str(num1)))
101         new_path = os.path.join(abs_path, "data1", str(num1))
102         for derName, subfolders, filenames in os.walk(os.path.join(abs_path, "data", "samples")):
103             # print(derName/subfolders/filenames)
104             for j in range(len(filenames)):
105                 if filenames[j].endswith('.txt'):
106                     file_path = derName + '\' + filenames[j]
107                     newpath = new_path + '\' + filenames[j]
108                     shutil.move(file_path, newpath)
原文地址:https://www.cnblogs.com/jqpy1994/p/11196202.html