lookalike 数据拆分

 1 import pandas  as pd
 2 user_feature_data = []
 3 flag = 1
 4 print("landing")
 5 with open("userFeature.data","r",encoding="utf-8") as f :
 6     for i,line in enumerate(f) :
 7         line = line.strip().split("|")
 8         dict_list = {}
 9         for each in line :
10             each_list = each.split(" ")
11             dict_list[each_list[0]] = " ".join(each_list[1:])#这里会给value添加一个空格和,//但是好像dataframe帮我解决了他们,dont worry
12             #print(dict_list)
13         user_feature_data.append(dict_list)
14         if i/flag >= 100000 :
15             user_feature = pd.DataFrame(user_feature_data)
16             user_feature.to_csv("userFeature_%d_part_bat.csv" % (i), index=False)
17             #print("finish:userFeature_part_bat.csv")
18             user_feature_data = []
19             flag += 1
原文地址:https://www.cnblogs.com/wbt1995/p/8920673.html