分享一个统计文档中不同key的个数的python脚本

前提:

原统计文档中的条目以空格分隔,或只有一列(以便用 awk 筛选出某一列)。

 1 #!/usr/bin/env python
 2 # -*- coding:utf-8 -*-
 3 import sys
 4 import os
 5 
 6 PRINTRED = "33[1;31m"
 7 PRINTGREEN = "33[0;32;47m"   #green color backgroud is white
 8 PRINTGREEN_SIM = "33[1;32m"  #no backgroud green color
 9 PRINTBLUE = "33[1;34m"
10 PRINTCOLOR_END= "33[0m"
11 
def getAllItemList(fileName):
    """Return the non-blank lines of *fileName* with surrounding whitespace removed.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list holding one stripped string per line that contains at least
        one non-whitespace character, in file order.

    Raises:
        SystemExit: With status 1 when the file cannot be opened or read
            (IOError); an error message is printed first.
    """
    try:
        # The context manager closes the file even if reading fails part-way.
        with open(fileName, 'r') as fp:
            stripped = (line.strip() for line in fp)
            # Lines that were empty or whitespace-only strip down to "".
            return [item for item in stripped if item]
    except IOError:
        print("error: file not found, please check it !!!")
        # Non-zero status so shells and callers can detect the failure.
        sys.exit(1)
30 
def calSameItemCount(itemList):
    """Print the number of distinct keys in *itemList* and each key's count.

    Args:
        itemList: Iterable of hashable keys to tally.

    Returns:
        None. The report goes to standard output; every number is wrapped
        in the PRINTRED / PRINTCOLOR_END colour codes.
    """
    # Imported here so the block does not depend on the file's import section.
    from collections import Counter

    # dict.has_key() no longer exists in Python 3; Counter does the same
    # tally and is available on both Python 2.7 and Python 3.
    sameCountDict = Counter(itemList)

    print(("不重复key个数:{cstart}%d{cend}").format(cstart=PRINTRED,cend=PRINTCOLOR_END) % (len(sameCountDict)))
    print("不重复的key如下:")
    for key, value in sameCountDict.items():
        print(("%s 有[{cstart}%d{cend}]个!").format(cstart=PRINTRED,cend=PRINTCOLOR_END) % (key, value))
44 
45 
def main():
    """Extract column 10 of count.txt with awk and print its key statistics.

    Side effects:
        Overwrites awkCountitem.txt in the current directory with the
        extracted column, then prints the sample count and the per-key
        counts to standard output.

    Raises:
        SystemExit: With status 1 when the awk command reports failure.
    """
    # The keys to count are in column 10; the input file name is fixed
    # to count.txt.
    status = os.system("awk '{print $10}' count.txt > awkCountitem.txt")
    if status != 0:
        # Without this check a missing count.txt would leave an empty
        # awkCountitem.txt and produce a misleading "0 samples" report.
        print("error: awk failed to extract column 10 from count.txt !!!")
        sys.exit(1)

    fileName = "awkCountitem.txt"
    itemList = getAllItemList(fileName)
    itemCount = len(itemList)
    print(("%s:样本数据[{cstart}%d{cend}] 个!!!").format(cstart=PRINTRED,cend=PRINTCOLOR_END) % (fileName, itemCount))
    calSameItemCount(itemList)
56 
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
我是一块砖,哪里需要往哪搬。
原文地址:https://www.cnblogs.com/daimadebanyungong/p/14554013.html