# 中文词频统计 (Chinese word-frequency statistics)

from collections import Counter

import jieba
# Tokens excluded from the statistics: punctuation marks and very common
# function words that would otherwise dominate the ranking.
STOP_WORDS = frozenset({',','。','的','地','得','一','~',';',':',''})


def count_words(tokens, stop_words=STOP_WORDS):
    """Count how often each token occurs, skipping stop words.

    Args:
        tokens: Iterable of word strings (e.g. the list returned by
            ``jieba.lcut``).
        stop_words: Collection of tokens to leave out of the result.

    Returns:
        A ``collections.Counter`` mapping each remaining token to its number
        of occurrences.
    """
    # Filtering while counting guarantees stop words never reach the result,
    # and a single pass replaces one ``list.count`` scan per distinct word.
    return Counter(token for token in tokens if token not in stop_words)


def main(path='news.txt', top_n=20):
    """Print per-word counts for a text file, then its most frequent words.

    Args:
        path: Text file to analyse.
        top_n: Maximum number of most frequent words to print at the end.
    """
    # NOTE(review): assumes the file is stored as UTF-8; change the encoding
    # here if news.txt was saved in another one (e.g. GBK).
    with open(path, 'r', encoding='utf-8') as f:
        news = f.read()

    counts = count_words(jieba.lcut(news))

    for word, count in counts.items():
        print(word + ':' + str(count))

    # most_common() returns at most top_n (word, count) pairs, so texts with
    # fewer distinct words than top_n no longer raise IndexError.
    for pair in counts.most_common(top_n):
        print(pair)


if __name__ == '__main__':
    main()

# 原文地址 (original source): https://www.cnblogs.com/cairuiqi/p/8660384.html