中文词频统计及词云制作

from collections import Counter

import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def count_words(words, excludes=()):
    """Count how often each token occurs, ignoring single-character tokens.

    Args:
        words: Iterable of string tokens (for example the list returned by
            ``jieba.lcut``).
        excludes: Iterable of tokens to remove from the result. Tokens that
            never occurred are silently ignored.

    Returns:
        A ``collections.Counter`` mapping each remaining token to its number
        of occurrences.
    """
    counts = Counter(word for word in words if len(word) != 1)
    for word in excludes:
        # pop() with a default, unlike ``del``, does not raise KeyError when
        # an excluded word is absent from the text.
        counts.pop(word, None)
    return counts


def main(path='t.txt', top_n=20, font_path=None):
    """Print the most frequent words of a text file and show a word cloud.

    Args:
        path: UTF-8 encoded text file to analyse.
        top_n: Maximum number of (word, count) rows to print.
        font_path: Font file handed to ``WordCloud``. The default ``None``
            keeps WordCloud's built-in font.
            NOTE(review): for Chinese text a CJK-capable font is presumably
            required for readable glyphs -- confirm and pass one here.
    """
    # The context manager guarantees the file handle is closed after reading.
    with open(path, 'r', encoding='utf-8') as fr:
        text = fr.read()

    words = jieba.lcut(text)
    excludes = {'.....'}
    counts = count_words(words, excludes)

    # most_common() returns at most top_n rows, so texts with fewer distinct
    # words no longer trigger an IndexError.
    for word, count in counts.most_common(top_n):
        print("{0:<10}{1:>5}".format(word, count))

    if not counts:
        # WordCloud cannot build an image from zero words; nothing to draw.
        return

    # Build the cloud from the filtered frequency table rather than from the
    # last printed (word, count) tuple, which is not valid input for generate().
    mywc = WordCloud(font_path=font_path).generate_from_frequencies(counts)
    # The image must be drawn onto the current axes before show() displays it.
    plt.imshow(mywc)
    plt.axis('off')
    plt.show()


if __name__ == '__main__':
    main()

 

原文地址:https://www.cnblogs.com/lqy-36/p/7591174.html