词频统计+词云(傲慢与偏见)

#4.8.py
# Word-frequency count for the text file "傲慢与偏见.txt":
# segment the text with jieba, count every token longer than one character,
# drop a fixed set of excluded words, and print the five most frequent ones.
from collections import Counter

import jieba

# Words removed from the result after counting.
excludes = {"先生","没有","太太","一个","自己","小姐","我们","可是","她们","他们","知道","事情","时候"}

# Read the whole text; the context manager guarantees the handle is closed.
with open("傲慢与偏见.txt", "r", encoding='utf-8') as f:
    txt = f.read()

# jieba.lcut returns the segmented text as a list of tokens.
words = jieba.lcut(txt)

# Count only tokens longer than one character.
counts = Counter(word for word in words if len(word) > 1)

# pop() with a default tolerates excluded words that never occur in the text
# (a plain `del` would raise KeyError for them).
for word in excludes:
    counts.pop(word, None)

# (word, count) pairs ordered by descending count.
items = counts.most_common()

# Slicing instead of indexing copes with texts that have fewer than five
# distinct words.
for word, count in items[:5]:
    print("{0:<10}{1:>5}".format(word, count))

#4.8.py
# Word cloud for the text file "傲慢与偏见.txt":
# segment the text with jieba, count every token longer than one character,
# drop a fixed set of excluded words, then render the remaining frequencies
# as a word cloud, save it to "wordcloud.png" and display it with matplotlib.
from collections import Counter

import matplotlib.pyplot as plt
import jieba
from wordcloud import WordCloud

# Read the whole text; the context manager guarantees the handle is closed.
with open("傲慢与偏见.txt", "r", encoding='utf-8') as f:
    txt = f.read()

# Words removed from the result after counting.
excludes = {"先生","没有","太太","一个","自己","小姐","我们","可是","她们","他们","知道","事情","时候"}

# jieba.lcut returns the segmented text as a list of tokens.
words = jieba.lcut(txt)

# Count only tokens longer than one character.
counts = Counter(word for word in words if len(word) > 1)

# pop() with a default tolerates excluded words that never occur in the text
# (a plain `del` would raise KeyError for them).
for word in excludes:
    counts.pop(word, None)

# NOTE(review): the font path '.simhei.ttf' looks like it lost a path
# separator (e.g. './simhei.ttf') — confirm where the SimHei font file lives.
wc = WordCloud(font_path = r'.simhei.ttf',background_color = 'white',width = 500,height = 350,max_font_size=50,min_font_size=10)

# Build the cloud from the filtered frequency table so that the jieba
# segmentation and the exclusion list actually shape the picture; feeding the
# raw text to generate() would bypass both.
wc.generate_from_frequencies(counts)
wc.to_file("wordcloud.png")

# Show the rendered image in a matplotlib window without axes.
plt.figure('wordcloud.png')
plt.imshow(wc)
plt.axis('off')
plt.show()

 

原文地址:https://www.cnblogs.com/Adaran/p/12659857.html