python统计字词练习

方法一:

 1 import operator
 2 from nltk.corpus import stopwords
 3 stop_words = stopwords.words('English')#目的是去除人称代词等,注意根据编译提示下载相应库
 4 
 5 speech_text = '''
 6 He is a good boy
 7 She is a good girl
 8 We are very nice
 9 Hello boy hello boy 
10 hello girl hello girl
11 hello dog 
12 hello cat
13 hello pig
14 '''
15 speech = speech_text.lower().split()
16 dic = {}
17 for word in speech:
18     if word not in dic:
19         dic[word] = 1 #给词典赋值
20     else:
21         dic[word] = dic[word] + 1
22 swd = sorted(dic.items(), key = operator.itemgetter(1),reverse = True)
23 #stop_words
24 for k,v in swd:
25     if k not in stop_words:
26         print(k,v)
27 
28 print(swd)

方法二:

 1 import operator
 2 from nltk.corpus import stopwords
 3 stop_words = stopwords.words('English')#目的是去除人称代词等,注意根据编译提示下载相应库
 4 
 5 speech_text = '''
 6 He is a good boy
 7 She is a good girl
 8 We are very nice
 9 Hello boy hello boy 
10 hello girl hello girl
11 hello dog 
12 hello cat
13 hello pig
14 '''
15 speech = speech_text.lower().split()
16 from collections import Counter
17 c = Counter(speech)
18 for sw in stop_words:
19     del c[sw]
20 print(c.most_common(10)) #打印前10项
View Code
怕什么真理无穷,进一寸有一寸的欢喜。---胡适
原文地址:https://www.cnblogs.com/hujianglang/p/9637900.html