# 综合练习:词频统计 (Comprehensive exercise: word-frequency count)

from collections import Counter

# Text file that is analysed when this script is run directly.
SOURCE_PATH = 'C:/Users/Administrator/PycharmProjects/bd/test.txt'

# Characters that are turned into spaces before the text is split into words.
PUNCTUATION = ''',.?:'"'''

# Words left out of the report: common function words plus the fragments that
# remain once apostrophes are blanked out ("don't" -> "don" + "t", etc.).
EXCLUDE = frozenset({
    'the', 'and', 'of', 'to', 'in', 'a', 'he', 'for', 't', 'we', 'don',
    'i', 'you', 'it', 's', 're', 've',
})


def count_words(text, punctuation=PUNCTUATION, exclude=EXCLUDE):
    """Return ``(word, count)`` pairs for *text*, most frequent first.

    Every character in *punctuation* is replaced by a space, the text is
    lower-cased and split on whitespace, and words found in *exclude* are
    dropped. Words with equal counts keep the order in which they first
    appear in *text*, so the result is deterministic.

    Args:
        text: The raw text to analyse.
        punctuation: Characters to treat as word separators.
        exclude: Collection of lower-case words to omit from the result.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count.
    """
    # One translate pass instead of one full-string replace per character.
    separators = str.maketrans(dict.fromkeys(punctuation, ' '))
    words = text.translate(separators).lower().split()
    # Counter tallies in a single pass; calling list.count for every distinct
    # word would rescan the whole word list each time.
    tally = Counter(word for word in words if word not in exclude)
    return tally.most_common()


def main():
    """Read SOURCE_PATH and print one ``(word, count)`` tuple per line."""
    # The context manager closes the file even if reading raises.
    with open(SOURCE_PATH, 'r') as fo:
        text = fo.read()
    for entry in count_words(text):
        print(entry)


if __name__ == '__main__':
    main()

# 原文地址 (original source): https://www.cnblogs.com/yxbdbolgs/p/8649588.html