# Python 3 data structures: word-frequency counting (English text)

import string
from collections import Counter

# Input file to analyse. The backslashes of this Windows path had been lost
# (it read 'C:UsersBlackDesktopWalden.txt'); they are restored here.
path = r'C:\Users\Black\Desktop\Walden.txt'


def count_words(text):
    """Return ``(word, count)`` pairs for *text*, most frequent first.

    The text is split on whitespace; each token has leading and trailing
    ASCII punctuation stripped and is lower-cased. Tokens that consist only
    of punctuation (e.g. ``--``) become empty and are skipped, so the empty
    string is never reported as a word.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count. Words
        with equal counts keep the order of their first appearance in *text*.
    """
    normalized = (
        raw_word.strip(string.punctuation).lower() for raw_word in text.split()
    )
    # Counter tallies everything in a single pass, instead of calling
    # list.count() once per distinct word.
    counts = Counter(word for word in normalized if word)
    return counts.most_common()


if __name__ == '__main__':
    with open(path, 'r', encoding='utf-8') as text:
        # Print words by descending frequency.
        for word, count in count_words(text.read()):
            print(f'{word}-{count} times')
# Original article: https://www.cnblogs.com/bigb/p/11644433.html