9.27作业

英文字频统计

from collections import Counter

# Punctuation characters deleted from the text before it is split into words.
ENGLISH_SEPARATORS = ''',?'''
# Words that are left out of the frequency table.
ENGLISH_EXCLUDED_WORDS = frozenset({'i', 'in', 'the', 'anymore'})


def split_words(text, sep=ENGLISH_SEPARATORS):
    """Return the lower-cased words of *text*.

    Every character listed in *sep* is deleted first, then the text is split
    on whitespace. An empty text yields an empty list.
    """
    # One translate() pass replaces the chain of per-character replace() calls.
    return text.lower().translate(str.maketrans('', '', sep)).split()


def count_words(words, exclude=ENGLISH_EXCLUDED_WORDS):
    """Return a Counter mapping each word not in *exclude* to its frequency."""
    # A single pass over the list; list.count() per distinct word is quadratic.
    return Counter(word for word in words if word not in exclude)


def top_words(counts, limit=20):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first."""
    return counts.most_common(limit)


def run_english_word_count(path='hello.txt', limit=20):
    """Print the word-frequency statistics of the text file at *path*.

    Prints, in order: the raw text, the word list with its length, the
    distinct counted words with their number, the full frequency table and
    finally the *limit* most frequent words.
    """
    # The context manager closes the file even if read() raises.
    with open(path, 'r', encoding='utf-8') as fo:
        hello = fo.read()
    print(hello)

    # The statistics are computed from the file content that was just read,
    # not from a placeholder literal.
    strList = split_words(hello)
    print(len(strList), strList)

    strDict = count_words(strList)
    print(len(strDict), set(strDict))

    print(strDict.items())
    # Ranked by frequency (descending) rather than alphabetically.
    print(top_words(strDict, limit))


if __name__ == '__main__':
    run_english_word_count()

运行结果

中文字频统计（小说《装在套子里的人》）

import jieba

# Punctuation deleted from the text before segmentation. The first literal is
# the original list; the second adds the full-width forms of , ? ! ; : plus
# the enumeration comma, which are what Chinese prose normally contains.
CHINESE_PUNCTUATION = ',。?!;:“”‘’-——<_/>' + ',?!;:、'


def strip_punctuation(text, punctuation=CHINESE_PUNCTUATION):
    """Return *text* with every character listed in *punctuation* deleted."""
    return text.translate(str.maketrans('', '', punctuation))


def count_tokens(tokens):
    """Return a Counter of the tokens in *tokens*, ignoring blank tokens.

    Tokens made only of whitespace (spaces, newlines) are skipped so that
    they do not end up at the top of the ranking.
    """
    # Imported locally so this section works without a file-level import.
    from collections import Counter

    return Counter(token for token in tokens if token.strip())


def top_tokens(counts, limit=20):
    """Return up to *limit* ``(token, count)`` pairs, most frequent first.

    Returns fewer pairs when there are fewer than *limit* distinct tokens.
    """
    return counts.most_common(limit)


def run_chinese_word_count(path='taozi.txt', limit=20):
    """Print the *limit* most frequent words of the text file at *path*.

    The text is lower-cased, stripped of punctuation and segmented with
    ``jieba.cut_for_search`` before counting.
    """
    # The context manager closes the file even if read() raises.
    with open(path, 'r', encoding='utf-8') as fo:
        zhuang = fo.read().lower()
    print(zhuang)

    zhuang = strip_punctuation(zhuang)

    # Count the segmented words, not the characters of the raw string.
    words = jieba.cut_for_search(zhuang)
    strDict = count_tokens(words)

    # most_common() never indexes past the end, unlike range(20) on a short list.
    for item in top_tokens(strDict, limit):
        print(item)


if __name__ == '__main__':
    run_chinese_word_count()

运行结果

原文地址:https://www.cnblogs.com/fanfanfan/p/9712284.html