遍历数据文件目录 music 中的所有歌单，提取歌曲名称以及对应的标签，并进行统计。统计各个标签的歌曲数目，并在每个标签下找出出现次数最多的 3 首歌曲，写入到 Excel 表格中：

"""
l = [132,1,5,4,5,4,5,1,2,1,1,24,2,0,1,1,2,1,2,6,2,5,54]
s = set(l)  # 去重
print(s)
"""
import os
import xlwt

# NOTE(review): the original comment here read "226604 lines of poetry in
# total", which looks like a leftover from a different script -- confirm and
# remove if it does not apply to the music data.

# Parse every playlist file under ./music.
# dict1 maps song name -> {tag: number of playlist lines pairing that tag
# with that song}.
dict1 = {}
logo = xlwt.Workbook()
sheet1 = logo.add_sheet("sheet1")
for root, _dirs, files in os.walk('music'):
    for filename in files:
        # os.path.join builds the path portably (the previous hand-written
        # backslash separator was not even valid Python), and the `with`
        # block guarantees the file handle is closed.
        with open(os.path.join(root, filename), "r", encoding='utf-8') as f:
            for line in f:
                # Fields are separated by two spaces: every field but the
                # last is a tag, the last field is the song name.
                # NOTE(review): the original author observed "one or two
                # spaces" between fields; only the two-space separator is
                # handled here, as before -- verify against the data files.
                fields = [part.strip() for part in line.strip().split("  ")]
                song = fields[-1]
                if not song:
                    # Blank line: nothing to record.
                    continue
                tag_counts = dict1.setdefault(song, {})
                for tag in fields[:-1]:
                    # Longer runs of spaces yield empty tokens; they are
                    # not real tags.
                    if tag:
                        tag_counts[tag] = tag_counts.get(tag, 0) + 1

# Invert the song-centric index into a tag-centric one:
# dict3 maps tag -> {song name: count of that tag for the song}.
# Tags are registered in order of first appearance while walking dict1, and
# the songs inside each tag keep dict1's iteration order.
dict3 = {}
for song, tag_counts in dict1.items():
    for tag, count in tag_counts.items():
        dict3.setdefault(tag, {})[song] = count
# Debugging aid: iterate dict3.items() and print each (tag, songs) pair to
# inspect the inverted index.
# For every tag, write a header row (serial number, tag, number of songs
# carrying the tag) followed by up to three rows holding the songs with the
# highest counts, then save the workbook.
row = 0
for serial, (tag, song_counts) in enumerate(dict3.items(), start=1):
    # Sort (song, count) pairs by count, highest first.
    ranked = sorted(song_counts.items(), key=lambda pair: pair[1], reverse=True)
    sheet1.write(row, 0, serial)
    sheet1.write(row, 1, tag)
    sheet1.write(row, 2, len(song_counts))
    for song, count in ranked[:3]:
        row += 1
        sheet1.write(row, 2, song)
        sheet1.write(row, 3, count)
    # Advance past the last song row so the next tag starts on a fresh row.
    row += 1
logo.save("Music.xls")

欢迎大家指教

原文地址:https://www.cnblogs.com/qunqun/p/8906244.html