python学习笔记之文件操作及词频统计

文件处理
文件词频统计

上节课回顾

函数
```
def func(num):
    """Minimal function example: accept one argument, ignore it, return 1."""
    return 1
```

集合

s = set()  # an empty set has to be created with set(); {} would create a dict
print(type(s))
# Set operators -- intersection: &  union: |  difference: -  symmetric difference: ^

交集： & 并集: | 差集： - 对称差集：^

序列类型：字符串/元组/列表，因为它们都有索引

lt = [1, 3, 4, 5, 2, 100, 154]
lt.sort()     # sorts the list in place, ascending
print(lt)
lt.reverse()  # reverses the list in place, so it is now in descending order
print(lt)

文件处理

写方式打开文件

# Open the file for writing. Mode "w" creates the file or truncates an existing
# one. The with-block closes the handle even if the write raises, so no
# explicit close() call is needed.
with open(r"E:\python_Program\day06\hamlet.txt", "w", encoding="utf-8") as fd:
    data = fd.write('234234234')  # write() returns the number of characters written
print(data)

读方式打开文件

# Open the file for reading and decode its bytes as UTF-8. The path is a raw
# string so that "\t" in "\threekingdoms.txt" is not turned into a TAB.
# The with-block closes the handle as soon as the read is done.
with open(r"E:\python_Program\day06\threekingdoms.txt", "r", encoding="utf8") as fd:
    data = fd.read()
print(data)

文件词频统计

英文词频统计

from collections import Counter

# Punctuation marks that are blanked out before the text is split into words.
# The backslash is written as "\\" so the literal has no invalid "\]" escape.
punctuation = "!#$%&()*+,-./:;<=>?@[\\]^_‘{|}~"

# Read the whole text in lower case. The with-block closes the file right
# after reading instead of keeping it open until the end of the script.
with open(r"E:\python_Program\day06\hamlet.txt", "r", encoding="UTF-8") as fd:
    data = fd.read().lower()

for ch in punctuation:
    data = data.replace(ch, " ")  # replace each punctuation mark with a space

# split() without an argument splits on any run of whitespace (spaces, tabs,
# newlines) and never yields empty strings, so "" is not counted as a word
# and words on adjacent lines are not glued together.
words = data.split()
count_dict = Counter(words)  # maps word -> number of occurrences


def func(i):
    """Return the count from a (word, count) pair; used as the sort key."""
    return i[1]


# Rank the (word, count) pairs from most to least frequent.
lt = sorted(count_dict.items(), key=func, reverse=True)
for i in lt[:10]:
    print(f"[{i[0]:^5},{i[1]:^5}]")

中文词频统计

import jieba

# Read the whole text. The path is a raw string so "\t" in
# "\threekingdoms.txt" stays a backslash + "t" instead of becoming a TAB.
# The with-block closes the file as soon as the read finishes.
with open(r"E:\python_Program\day06\threekingdoms.txt", "r", encoding="UTF-8") as f:
    data = f.read()

data_dict = {}
# Noise words that are excluded from the ranking.
badword = {"军马","大喜","丞相","却说","不可","二人","如此","商议","如何","主公","将军","不能","荆州","军士","左右","引兵","次日"}
words = jieba.lcut(data)    # segment the text into a list of words with jieba
for word in words:
    # Strip the "曰" marker BEFORE filtering, so that whatever remains is
    # still checked against the length limit and the noise-word set.
    word = word.replace("曰", "")
    if len(word) <= 1 or word in badword:
        continue
    data_dict[word] = data_dict.get(word, 0) + 1

list_data = list(data_dict.items())  # list of (word, count) tuples


def func(i):
    """Return the count (index 1) from a (word, count) tuple; used as the sort key."""
    return i[1]


list_data.sort(key=func, reverse=True)  # highest count first

for i in list_data[:10]:    # print the 10 most frequent words
    print(f"{i[0]: >3},{i[1]: <5}")

作业

# 作业
# 1.定义一个函数，该函数可以实现在内部输入一个信息，如果该信息不能转换为正整数，则重新输入，直到能转换为正整数，则对外返回转换的正整数
# 2.定义一个函数，该函数可以实现在内部输入一个信息，如果该信息不能转换为负整数，则重新输入，直到能转换为负整数，则对外返回转换的负整数
# 3.定义一个函数，实现外界传入一个信息，内部判断是否能被转换为正整数，返回True | False信息
# 4.定义一个函数，实现外界传入一个信息，内部判断是否能被转换为负整数，返回True | False信息
# 5.定义一个函数，实现传入一个整型数字，判断并直接打印该数字是奇数还是偶数
# 6.写函数，检查传入列表的长度，如果大于2，那么仅保留前两个长度的内容，并将新内容返回给调用者。
# 7.写函数，检查获取传入列表或元组对象的所有奇数位索引对应的元素，并将其作为新列表返回给调用者。
# 8.定义一个函数，只要传入 "k1:v1,...,kn:vn" 这样固定格式的字符串，都可以将其转换为 {'k1':'v1',...,'kn':'vn'}这样的字典并返回
# 9.简单购物车,要求如下 (可以用函数也可以不用)：
# 要求： 实现打印商品详细信息，用户输入商品名和购买个数，则将商品名，价格，购买个数加入购物列表，如果输入为空或其他非法输入则要求用户重新输入,购买成功后打印添加到购物车里的信息.

# 作业1：
def input_positiveinteger(reader=input):
    """Prompt repeatedly until the user enters a positive integer and return it.

    An entry is accepted only when ``int()`` can parse it and the result is
    greater than zero. Everything else -- text, decimals such as "1.5",
    zero, negative numbers, or mangled input such as "5-3" -- causes another
    prompt instead of an exception or a silently altered value.

    Args:
        reader: Callable that takes the prompt string and returns the entered
            text. Defaults to the built-in ``input``; pass another callable
            to drive the function without a console.

    Returns:
        The entered value as an ``int`` greater than zero.
    """
    while True:
        text = reader("输入一个值：")
        try:
            value = int(text)
        except ValueError:
            # Not an integer literal: ask again.
            continue
        if value > 0:
            return value

# 作业2：
def input_negativeinteger(reader=input):
    """Prompt repeatedly until the user enters a negative integer and return it.

    An entry is accepted only when ``int()`` can parse it and the result is
    less than zero. Everything else -- text, decimals such as "-1.5", zero,
    positive numbers, or mangled input such as "--3" -- causes another
    prompt instead of an exception or a silently negated value.

    Args:
        reader: Callable that takes the prompt string and returns the entered
            text. Defaults to the built-in ``input``; pass another callable
            to drive the function without a console.

    Returns:
        The entered value as an ``int`` less than zero.
    """
    while True:
        text = reader("输入一个值：")
        try:
            value = int(text)
        except ValueError:
            # Not an integer literal: ask again.
            continue
        if value < 0:
            return value

# 作业3：
def is_positiveinteger(num):
    """Report whether *num* can be converted to a positive integer.

    *num* is converted with ``str()`` and then parsed with ``int()``; the
    check passes only if parsing succeeds and the value is greater than
    zero. Decimal notation (e.g. "1.5"), zero, negative numbers and
    non-numeric text all fail the check.

    Args:
        num: Any object; its ``str()`` form is what gets examined.

    Returns:
        The string "True" or the string "False". Strings (not booleans) are
        kept for compatibility with existing callers.
        NOTE: "False" is a non-empty string and therefore truthy, so compare
        the result with ``== "True"`` rather than using it directly in ``if``.
    """
    try:
        value = int(str(num))
    except ValueError:
        return "False"
    return "True" if value > 0 else "False"

# 作业4：
def is_negativeinteger(num):
    """Report whether *num* can be converted to a negative integer.

    *num* is converted with ``str()`` and then parsed with ``int()``; the
    check passes only if parsing succeeds and the value is less than zero.
    Decimal notation (e.g. "-1.5"), zero, positive numbers and non-numeric
    text all fail the check.

    Args:
        num: Any object; its ``str()`` form is what gets examined.

    Returns:
        The string "True" or the string "False". Strings (not booleans) are
        kept for compatibility with existing callers.
        NOTE: "False" is a non-empty string and therefore truthy, so compare
        the result with ``== "True"`` rather than using it directly in ``if``.
    """
    try:
        value = int(str(num))
    except ValueError:
        return "False"
    return "True" if value < 0 else "False"

# 作业5：
def is_oddeven(num):
    """Print "偶数" (even) or "奇数" (odd) for the given number.

    *num* is passed through ``int()`` before the parity test. Nothing is
    returned; the result is only printed.
    """
    remainder = int(num) % 2
    print("奇数" if remainder else "偶数")

# 作业6：
def set_list(list):
    """Return at most the first two items of *list*.

    When *list* has more than two items, a new slice holding the first two
    is returned; otherwise the very same object is handed back unchanged.
    """
    if len(list) <= 2:
        return list
    return list[:2]

# 作业7：
def set_oddlist(list):
    """Return a new list with the elements of *list* at odd indices (1, 3, 5, ...).

    *list* only needs to support ``len()`` and integer indexing, so both
    lists and tuples work.
    """
    # Start at index 1 and step by 2 to visit exactly the odd positions.
    return [list[index] for index in range(1, len(list), 2)]

# 作业8：
def set_dict(nums):
    """Convert a ``"k1:v1,...,kn:vn"`` string into ``{'k1': 'v1', ..., 'kn': 'vn'}``.

    Pairs are separated by "," and each pair is split at its FIRST ":", so a
    value may itself contain colons. Empty segments (an empty input string,
    a trailing comma, or ",,") are skipped instead of raising.

    Args:
        nums: The text to parse; it is passed through ``str()`` first.

    Returns:
        A dict mapping each key string to its value string. When a key
        occurs more than once, the last value wins.

    Raises:
        IndexError: If a non-empty segment contains no ":".
    """
    dict_count = {}
    for pair in str(nums).split(","):
        if not pair:
            continue
        # maxsplit=1 keeps any further ":" characters inside the value.
        parts = pair.split(":", 1)
        dict_count[parts[0]] = parts[1]
    return dict_count

# 作业9：
def shopping_list(reader=input):
    """Run a simple interactive shopping cart and return what was bought.

    The catalogue is printed, then the user is asked for a product name and
    a quantity. The name must be one of the catalogue entries and the
    quantity must be a positive whole number; otherwise the user is asked
    again, starting from the product name. After each successful purchase
    the cart so far is printed. Entering "quit" at either prompt ends the
    session.

    Args:
        reader: Callable that takes the prompt string and returns the entered
            text. Defaults to the built-in ``input``; pass another callable
            to drive the function without a console.

    Returns:
        The cart as a list of ``(name, price, quantity)`` tuples, with the
        quantity stored as an ``int``.
    """
    # The catalogue never changes, so it is built once, outside the loops.
    msg_dic = {
        'apple': 10,
        'tesla': 100000,
        'mac': 3000,
        'lenovo': 30000,
        'chicken': 10,
    }
    list_count = []
    while True:
        for product, price in msg_dic.items():
            print(f"name:{product} price:{price}")
        while True:
            name = reader("商品>>:")
            if name == "quit":
                return list_count
            if name not in msg_dic:
                # Empty or unknown product: ask again without asking for a
                # quantity that could not be used anyway.
                continue
            number = reader("个数>>:")
            if number == "quit":
                return list_count
            # isdecimal() admits only characters that int() can parse.
            if number.isdecimal() and int(number) > 0:
                break
        list_count.append((name, msg_dic[name], int(number)))
        print(list_count)