python的文件操作1，监控日志练习、读取大文件、修改文件、集合、元组、random模块以及函数初识

一、判断

# Truthiness: anything non-empty is true, anything non-zero is true.
# None, '', [] and {} all count as empty, so they evaluate to false.
umser = input('请输入姓名').strip()
print('umser不为空' if umser else 'umser为空')
# The same test spelled out: a non-empty string has a length above zero.
print(len(umser) > 0)

# The literal 1 is non-zero, so the "true" branch is the one that prints.
print('真' if 1 else '假')

二、文件读写

import io

# open(file, mode='r'): the mode defaults to read-only when it is omitted.
# Every file is opened in a `with` block so close() is always called,
# even when one of the demonstrated errors is raised.
with open('user2') as fw:
    fw.read()

# Read mode 'r':
#   - opening a file that does not exist raises FileNotFoundError
#   - the file can only be read; writing raises io.UnsupportedOperation
with open('user2', 'r') as fw:
    try:
        fw.write('')
    except io.UnsupportedOperation as err:
        print('r模式不能写:', err)

# Write mode 'w':
#   - a missing file is created; an existing file is emptied on open
#   - the file can only be written; reading raises io.UnsupportedOperation
with open('user2', 'w') as fw:
    try:
        fw.read()
    except io.UnsupportedOperation as err:
        print('w模式不能读:', err)

# Append mode 'a':
#   - a missing file is created; an existing file keeps its content
#   - the file can only be written; reading raises io.UnsupportedOperation
with open('user2', 'a') as fw:
    try:
        fw.read()
    except io.UnsupportedOperation as err:
        print('a模式不能读:', err)

 关于read、readline、readlines的区别

# #readline 每次调用只读取一行内容，第一次调用读到的是文件的第一行
# fw=open('user1','r')
# s=fw.readline()
# print(s)
# #readlines 会读取文件里面所有的内容，把每行的内容放到一个List的里面
# fw=open('user1','r')
# J=fw.readlines()
# print(J)

# #先read再readline和readlines-----只有read能读到东西，因为read已经读完了文件的所有内容，文件指针已经在文件末尾了
# fw=open('user1','r')
# print(fw.read())
# print(fw.readline())
# print(fw.readlines())
#
# #先readline再read再readlines------只有readline和read能读到东西：readline读第一行，read读剩下的所有内容，readlines读不到东西
# fw=open('user1','r')
# print(fw.readline())
# print(fw.read())
# print(fw.readlines())
#
# #先readlines再readline或read-------只有readlines能读到东西，readline和read读不到东西，因为文件指针已经到文件末尾了
# fw=open('user1','r')
# print(fw.readlines())
# print(fw.read())
# print(fw.readline())

读写模式、写读模式、追加模式

# #读写模式  r+
# # #打开一个存在的文件时能不能写---能写，把内容写在第一行
# fw=open('user1','r+',encoding='utf-8')
# fw.write('哈哈')
# # #打开一个不存在的文件时会不会报错---会报错
# fw1=open('user3','r+')
#
# #写读模式 w+
# #打开一个已经存在的文件时，能不能读-----能读，读不到内容，因为w会清空之前文件的内容
# fw=open('user1','w+')
# fw.read()
#
# #追加模式 a+
# #打开一个已经存在的文件时，能不能读----能读，但读不到内容，因为文件指针在最后，需要先把文件指针移到最前面
# fw=open('user','a+')
# fw.read()

# File pointer demo.
# In 'a+' mode the pointer starts at the end of the file, so seek(0) is
# needed before anything can be read; data written in append mode is added
# at the end of the file.
with open('abc', 'a+', encoding='utf-8') as fw:
    # fw.write('abc\n')
    # fw.write('hhhhhhhhh\n')
    fw.seek(0)  # rewind to the start so read() returns the existing content
    G = fw.read()
    print(G)
    fw.write('abc,123')
    # G was captured before the write, so it still shows the old content.
    print(G)
    # fw.flush()  # would push the buffered data to disk immediately

import time  # time.sleep() paces the scans

# Log monitoring script.
# Requirement: scan the server log periodically and report every IP that
# sent more than 200 requests so that it can be blacklisted.
# Steps:
#   1. read the lines that were appended since the previous scan
#   2. take the first whitespace-separated field of each line as the IP
#   3. count the occurrences of every IP in a dictionary
#   4. report each IP whose count is above the limit
# The read offset is remembered between scans; otherwise each scan would
# start again from the top of the file and count the same requests twice.
SCAN_INTERVAL = 10  # seconds; NOTE(review): the requirement says once a minute - confirm
REQUEST_LIMIT = 200

seek = 0
while True:
    # The counters are rebuilt on every pass: a scan only judges new lines.
    ip_counts = {}
    # 'a+' creates the log when it does not exist yet instead of failing;
    # the `with` block closes the handle at the end of every pass.
    with open('access.log', 'a+', encoding='utf-8') as fw:
        fw.seek(seek)
        P = fw.readlines()
        seek = fw.tell()  # remember where this scan stopped
    for i in P:
        fields = i.split()
        if not fields:
            continue  # blank line: there is no IP to extract
        ip = fields[0]
        ip_counts[ip] = ip_counts.get(ip, 0) + 1
    for ip, count in ip_counts.items():
        if count > REQUEST_LIMIT:
            print('大于200的ip是：%s,它的次数是：%s' % (ip, count))
    time.sleep(SCAN_INTERVAL)

高效读取文件

# #文件比较大时，如果用read或readlines来读，会把文件里所有的内容一次性读进内存，很耗内存，所以只能一行一行地取文件内容
# #用readline一行一行读时不需要事先知道文件有多少行，readline返回空字符串就说明文件已经读完
# # 第一种
# fw = open('access.log',encoding='utf-8')#fw是文件对象或文件句柄
# dict={}
# while 1:
#     line=fw.readline().strip()#第一次读文件第一行，把第一行内容放到line里面
#     if line:
#         ip=line.split()[0]
#         dict[ip]=1
#         # pass#该处的 pass 便是占据一个位置，因为如果定义一个空函数程序会报错，当你没有想好函数的内容是可以用 pass 填充，使程序可以正常运行。
#     else:
#         break
# print(dict)
# #上面这种方式有了空行就没办法处理了，他会默认已经结束了
# # 第二种
# #直接循环文件对象，每次循环取到的是文件的下一行，包括空行(处理大文件时，直接循环文件对象 )
# for line in fw:
#     print(line)

# #写文件write不能写list,只能写字符串
# fw = open('user2','a+',encoding='utf-8')
# fw.seek(0)
# # fw.write([1,2,3])
# #writelines 会循环list里面的每个元素写进去
# L=['c123,','a123\n']
# fw.writelines(L)

 文件修改

import os #os.chroot(path)
# A `with` block closes the file automatically when the block ends, so no
# explicit close() call is needed.

# Approach 1 - suitable for small files: read the whole content, replace
# the text in memory, then empty the file and write the new content back.
with open('abc', 'r+', encoding='utf-8') as fw:  # 'r+' starts at offset 0
    res = fw.read()
    res_read = res.replace('AB1', 'AB2')
    fw.seek(0)      # go back to the start before truncating
    fw.truncate()   # drop the old content
    fw.write(res_read)

# Approach 2 - suitable for large files: stream line by line into a second
# file, then put the second file in place of the original.
# Both handles use utf-8 so the text is decoded and encoded the same way.
with open('user', encoding='utf-8') as fr, \
        open('.user', 'w', encoding='utf-8') as fw:
    for line in fr:
        res = line.replace('abc', 'ABC')
        fw.write(res)
        print(res)
# os.replace overwrites the destination in one step, so there is no moment
# at which 'user' has been removed but the new file is not yet in place.
os.replace('.user', 'user')

#split vertically分屏查看

文件操作练习：

import os
# A bare file name is enough when the file is in the current directory;
# a file located elsewhere needs its path.
# Task: tag every student in stu.txt as "submitted" or "not submitted".
# Steps:
#   1. read the file line by line
#   2. split each line on whitespace
#   3. a line with more than one field carries a submission mark and is
#      tagged as submitted; a line with only a name is tagged as missing
# The result is written to a temporary file opened with 'w', so leftovers
# from an earlier interrupted run are discarded instead of appended to.
with open('stu.txt', 'r', encoding='utf-8') as fr, \
        open('.stu.txt', 'w', encoding='utf-8') as fw:
    for name in fr:
        res = name.split()
        print(res)
        if not res:
            # Blank line: copy it through instead of inventing a record.
            fw.write(name)
            continue
        if len(res) > 1:
            res[-1] = ',已交\n'
        else:
            # NOTE(review): '末交' looks like a typo for '未交' - confirm
            # before changing the text written to the file.
            res.append(',末交\n')
        fw.writelines(res)
        print(res)
# Swap the temporary file into place in a single step.
os.replace('.stu.txt', 'stu.txt')

 元组

Python 的元组与列表类似，不同之处在于元组的元素不能修改。

元组使用小括号，列表使用方括号。

元组创建很简单，只需要在括号中添加元素，并使用逗号隔开即可

# A tuple is a sequence like a list, except that its items cannot be
# reassigned after creation.
s = [1, 2, 3]    # list
s1 = (1, 2, 3)   # tuple
# A tuple offers only two methods: index() and count().
mysql = ('cuimeiping', 'c123456', '192.168.1.10')
print(mysql[0])
# mysql.index()  # position of a value
# mysql.count()  # number of occurrences of a value
# Parentheses also group expressions, so a one-element tuple needs a
# trailing comma to be recognised as a tuple.
oracel = (123,)
print(type(oracel))

集合

集合（set）是一个无序的不重复元素序列。

可以使用大括号 { } 或者 set() 函数创建集合，注意：创建一个空集合必须用 set() 而不是 { }，因为 { } 是用来创建一个空字典

定义一个空集合set()

# A set drops duplicates automatically.
# The source list is deliberately not called `list`: that name would shadow
# the builtin list() for the rest of the module.
numbers = [1, 1, 2, 3, 4, 4]
lest = set(numbers)
print(lest)

alist = {'a', 'b', 'c'}
blist = {'b', 'e', 'a'}
# Intersection: elements present in both sets.
res1 = alist.intersection(blist)
print(res1)
res2 = alist & blist
print(res2)

# Union: elements of both sets merged, duplicates removed.
aes1 = alist.union(blist)
print(aes1)
aes2 = alist | blist
print(aes2)

# Difference: elements of the left operand that the right one lacks.
bes1 = alist - blist
print(bes1)
# Same operation in the opposite direction (blist minus alist).
bes2 = blist.difference(alist)
print(bes2)

# Symmetric difference: elements that appear in exactly one of the sets.
ces1 = alist.symmetric_difference(blist)
print(ces1)
ces2 = alist ^ blist
print(ces2)

# Adding and removing elements.
alist.add('ebc')  # add a single element
print(alist)
alist.pop()  # remove and return an arbitrary element
print(alist)
# pop() may already have removed 'c'; discard() does not raise in that case.
alist.discard('c')
print(alist)
# A set can be iterated like any other collection.
for ebc in alist:
    print(ebc)

import string

# Ready-made character sets from the string module, printed one per line.
for charset in (
    string.ascii_uppercase,  # upper-case letters A-Z
    string.ascii_lowercase,  # lower-case letters a-z
    string.digits,           # digits 0-9
    string.ascii_letters,    # lower-case followed by upper-case letters
    string.punctuation,      # punctuation / special characters
):
    print(charset)

import random

# randint(a, b) returns one random integer from the given range.
print(random.randint(1, 223))

l = [1, 2, 3, 4]
s = 'abcdefg'
# choice() picks a single element.
picked_one = random.choice(s)
print(picked_one)
# sample() picks several elements, returned as a list.
picked_three = random.sample(s, 3)
print(picked_three)
# shuffle() reorders the list in place and returns nothing.
random.shuffle(l)
print(l)
# uniform(a, b) returns a random float from the given range.
print(random.uniform(1, 19))

f = random.uniform(1, 19)
# round() keeps the requested number of decimal places.
newf = round(f, 3)
print(newf)

 函数

#实现某个功能的一些代码，函数的作用是提高代码的复用性
# 函数是组织好的，可重复使用的，用来实现单一，或相关联功能的代码段。
# 函数能提高应用的模块性，和代码的重复利用率。你已经知道Python提供了许多内建函数，比如print()。但你也可以自己创建函数，这被叫做用户自定义函数。

def hello():
    """Print the greeting ``hello`` to standard output."""
    greeting = 'hello'
    print(greeting)


# Defining a function does not run it; the body only runs when it is called.
hello()

def weijing(fill_name,count):
    """Append text to a file.

    ``fill_name`` and ``count`` are the formal parameters; the values given
    at the call site are the actual arguments.

    Args:
        fill_name: path of the file to append to; created when missing.
        count: the text to append.
    """
    # Write to the file the caller asked for rather than a fixed name.
    with open(fill_name, 'a+', encoding='utf-8') as fw:
        fw.write(count)
weijing('a.txt','123')#调用
#实参，实际参数
def readfill(fill_name):
    """Return the full text of ``fill_name``.

    The file is opened in 'a+' mode, so a file that does not exist is
    created and an empty string is returned for it. A function without a
    ``return`` statement would give back ``None`` instead.
    """
    with open(fill_name, mode='a+', encoding='utf-8') as handle:
        # 'a+' positions the pointer at the end; rewind before reading.
        handle.seek(0)
        return handle.read()
#在函数里面代码是函数体
res=readfill('a.txt')
print(res)
#函数定义的变量都是局部变量，只要一出了函数都不能用了

示例1：通过手机号取钱

def get_mony(phon):
    """Return the amount stored for the phone number ``phon``.

    The argument is the information the caller has to supply (input); the
    return value is what the caller gets back (output).

    Returns:
        The amount for a known number, or ``None`` for an unknown one.
    """
    balances = {'123456': 100, '133009': 200}
    if phon in balances:
        return balances[phon]
    return None
user1=get_mony('123456')
print(user1)
user2=get_mony('133009')
print(user2)

 练习：

1、写个一函数，这个函数的功能是，传入一个数字，产生N条手机号，产生的手机号不能重复。
[150,189,188,170,132,150,186]
def phone(500):
    phone.txt
    1861232323
    23423423

import random
#方法一：
def phone(num):
    """Generate ``num`` unique phone numbers and save them to ``phone1.txt``.

    Each number is one of the prefixes 150/130/158 followed by eight random
    digits and a trailing newline. Digits may repeat and all ten digits are
    available.

    Args:
        num: how many numbers to produce; an int or a numeric string.

    Returns:
        A set holding the generated numbers, each ending with a newline.

    Raises:
        ValueError: if ``num`` is not numeric, or is larger than the number
            of distinct phone numbers that can be built.
    """
    start_s = ['150', '130', '158']
    s = '0123456789'
    num = int(num)
    # Without this guard the loop below could never finish.
    if num > len(start_s) * 10 ** 8:
        raise ValueError('cannot generate %d unique phone numbers' % num)
    all_phone = set()
    # A set ignores duplicates, so loop until enough distinct numbers exist.
    # `<` rather than `!=`: zero or negative counts must not loop forever.
    while len(all_phone) < num:
        end = ''.join(random.choice(s) for _ in range(8))
        all_phone.add(random.choice(start_s) + end + '\n')
    with open('phone1.txt', 'w', encoding='utf-8') as fw:
        fw.writelines(all_phone)
    return all_phone
res=input('请输入你想产生的条数')
res1=phone(res)
print(res1)

#方法二：
def phone(count):
    """Generate ``count`` unique phone numbers and append them to ``hh``.

    Each number is one of the prefixes 150/130/158/138 followed by a random
    integer zero-padded to eight digits, plus a trailing newline.

    Args:
        count: how many numbers to produce; an int or a numeric string.

    Returns:
        A set holding the generated numbers, each ending with a newline.

    Raises:
        ValueError: if ``count`` is not numeric, or is larger than the
            number of distinct phone numbers that can be built.
    """
    count = int(count)
    start = ['150', '130', '158', '138']
    # Without this guard the loop below could never finish.
    if count > len(start) * 10 ** 8:
        raise ValueError('cannot generate %d unique phone numbers' % count)
    phones = set()
    # `<` rather than `!=`: zero or negative counts must not loop forever.
    while len(phones) < count:
        res_start = random.choice(start)
        end = random.randint(0, 99999999)
        # %08d pads with leading zeros so every number has the same length.
        phones.add('%s%08d\n' % (res_start, end))
    with open('hh', 'a+', encoding='utf-8') as fr:
        fr.writelines(phones)
    return phones
res=input('请输入你想产生的条数')
res1=phone(res)
print(res1)

2、写一个函数，这个函数的功能是，传入一个数字，产生N条邮箱，产生的邮箱不能重复。
邮箱前面的长度是6-12之间，产生的邮箱必须包含大写字母、小写字母、数字和特殊字符
    1、交集
    s='abc123'
    abc123@163.com
    abcdef12@sina.com
    [163.com,qq.com,sina.com,126.com]


import string
#方法一：
def email(num):
    """Generate ``num`` unique e-mail addresses and append them to ``ii``.

    The part before the ``@`` is 6 to 12 characters long and always holds at
    least one upper-case letter, one lower-case letter, one digit and one
    special character, in random order. The domain is picked from a fixed
    list.

    Args:
        num: how many addresses to produce; an int or a numeric string.

    Returns:
        A set holding the generated addresses, each ending with a newline.
    """
    email_end = ['163.com', 'qq.com', 'sina.com', '126.com']
    # '@' separates the local part from the domain, so it is kept out of
    # the characters the local part is built from.
    special = string.punctuation.replace('@', '')
    groups = (string.ascii_uppercase, string.ascii_lowercase,
              string.digits, special)
    every_char = ''.join(groups)
    num = int(num)
    emails = set()
    # `<` rather than `!=`: zero or negative counts must not loop forever.
    while len(emails) < num:
        email_len = random.randint(6, 12)
        # One character from every required group ...
        chars = [random.choice(group) for group in groups]
        # ... the rest from the full pool, then mix the positions so the
        # required characters are not always in the same place.
        chars += random.sample(every_char, email_len - len(groups))
        random.shuffle(chars)
        emails.add('%s@%s\n' % (''.join(chars), random.choice(email_end)))
    with open('ii', 'a+', encoding='utf-8') as fg:
        fg.writelines(emails)
    return emails
res=input('请输入你想产生的邮箱条数')
res1=email(res)
print(res1)

# 方法二：
def email(count):
    """Generate ``count`` unique e-mail addresses and append them to ``gg``.

    The part before the ``@`` is 6 to 12 characters long. It starts with one
    upper-case letter, one lower-case letter and one digit, chosen afresh
    for every address, followed by one special character and further
    characters from the full pool. The domain is picked from a fixed list.

    Args:
        count: how many addresses to produce; an int or a numeric string.

    Returns:
        A set holding the generated addresses, each ending with a newline.
    """
    count = int(count)
    email_end = ['163.com', 'qq.com', 'sina.com', '126.com']
    # '@' separates the local part from the domain, so it is kept out of
    # the characters the local part is built from.
    punctu = string.punctuation.replace('@', '')
    pool = string.ascii_letters + string.digits + punctu
    result = set()
    # Loop on the size of the set, not on a counter: a duplicate address
    # must not reduce the number of addresses returned.
    while len(result) < count:
        res = random.randint(6, 12)
        # The required characters are drawn inside the loop; drawing them
        # once up front would give every address the same three characters.
        otheremail = [
            random.choice(string.ascii_uppercase),
            random.choice(string.ascii_lowercase),
            random.choice(string.digits),
            random.choice(punctu),
        ]
        otheremail += random.sample(pool, res - len(otheremail))
        emailoo = ''.join(otheremail)
        result.add(emailoo + '@' + random.choice(email_end) + '\n')
    with open('gg', 'a+', encoding='utf-8') as fo:
        fo.writelines(result)
    return result
res=input('请输入你想产生的邮箱条数')
res1=email(res)
print(res1)

#方法三：取交集判断是否包含大写、小写、数字、特殊字符
def email(suzi):
    """Generate ``suzi`` unique e-mail addresses and append them to ``rr``.

    A candidate of 6 to 12 distinct characters is drawn at random and kept
    only if set intersection shows that it holds an upper-case letter, a
    lower-case letter, a digit and a special character. The domain is picked
    from a fixed list for every address.

    Args:
        suzi: how many addresses to produce; an int or a numeric string.

    Returns:
        A set holding the generated addresses, each ending with a newline,
        or ``None`` (after printing a message) when fewer than one address
        is requested.
    """
    count = int(suzi)
    if count < 1:
        print('count不能为负数')
        return  # a return statement ends the function immediately
    email_end = ['163.com', 'qq.com', 'sina.com', '126.com']
    # '@' separates the local part from the domain, so it is kept out of
    # the characters the local part is built from.
    special = string.punctuation.replace('@', '')
    pool = string.ascii_letters + string.digits + special
    required = [
        set(string.ascii_uppercase),
        set(string.ascii_lowercase),
        set(string.digits),
        set(special),
    ]
    result = set()
    # Compare against the converted int: `suzi` itself may be a string, and
    # an int never equals a string, so the old `!=` test was always true.
    while len(result) < count:
        res = random.randint(6, 12)
        res1 = random.sample(pool, res)
        picked = set(res1)
        # A non-empty intersection means the group is represented.
        if all(picked & group for group in required):
            result.add(''.join(res1) + '@' + random.choice(email_end) + '\n')
    # Write and return once, after the loop has collected every address.
    with open('rr', 'a+', encoding='utf-8') as fo:
        fo.writelines(result)
    return result
res=input('请输入你想产生的邮箱条数')
res1=email(res)
print(res1)