python学习之路-day7

一、常用模块

shutil

xml

configparser

hashlib

subprocess

1、shutil模块

用途：高级的文件、文件夹、压缩包处理模块

shutil.copyfileobj(fsrc,fdst[,length])

将文件内容拷贝到另一个文件中

import shutil

# Copy the contents of old.xml into new.xml. copyfileobj never closes the
# file objects it is given, so the with-block closes both of them.
with open('old.xml','r') as fsrc, open('new.xml','w') as fdst:
    shutil.copyfileobj(fsrc,fdst)

shutil.copyfile(src,dst)

拷贝文件

shutil.copyfile('haha.log','xixi.log')   # the destination file does not need to exist

shutil.copymode(src,dst)

仅拷贝权限。内容、组、用户均不变

shutil.copymode('haha.log','xixi.log')   #目标文件必须存在

shutil.copystat(src,dst)

仅拷贝状态的信息，包括：mode bits，atime，mtime，flags

shutil.copystat('haha.log','xixi.log')     #目标文件必须存在

shutil.copy(src,dst)

拷贝文件和权限

shutil.copy('haha.log','xixi.log')

shutil.copy2(src,dst)

拷贝文件和状态信息

shutil.copy2('haha.log','xixi.log')

shutil.ignore_patterns(*patterns)
shutil.copytree(src,dst,symlinks=False,ignore=None)

递归的去拷贝文件夹

shutil.copytree('old_dir','new_dir',ignore=shutil.ignore_patterns('*.pyc','tmp*'))       #目标目录不能存在，注意对new_dir目录父目录要有可写权限，ignore的意思是排除

shutil.copytree('f1','f2',symlinks=True,ignore=shutil.ignore_patterns('*.pyc','tmp*'))

#默认（symlinks=False）会把软链接指向的文件内容拷贝成一个新的普通文件；symlinks=True 则把软链接本身拷贝为软链接

拷贝软链接

shutil.rmtree(path[,ignore_errors[,onerror]])

递归的去删除文件

shutil.rmtree('old_dir')

shutil.move(src,dst)

递归的去移动文件，它类似mv命令，其实就是重命名

shutil.move('old_file','new_file')

shutil.make_archive(base_name,format,...)

创建压缩包并返回文件路径，例如：zip、tar

base_name：压缩包的文件名，也可以是压缩包的路径。只是文件名时，则保存至当前目录，否则保存至指定路径，
如 data_bak =>保存至当前路径
如：/tmp/data_bak =>保存至/tmp/
format：压缩包种类，“zip”, “tar”, “bztar”，“gztar”
root_dir：要压缩的文件夹路径（默认当前目录）
owner：用户，默认当前用户
group：组，默认当前组
logger：用于记录日志，通常是logging.Logger对象

#将 /data下的文件打包放置当前程序目录
ret = shutil.make_archive('data_bak','gztar',root_dir='/data')

#将 /data下的文件打包放置 /tmp/目录
ret = shutil.make_archive('/tmp/data_bak','gztar',root_dir='/data')

import zipfile

# Compress: the with-block closes the archive even if adding a member fails.
with zipfile.ZipFile('access_log.zip','w') as z:
    z.write('a.log')
    z.write('data.data')

# Extract: unpack every member into the current directory.
with zipfile.ZipFile('access_log.zip','r') as z:
    z.extractall(path='.')

zipfile压缩解压缩

import tarfile

# Compress: arcname is the name the file is stored under inside the archive.
# The with-block closes the archive even if adding a member fails.
with tarfile.open('/tmp/data.tar','w') as t:
    t.add('/test1/a.py',arcname='a.bak')
    t.add('/test1/b.py',arcname='b.bak')

# Extract: unpack every member under /data.
with tarfile.open('/tmp/data.tar','r') as t:
    t.extractall('/data')

tarfile压缩解压缩

2、xml模块

xml是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单，不过，在json还没诞生的黑暗年代，大家只能选择使用xml，至今很多传统公司如金融行业的很多系统的接口还主要是xml

xml的格式如下，就是通过<>节点来区别数据结构的

<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    <egon age="18">hello</egon></country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    <egon age="18">hello</egon></country>
    <country name="Panama">
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    <egon age="18">hello</egon></country>
</data>

xml数据

xml协议在各个语言里都是支持的，在python中可以用以下模块操作xml：

print(root.iter('year'))          #全文搜索
print(root.find('country'))       #在root的子节点找，只找一个
print(root.findall('country'))    #在root的子节点找，找所有

import xml.etree.ElementTree as ET
tree = ET.parse('a.xml')
root = tree.getroot()
print(root.tag)

# Walk the document: each direct child of the root, then that child's children.
for child in root:
    print('====>',child.tag,child.attrib,child.attrib['name'])
    for i in child:
        print(i.tag,i.attrib,i.text)

# Visit only the <year> nodes.
for node in root.iter('year'):
    print(node.tag,node.text)

# Add a node: append <xiaoyu age="18">hello</xiaoyu> to every <country>.
for country in root.iter('country'):
    e = ET.Element('xiaoyu')
    e.text = 'hello'
    e.attrib = {'age':'18'}
    country.append(e)
tree.write('a.xml')
# Delete a node: drop <rank> from every country whose rank value exceeds 10.
# list() snapshots the countries so the traversal does not depend on the
# tree that is being modified.
for country in list(root.iter('country')):
    print(country.tag)
    rank = country.find('rank')
    # find() returns None when a country has no <rank> child, so guard
    # before reading .text.
    if rank is not None and int(rank.text) > 10:
        country.remove(rank)
tree.write('a.xml')
# Modify nodes: increase every year by one and set two attributes on it.
for node in root.iter('year'):
    new_year = int(node.text) + 1
    node.text = str(new_year)
    node.set('updated','yes')
    node.set('version','1.0')
tree.write('a.xml')

xml文件的增删改查

import xml.etree.ElementTree as ET


# Build the tree in memory: <namelist> is the root and receives two <name>
# children, each with its own sub-elements.
new_xml = ET.Element("namelist")
name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
age = ET.SubElement(name,"age",attrib={"checked":"no"})
sex = ET.SubElement(name,"sex")
sex.text = '33'
name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"no"})
age = ET.SubElement(name2,"age")
age.text = '19'

et = ET.ElementTree(new_xml) # wrap the root element in a document object
et.write("test.xml", encoding="utf-8",xml_declaration=True)

ET.dump(new_xml) # print the generated XML

创建xml文件


3、configparser模块

# 注释1
; 注释2

[section1]
k1 = v1
k2:v2
user=egon
age=18
is_admin=true
salary=31

[section2]
k1 = v1

配置文件

读操作

import configparser

config=configparser.ConfigParser()
config.read('a.cfg')

# List all section names
res=config.sections() #['section1', 'section2']
print(res)

# List the option names (keys) under section1
options=config.options('section1')
print(options) #['k1', 'k2', 'user', 'age', 'is_admin', 'salary']

# List the options under section1 as (key, value) pairs
item_list=config.items('section1')
print(item_list) #[('k1', 'v1'), ('k2', 'v2'), ('user', 'egon'), ('age', '18'), ('is_admin', 'true'), ('salary', '31')]

# Read the value of user under section1 => returned as a string
val=config.get('section1','user')
print(val) #egon

# Read the value of age under section1 => converted to an int
val1=config.getint('section1','age')
print(val1) #18

# Read the value of is_admin under section1 => converted to a bool
val2=config.getboolean('section1','is_admin')
print(val2) #True

# Read the value of salary under section1 => converted to a float
val3=config.getfloat('section1','salary')
print(val3) #31.0

修改操作

import configparser

config=configparser.ConfigParser()
config.read('a.cfg')


# Remove the whole section2 section
config.remove_section('section2')

# Remove options k1 and k2 from section1
config.remove_option('section1','k1')
config.remove_option('section1','k2')

# Check whether a section exists
print(config.has_section('section1'))

# Check whether section1 has a user option
print(config.has_option('section1','user'))


# Add a new section
config.add_section('egon')

# Under section egon, set name=egon and age=18
config.set('egon','name','egon')
config.set('egon','age','18') # values must be strings; passing the int 18 raises TypeError


# Finally write the changes back to the file; the with-block closes the
# handle so the content is flushed to disk.
with open('a.cfg','w') as f:
    config.write(f)

4、hashlib模块

hashlib：提供哈希（hash）算法的模块，3.x里代替了md5模块和sha模块，主要提供 SHA1、SHA224、SHA256、SHA384、SHA512、MD5 算法
三个特点：
1.内容相同则hash运算结果相同，内容稍微改变则hash值则变
2.不可逆推
3.相同算法：无论校验多长的数据，得到的哈希值长度固定。

import hashlib

# Feed one digest object in two steps...
m = hashlib.md5()
m.update(b'hello')
print(m.hexdigest())    #5d41402abc4b2a76b9719d911017c592

m.update(b'haha')
print(m.hexdigest())    #ab540ca02784f1e2f0fb8e1d2d1d92a9

# ...and a second one with the concatenated data in a single step.
m2 = hashlib.md5()
m2.update(b'hellohaha')
print(m2.hexdigest())   #ab540ca02784f1e2f0fb8e1d2d1d92a9

'''
注意：把一段很长的数据update多次，与一次update这段长数据，得到的结果一样
但是update多次为校验大文件提供了可能。
'''

以上加密算法虽然依然非常厉害，但有时候存在缺陷，即：通过撞库可以反解。所以，有必要在加密算法中添加自定义key再来做加密。

import hashlib

# ######## 256 ########

# Seed the digest with a custom key before hashing the real content.
# The object is named `sha` rather than `hash` so the builtin hash() is not shadowed.
sha = hashlib.sha256('898oaFs09f'.encode('utf8'))
sha.update('alvin'.encode('utf8'))
print(sha.hexdigest())#e79e68f070cdedcfe63eaf1a2e92c83b4cfb1b5c6bc452d214c1b7e77cdfd1c7

import hashlib
passwds=[
    'alex3714',
    'alex1313',
    'alex94139413',
    'alex123456',
    '123456alex',
    'a123lex',
    ]
def make_passwd_dic(passwds):
    """Map every plaintext password to its MD5 hex digest.

    passwds: iterable of str passwords.
    Returns a dict of the form {password: md5 hexdigest of its UTF-8 bytes}.
    """
    return {
        plain: hashlib.md5(plain.encode('utf-8')).hexdigest()
        for plain in passwds
    }

def break_code(cryptograph,passwd_dic):
    """Print every password in passwd_dic whose digest equals cryptograph.

    cryptograph: hex digest to look up.
    passwd_dic: dict mapping plaintext password -> hex digest.
    Prints one highlighted line per match and returns None.
    """
    for k,v in passwd_dic.items():
        if v == cryptograph:
            # \033[46m switches on a cyan background and \033[0m resets it
            # (ANSI escape sequences; the leading backslash-zero is required).
            print('密码是===>\033[46m%s\033[0m' %k)

cryptograph='aee949757a2e698417463d47acac93df'
break_code(cryptograph,make_passwd_dic(passwds))

模拟撞库破解密码

python 还有一个 hmac 模块，它内部对我们创建 key 和内容进行进一步的处理然后再加密:

import hmac
# The key passed to hmac.new acts as the salt. digestmod is mandatory on
# Python 3.8+; MD5 matches the 32-hex-digit digest shown below.
h = hmac.new('alvin'.encode('utf8'),digestmod='md5')
h.update('hello'.encode('utf8'))
print(h.hexdigest())#320df9832eab4c038b6c1d7ed73a5940

#要想保证hmac最终结果一致，必须保证：
#1:hmac.new括号内指定的初始key一样
#2:无论update多少次，校验的内容累加到一起是一样的内容

import hmac

# digestmod is mandatory on Python 3.8+; MD5 matches the 32-hex-digit
# digests listed after this snippet.

# Same key, message fed in two pieces.
h1=hmac.new(b'egon',digestmod='md5')
h1.update(b'hello')
h1.update(b'world')
print(h1.hexdigest())

# Same key, same message fed in one piece: identical digest to h1.
h2=hmac.new(b'egon',digestmod='md5')
h2.update(b'helloworld')
print(h2.hexdigest())

# Different key (and no message): the digest differs from h1/h2.
h3=hmac.new(b'egonhelloworld',digestmod='md5')
print(h3.hexdigest())

'''
f1bf38d054691688f89dcd34ac3c27f2
f1bf38d054691688f89dcd34ac3c27f2
bcca84edd9eeb86f30539922b28f3981
'''

注意！


5、subprocess模块

import  subprocess

'''
sh-3.2# ls /Users/egon/Desktop |grep txt$
mysql.txt
tt.txt
事物.txt
'''

# Reproduce a shell pipeline with two processes: the stdout pipe of the `ls`
# process is passed to the `grep` process as its stdin.
# NOTE(review): the directory here (/Users/jieli/Desktop) differs from the one
# in the sample session above (/Users/egon/Desktop) - confirm which is intended.
res1=subprocess.Popen('ls /Users/jieli/Desktop',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('grep txt$',shell=True,stdin=res1.stdout,
                 stdout=subprocess.PIPE)

# The pipe yields bytes, so decode before printing.
print(res.stdout.read().decode('utf-8'))


# Same result as above. The advantage of the two-process form is that one data stream can be fed into another, e.g. output obtained by a crawler can be handed to grep.
res1=subprocess.Popen('ls /Users/jieli/Desktop |grep txt$',shell=True,stdout=subprocess.PIPE)
print(res1.stdout.read().decode('utf-8'))


#windows下:
# dir | findstr 'test*'
# dir | findstr 'txt$'
import subprocess
# Raw string so the backslashes of the Windows path are not read as escape
# sequences (otherwise \t would turn into a tab character).
res1=subprocess.Popen(r'dir C:\Users\Administrator\PycharmProjects\test\函数备课',shell=True,stdout=subprocess.PIPE)
res=subprocess.Popen('findstr test*',shell=True,stdin=res1.stdout,
                 stdout=subprocess.PIPE)

print(res.stdout.read().decode('gbk')) # the output is bytes in the system default encoding; on Windows it has to be decoded as gbk

二、面向对象

1、面向对象与面向过程的区别：

面向过程：核心是过程二字，过程指的是问题的解决步骤，即先干什么再干什么，基于面向过程去设计程序就好比在设计一条流水线，是一种机械式的思维方式

优点：复杂的问题流程化，进而简单化

缺点：可扩展性差

应用：脚本程序，比如linux系统管理脚本，著名案例：linux内核、httpd、git

面向对象：核心是对象二字，对象就是特征与技能的结合体，如果把设计程序比喻成创造一个世界，那你就是这个世界的上帝，与面向过程对机械流水的模拟形成鲜明的对比，面向对象更加注重对现实世界的模拟

优点：可扩展性

缺点：编程的复杂度高于面向过程

类即种类，类别，对象是特征和技能的结合体，那么类就是一系列对象相似的特征与技能的结合体

在现实世界中：先有一个个具体存在的对象

在程序中：一定是先定义类，后调用类来产生对象

 1 class OldboyStudent:
 2     school = 'oldboy'    #类的数据属性
 3     def learn(self):      #类的函数属性
 4         print('is learning')
 5 
 6     def eat(self):
 7         print('is eating')
 8 #类体的代码在类定义阶段就会执行，理所当然会产生类的名称空间，用_dict_属性查看
 9 print(OldboyStudent.__dict__)
10 print(OldboyStudent.__dict__['school'])
11 print(OldboyStudent.__dict__['learn'])
12 
13 #类的属性操作
14 print(OldboyStudent.school)
15 print(OldboyStudent.learn)
16 OldboyStudent.x = 1111111111111
17 OldboyStudent.school = 'Oldboy'
18 del OldboyStudent.school
19 print(OldboyStudent.__dict__)
20 OldboyStudent.__dict__['x'] = 111111111   #类的属性无法用字典方式修改

类的定义和操作

产生程序中的对象：类名加括号，调用类，产生一个该类的实际存在的对象，该调用过程称为实例化，产生的结果又可以称为实例化对象

class OldboyStudent:
    """Student whose school and count live on the class itself."""

    school = 'oldboy'
    count = 0

    def __init__(self,age,name,sex):
        # Runs during instantiation, right after the new object exists.
        self.age, self.name, self.sex = age, name, sex

    def learn(self):
        """Print '<name> is learning'."""
        message = '%s is learning' % self.name
        print(message)

obj1 = OldboyStudent(20,'小雨','male')
# Instantiation happens in two steps:
# Step 1: an empty object obj1 is created
# Step 2: OldboyStudent.__init__(obj1,20,'小雨','male') is called

print(obj1.__dict__)
OldboyStudent.learn(obj1)
obj1.learn()  # same as OldboyStudent.learn(obj1)

obj1.name = '哈哈'               # instance attributes can be reassigned directly; they are stored in obj1.__dict__
obj1.__dict__.pop('name')       # removing the key from __dict__ deletes the instance attribute

2、继承：

指的是类与类之间的关系，是一种什么是什么的关系，功能之一就是用来解决代码重用的问题。继承是一种创建新类的方式，在python中，新建的类可以继承一个或多个父类，父类又可称为基类或超类，新建的类称为派生类或子类

继承分为单继承和多继承：

# Two unrelated base classes.
class ParentClass1:
    pass
class ParentClass2:
    pass
# Single inheritance: one parent class.
class SubClass1(ParentClass1):
    pass
# Multiple inheritance: two parent classes.
class SubClass2(ParentClass1,ParentClass2):
    pass

print(SubClass1.__bases__)      # the parent classes are shown as a tuple
print(SubClass2.__bases__)

在子类派生出的新的方法内重用父类的功能方式：指名道姓法
OldboyPeople.__init__ 这种调用方式本身与继承是没有关系的

class OldboyPeople:
    """Base class holding the attributes every Oldboy person has."""

    school = 'oldboy'

    def __init__(self,name,age,sex):
        # Store the three per-person attributes on the instance.
        self.name, self.age, self.sex = name, age, sex

    def eat(self):
        """Print 'is eating'."""
        print('is eating')

class OldboyStudent(OldboyPeople):
    """Student that additionally records the chosen course."""

    def __init__(self,name,age,sex,course):
        # Reuse the parent initializer through super(), then add the
        # student-only attribute.
        super().__init__(name,age,sex)
        self.course = course

    def tell_info(self):
        """Print which course this student chose."""
        info = (self.name,self.course)
        print('%s 选择了 %s课程' % info)

yu_obj = OldboyStudent('xiaoyu',18,'male','Python')
yu_obj.tell_info()

super方法

class OldboyPeople:
    """Common parent of students and teachers."""

    school = 'oldboy'

    def __init__(self,name,age,sex):
        # Record the person's name, age and sex on the instance.
        self.name = name
        self.age, self.sex = age, sex

    def eat(self):
        """Print 'is eating'."""
        text = 'is eating'
        print(text)

class OldboyStudent(OldboyPeople):
    """Student; initialisation is delegated to OldboyPeople by name."""

    def __init__(self,name,age,sex):
        # Explicitly named call to the parent initializer.
        OldboyPeople.__init__(self,name,age,sex)

    def learn(self):
        """Print '<name> is learning'."""
        message = '%s is learning' % self.name
        print(message)

class OldboyTeacher(OldboyPeople):
    """Teacher with a salary, a title and a list of courses."""

    def __init__(self,name,age,sex,salary,title):
        # Explicitly named call to the parent initializer, then the
        # teacher-only attributes.
        OldboyPeople.__init__(self,name,age,sex)
        self.salary, self.title = salary, title
        # Starts empty; the demo code appends Course objects to it later.
        self.course = []

    def teach(self):
        """Print '<name> is teaching'."""
        message = '%s is teaching' % self.name
        print(message)

class Course:
    """A course described by its name, period and price."""

    def __init__(self,course_name,course_period,course_price):
        # Keep the three descriptive fields on the instance.
        self.course_name, self.course_period, self.course_price = (
            course_name, course_period, course_price)

    def tell_info(self):
        """Print a one-line summary of the course."""
        fields = (self.course_name,self.course_period,self.course_price)
        print('<课程名：%s 周期：%s 价格：%s>' % fields)

python = Course('Python','6mons',3000)
linux = Course('Linux','3mons',2000)
bigdata = Course('Bigdata','1mons',1000)
xy_obj = OldboyStudent('xiaoyu',18,'male')
egon_obj = OldboyTeacher('egon',28,'male',3.1,'沙河一霸')
egon_obj.course.append(linux)
egon_obj.course.append(python)

for i in  egon_obj.course:
    i.tell_info()

组合