python 22day--python的模块与包

一、python模块

1、time和datetime模块：获取时间模块

print(time.strftime("%Y-%m-%d %X")) #格式化的时间字符串:'2017-02-15 11:40:53'

2、random模块：产生随机数模块

print(random.random())#(0,1)----float    大于0且小于1之间的小数

3、os模块：与操作系统交互的一个借口

os.getcwd() 获取当前工作目录，即当前python脚本工作的目录路径

4、sys模块：

1 sys.argv           命令行参数List，第一个元素是程序本身路径
2 sys.exit(n)        退出程序，正常退出时exit(0)
3 sys.version        获取Python解释程序的版本信息
4 sys.maxint         最大的Int值
5 sys.path           返回模块的搜索路径，初始化时使用PYTHONPATH环境变量的值
6 sys.platform       返回操作系统平台名称

5、shutil模块：高级的文件、文件夹、压缩包处理模块

1 1 import shutil
2 2 #将一个文件的内容拷贝到另一个文件
3 3 shutil.copyfileobj(open('old.xml','r'), open('new.xml', 'w'))

6、json和pickle模块：序列化模块（非常重要）

1 import json
2 x="[null,true,false,1]"
3 print(eval(x)) #报错，无法解析null类型，而json就可以
4 print(json.loads(x))

7、shelve模块：shelve模块比pickle模块简单，只有一个open函数，返回类似字典的对象，可读可写;key必须为字符串，而值可以是python所支持的数据类型

1 import shelve
2 
3 f=shelve.open(r'sheve.txt')
4 # f['stu1_info']={'name':'egon','age':18,'hobby':['piao','smoking','drinking']}
5 # f['stu2_info']={'name':'gangdan','age':53}
6 # f['school_info']={'website':'http://www.pypy.org','city':'beijing'}
7 
8 print(f['stu1_info']['hobby'])
9 f.close()

8、xml模块：解析xml文件模块

xml是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单，不过，古时候，在json还没诞生的黑暗年代，大家只能选择用xml呀，至今很多传统公司如金融行业的很多系统的接口还主要是xml。xml的格式如下，就是通过<>节点来区别数据结构的:

 1 <?xml version="1.0"?>
 2 <data>
 3     <country name="Liechtenstein">
 4         <rank updated="yes">2</rank>
 5         <year>2008</year>
 6         <gdppc>141100</gdppc>
 7         <neighbor name="Austria" direction="E"/>
 8         <neighbor name="Switzerland" direction="W"/>
 9     </country>
10     <country name="Singapore">
11         <rank updated="yes">5</rank>
12         <year>2011</year>
13         <gdppc>59900</gdppc>
14         <neighbor name="Malaysia" direction="N"/>
15     </country>
16     <country name="Panama">
17         <rank updated="yes">69</rank>
18         <year>2011</year>
19         <gdppc>13600</gdppc>
20         <neighbor name="Costa Rica" direction="W"/>
21         <neighbor name="Colombia" direction="E"/>
22     </country>
23 </data>
24 
25 xml数据

 1 import xml.etree.ElementTree as ET
 2  
 3 tree = ET.parse("xmltest.xml")
 4 root = tree.getroot()
 5 print(root.tag)
 6  
 7 #遍历xml文档
 8 for child in root:
 9     print('========>',child.tag,child.attrib,child.attrib['name'])
10     for i in child:
11         print(i.tag,i.attrib,i.text)
12  
13 #只遍历year 节点
14 for node in root.iter('year'):
15     print(node.tag,node.text)
16 #---------------------------------------
17 
18 import xml.etree.ElementTree as ET
19  
20 tree = ET.parse("xmltest.xml")
21 root = tree.getroot()
22  
23 #修改
24 for node in root.iter('year'):
25     new_year=int(node.text)+1
26     node.text=str(new_year)
27     node.set('updated','yes')
28     node.set('version','1.0')
29 tree.write('test.xml')
30  
31  
32 #删除node
33 for country in root.findall('country'):
34    rank = int(country.find('rank').text)
35    if rank > 50:
36      root.remove(country)
37  
38 tree.write('output.xml')

自己创建xml

 1 import xml.etree.ElementTree as ET
 2  
 3  
 4 new_xml = ET.Element("namelist")
 5 name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
 6 age = ET.SubElement(name,"age",attrib={"checked":"no"})
 7 sex = ET.SubElement(name,"sex")
 8 sex.text = '33'
 9 name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"no"})
10 age = ET.SubElement(name2,"age")
11 age.text = '19'
12  
13 et = ET.ElementTree(new_xml) #生成文档对象
14 et.write("test.xml", encoding="utf-8",xml_declaration=True)
15  
16 ET.dump(new_xml) #打印生成的格式

9、configparser模块：配置文件如下

# 注释1
; 注释2

[section1]
k1 = v1
k2:v2
user=egon
age=18
is_admin=true
salary=31
[section2]
k1 = v1

读取

 1 import configparser
 2 
 3 config=configparser.ConfigParser()
 4 config.read('a.cfg')
 5 
 6 #查看所有的标题
 7 res=config.sections() #['section1', 'section2']
 8 print(res)
 9 
10 #查看标题section1下所有key=value的key
11 options=config.options('section1')
12 print(options) #['k1', 'k2', 'user', 'age', 'is_admin', 'salary']
13 
14 #查看标题section1下所有key=value的(key,value)格式
15 item_list=config.items('section1')
16 print(item_list) #[('k1', 'v1'), ('k2', 'v2'), ('user', 'egon'), ('age', '18'), ('is_admin', 'true'), ('salary', '31')]
17 
18 #查看标题section1下user的值=>字符串格式
19 val=config.get('section1','user')
20 print(val) #egon
21 
22 #查看标题section1下age的值=>整数格式
23 val1=config.getint('section1','age')
24 print(val1) #18
25 
26 #查看标题section1下is_admin的值=>布尔值格式
27 val2=config.getboolean('section1','is_admin')
28 print(val2) #True
29 
30 #查看标题section1下salary的值=>浮点型格式
31 val3=config.getfloat('section1','salary')
32 print(val3) #31.0

改写

import configparser

config=configparser.ConfigParser()
config.read('a.cfg',encoding='utf-8')


#删除整个标题section2
config.remove_section('section2')

#删除标题section1下的某个k1和k2
config.remove_option('section1','k1')
config.remove_option('section1','k2')

#判断是否存在某个标题
print(config.has_section('section1'))

#判断标题section1下是否有user
print(config.has_option('section1',''))


#添加一个标题
config.add_section('egon')

#在标题egon下添加name=egon,age=18的配置
config.set('egon','name','egon')
config.set('egon','age',18) #报错,必须是字符串


#最后将修改的内容写入文件,完成最终的修改
config.write(open('a.cfg','w'))

10、hashlib模块： hash算法就像一座工厂，工厂接收你送来的原材料（可以用m.update()为工厂运送原材料），经过加工返回的产品就是hash值

import hashlib
 
m=hashlib.md5()# m=hashlib.sha256()
 
m.update('hello'.encode('utf8'))
print(m.hexdigest())  #5d41402abc4b2a76b9719d911017c592
 
m.update('alvin'.encode('utf8'))
 
print(m.hexdigest())  #92a7e713c30abbb0319fa07da2a5c4af
 
m2=hashlib.md5()
m2.update('helloalvin'.encode('utf8'))
print(m2.hexdigest()) #92a7e713c30abbb0319fa07da2a5c4af

'''
注意：把一段很长的数据update多次，与一次update这段长数据，得到的结果一样
但是update多次为校验大文件提供了可能。
'''

11、subprocess模块：

 1 import  subprocess
 2 
 3 '''
 4 sh-3.2# ls /Users/egon/Desktop |grep txt$
 5 mysql.txt
 6 tt.txt
 7 事物.txt
 8 '''
 9 
10 res1=subprocess.Popen('ls /Users/jieli/Desktop',shell=True,stdout=subprocess.PIPE)
11 res=subprocess.Popen('grep txt$',shell=True,stdin=res1.stdout,
12                  stdout=subprocess.PIPE)
13 
14 print(res.stdout.read().decode('utf-8'))
15 
16 
17 #等同于上面,但是上面的优势在于,一个数据流可以和另外一个数据流交互,可以通过爬虫得到结果然后交给grep
18 res1=subprocess.Popen('ls /Users/jieli/Desktop |grep txt$',shell=True,stdout=subprocess.PIPE)
19 print(res1.stdout.read().decode('utf-8'))
20 
21 
22 #windows下:
23 # dir | findstr 'test*'
24 # dir | findstr 'txt$'
25 import subprocess
26 res1=subprocess.Popen(r'dir C:UsersAdministratorPycharmProjects	est函数备课',shell=True,stdout=subprocess.PIPE)
27 res=subprocess.Popen('findstr test*',shell=True,stdin=res1.stdout,
28                  stdout=subprocess.PIPE)
29 
30 print(res.stdout.read().decode('gbk')) #subprocess使用当前系统默认编码，得到结果为bytes类型，在windows下需要用gbk解码

12、logging模块：日志模块（非常重要）

13、re模块：正则表达式模块（非常重要）

# ===========================re模块提供的方法介绍===========================
import re
#1
print(re.findall('e','alex make love') )   #['e', 'e', 'e'],返回所有满足匹配条件的结果,放在列表里
#2
print(re.search('e','alex make love').group()) #e,只到找到第一个匹配然后返回一个包含匹配信息的对象,该对象可以通过调用group()方法得到匹配的字符串,如果字符串没有匹配，则返回None。

#3
print(re.match('e','alex make love'))    #None,同search,不过在字符串开始处进行匹配,完全可以用search+^代替match

#4
print(re.split('[ab]','abcd'))     #['', '', 'cd']，先按'a'分割得到''和'bcd',再对''和'bcd'分别按'b'分割

#5
print('===>',re.sub('a','A','alex make love')) #===> Alex mAke love，不指定n，默认替换所有
print('===>',re.sub('a','A','alex make love',1)) #===> Alex make love
print('===>',re.sub('a','A','alex make love',2)) #===> Alex mAke love
print('===>',re.sub('^(w+)(.*?s)(w+)(.*?s)(w+)(.*?)$',r'52341','alex make love')) #===> love make alex

print('===>',re.subn('a','A','alex make love')) #===> ('Alex mAke love', 2),结果带有总共替换的个数


#6
obj=re.compile('d{2}')

print(obj.search('abc123eeee').group()) #12
print(obj.findall('abc123eeee')) #['12'],重用了obj