第6章常用模块

一、logging模块

用于便捷记录且线程安全的模块

CRITICAL = 50

FATAL = CRITICAL

ERROR = 40

WARNING = 30

WARN = WARNING

INFO = 20

DEBUG = 10

NOTSET = 0

 1 import logging
 2 
 3 logging.debug('调试debug')
 4 logging.info('消息info')
 5 logging.warning('警告warn')
 6 logging.error('错误error')
 7 logging.critical('严重critical')
 8 
 9 '''
10 WARNING:root:警告warn
11 ERROR:root:错误error
12 CRITICAL:root:严重critical
13 '''
14 
15 part1: 默认打印到终端,默认级别为warning

默认打印到终端，默认级别为warning

 1 #======介绍
 2 可在logging.basicConfig()函数中通过具体参数来更改logging模块默认行为，可用参数有
 3 filename：用指定的文件名创建FileHandler（后边会具体讲解handler的概念），这样日志会被存储在指定的文件中。
 4 filemode：文件打开方式，在指定了filename时使用这个参数，默认值为“a”还可指定为“w”。
 5 format：指定handler使用的日志显示格式。
 6 datefmt：指定日期时间格式。
 7 level：设置rootlogger（后边会讲解具体概念）的日志级别
 8 stream：用指定的stream创建StreamHandler。可以指定输出到sys.stderr,sys.stdout或者文件，默认为sys.stderr。若同时列出了filename和stream两个参数，则stream参数会被忽略。
 9 
10 
11 format参数中可能用到的格式化串：
12 %(name)s Logger的名字
13 %(levelno)s 数字形式的日志级别
14 %(levelname)s 文本形式的日志级别
15 %(pathname)s 调用日志输出函数的模块的完整路径名，可能没有
16 %(filename)s 调用日志输出函数的模块的文件名
17 %(module)s 调用日志输出函数的模块名
18 %(funcName)s 调用日志输出函数的函数名
19 %(lineno)d 调用日志输出函数的语句所在的代码行
20 %(created)f 当前时间，用UNIX标准的表示时间的浮点数表示
21 %(relativeCreated)d 输出日志信息时的，自Logger创建以来的毫秒数
22 %(asctime)s 字符串形式的当前时间。默认格式是 “2003-07-08 16:49:45,896”。逗号后面的是毫秒
23 %(thread)d 线程ID。可能没有
24 %(threadName)s 线程名。可能没有
25 %(process)d 进程ID。可能没有
26 %(message)s 用户输出的消息
27 
28 
29 
30 
31 #========使用
32 import logging
33 logging.basicConfig(filename='access.log',
34                     format='%(asctime)s - %(name)s - %(levelname)s -%(module)s:  %(message)s',
35                     datefmt='%Y-%m-%d %H:%M:%S %p',
36                     level=10)
37 
38 logging.debug('调试debug')
39 logging.info('消息info')
40 logging.warning('警告warn')
41 logging.error('错误error')
42 logging.critical('严重critical')
43 
44 
45 
46 
47 
48 #========结果
49 access.log内容:
50 2017-07-28 20:32:17 PM - root - DEBUG -test:  调试debug
51 2017-07-28 20:32:17 PM - root - INFO -test:  消息info
52 2017-07-28 20:32:17 PM - root - WARNING -test:  警告warn
53 2017-07-28 20:32:17 PM - root - ERROR -test:  错误error
54 2017-07-28 20:32:17 PM - root - CRITICAL -test:  严重critical
55 
56 part2: 可以为logging模块指定模块级的配置,即所有logger的配置

可以为logging模块指定模块级别的配置，即所有logger的配置

 1 import logging
 2 
 3 formatter=logging.Formatter('%(asctime)s - %(name)s - %(levelname)s -%(module)s:  %(message)s',
 4                     datefmt='%Y-%m-%d %H:%M:%S %p',)
 5 
 6 
 7 fh1=logging.FileHandler('test1.log')
 8 fh2=logging.FileHandler('test2.log')
 9 fh3=logging.FileHandler('test3.log')
10 ch=logging.StreamHandler()
11 
12 fh1.setFormatter(formatter) #也可以是不同的formater
13 fh2.setFormatter(formatter)
14 fh3.setFormatter(formatter)
15 ch.setFormatter(formatter)
16 
17 logger=logging.getLogger(__name__)
18 logger.setLevel(40)
19 
20 logger.addHandler(fh1)
21 logger.addHandler(fh2)
22 logger.addHandler(fh3)
23 logger.addHandler(ch)
24 
25 
26 
27 logger.debug('debug')
28 logger.info('info')
29 logger.warning('warning')
30 logger.error('error')
31 logger.critical('critical')
32 
33 part3:logging模块的Formatter，Handler，Logger，Filter的概念,见图

logging模块的Formatter,Handler,Logger,Filter的概念

图片链接：https://pan.baidu.com/s/1skWyTT7

logger是第一级过滤，然后才能到handler，可以给logger和handler同时设置level，但是需要注意的是

 1 Logger is also the first to filter the message based on a level — if you set the logger to INFO, and all handlers to DEBUG, you still won't receive DEBUG messages on handlers — they'll be rejected by the logger itself. If you set logger to DEBUG, but all handlers to INFO, you won't receive any DEBUG messages either — because while the logger says "ok, process this", the handlers reject it (DEBUG < INFO).
 2 
 3 
 4 
 5 #验证
 6 import logging
 7 
 8 
 9 form=logging.Formatter('%(asctime)s - %(name)s - %(levelname)s -%(module)s:  %(message)s',
10                     datefmt='%Y-%m-%d %H:%M:%S %p',)
11 
12 ch=logging.StreamHandler()
13 
14 ch.setFormatter(form)
15 # ch.setLevel(10)
16 ch.setLevel(20)
17 
18 l1=logging.getLogger('root')
19 # l1.setLevel(20)
20 l1.setLevel(10)
21 l1.addHandler(ch)
22 
23 l1.debug('l1 debug')
24 
25 重要，重要，重要！！！

View Code

 1 import logging
 2 
 3 formatter=logging.Formatter('%(asctime)s - %(name)s - %(levelname)s -%(module)s:  %(message)s',
 4                     datefmt='%Y-%m-%d %H:%M:%S %p',)
 5 
 6 ch=logging.StreamHandler()
 7 ch.setFormatter(formatter)
 8 
 9 
10 log1=logging.getLogger('root')
11 log2=logging.getLogger('root.child1')
12 log3=logging.getLogger('root.child1.child2')
13 
14 
15 log1.setLevel(10)
16 log2.setLevel(10)
17 log3.setLevel(10)
18 log1.addHandler(ch)
19 log2.addHandler(ch)
20 log3.addHandler(ch)
21 
22 log1.debug('log1 debug')
23 log2.debug('log2 debug')
24 log3.debug('log3 debug')
25 '''
26 2017-07-28 22:22:05 PM - root - DEBUG -test:  log1 debug
27 2017-07-28 22:22:05 PM - root.child1 - DEBUG -test:  log2 debug
28 2017-07-28 22:22:05 PM - root.child1 - DEBUG -test:  log2 debug
29 2017-07-28 22:22:05 PM - root.child1.child2 - DEBUG -test:  log3 debug
30 2017-07-28 22:22:05 PM - root.child1.child2 - DEBUG -test:  log3 debug
31 2017-07-28 22:22:05 PM - root.child1.child2 - DEBUG -test:  log3 debug
32 '''

logger继承

logging的实际应用，模板

 1 """
 2 logging配置
 3 """
 4 
 5 import os
 6 import logging.config
 7 
 8 # 定义三种日志输出格式 开始
 9 
10 standard_format = '[%(asctime)s][%(threadName)s:%(thread)d][task_id:%(name)s][%(filename)s:%(lineno)d]' \
11                   '[%(levelname)s][%(message)s]' #其中name为getlogger指定的名字
12 
13 simple_format = '[%(levelname)s][%(asctime)s][%(filename)s:%(lineno)d]%(message)s'
14 
15 id_simple_format = '[%(levelname)s][%(asctime)s] %(message)s'
16 
17 # 定义日志输出格式 结束
18 
19 logfile_dir = os.path.dirname(os.path.abspath(__file__))  # log文件的目录
20 
21 logfile_name = 'all2.log'  # log文件名
22 
23 # 如果不存在定义的日志目录就创建一个
24 if not os.path.isdir(logfile_dir):
25     os.mkdir(logfile_dir)
26 
27 # log文件的全路径
28 logfile_path = os.path.join(logfile_dir, logfile_name)
29 
30 # log配置字典
31 LOGGING_DIC = {
32     'version': 1,
33     'disable_existing_loggers': False,
34     'formatters': {
35         'standard': {
36             'format': standard_format
37         },
38         'simple': {
39             'format': simple_format
40         },
41     },
42     'filters': {},
43     'handlers': {
44         #打印到终端的日志
45         'console': {
46             'level': 'DEBUG',
47             'class': 'logging.StreamHandler',  # 打印到屏幕
48             'formatter': 'simple'
49         },
50         #打印到文件的日志,收集info及以上的日志
51         'default': {
52             'level': 'DEBUG',
53             'class': 'logging.handlers.RotatingFileHandler',  # 保存到文件
54             'formatter': 'standard',
55             'filename': logfile_path,  # 日志文件
56             'maxBytes': 1024*1024*5,  # 日志大小 5M
57             'backupCount': 5,
58             'encoding': 'utf-8',  # 日志文件的编码，再也不用担心中文log乱码了
59         },
60     },
61     'loggers': {
62         #logging.getLogger(__name__)拿到的logger配置
63         '': {
64             'handlers': ['default', 'console'],  # 这里把上面定义的两个handler都加上，即log数据既写入文件又打印到屏幕
65             'level': 'DEBUG',
66             'propagate': True,  # 向上（更高level的logger）传递
67         },
68     },
69 }
70 
71 
72 def load_my_logging_cfg():
73     logging.config.dictConfig(LOGGING_DIC)  # 导入上面定义的logging配置
74     logger = logging.getLogger(__name__)  # 生成一个log实例
75     logger.info('It works!')  # 记录该文件的运行状态
76 
77 if __name__ == '__main__':
78     load_my_logging_cfg()
79 
80 logging配置文件

logging配置文件

 1 """
 2 MyLogging Test
 3 """
 4 
 5 import time
 6 import logging
 7 import my_logging  # 导入自定义的logging配置
 8 
 9 logger = logging.getLogger(__name__)  # 生成logger实例
10 
11 
12 def demo():
13     logger.debug("start range... time:{}".format(time.time()))
14     logger.info("中文测试开始。。。")
15     for i in range(10):
16         logger.debug("i:{}".format(i))
17         time.sleep(0.2)
18     else:
19         logger.debug("over range... time:{}".format(time.time()))
20     logger.info("中文测试结束。。。")
21 
22 if __name__ == "__main__":
23     my_logging.load_my_logging_cfg()  # 在你程序文件的入口加载自定义logging配置
24     demo()

使用

另外一个django的配置

 1 #! /usr/bin/env python
 2 # -*- coding: utf-8 -*-
 3 # __author__ = "Q1mi"
 4 # Date: 2017/7/28
 5 
 6 
 7 
 8 LOGGING = {
 9     'version': 1,
10     'disable_existing_loggers': False,
11     'formatters': {
12         'standard': {
13             'format': '[%(asctime)s][%(threadName)s:%(thread)d][task_id:%(name)s][%(filename)s:%(lineno)d]'
14                       '[%(levelname)s][%(message)s]'
15         },
16         'simple': {
17             'format': '[%(levelname)s][%(asctime)s][%(filename)s:%(lineno)d]%(message)s'
18         },
19         'collect': {
20             'format': '%(message)s'
21         }
22     },
23     'filters': {
24         'require_debug_true': {
25             '()': 'django.utils.log.RequireDebugTrue',
26         },
27     },
28     'handlers': {
29         #打印到终端的日志
30         'console': {
31             'level': 'DEBUG',
32             'filters': ['require_debug_true'],
33             'class': 'logging.StreamHandler',
34             'formatter': 'simple'
35         },
36         #打印到文件的日志,收集info及以上的日志
37         'default': {
38             'level': 'INFO',
39             'class': 'logging.handlers.RotatingFileHandler',  # 保存到文件，自动切
40             'filename': os.path.join(BASE_LOG_DIR, "xxx_info.log"),  # 日志文件
41             'maxBytes': 1024 * 1024 * 5,  # 日志大小 5M
42             'backupCount': 3,
43             'formatter': 'standard',
44             'encoding': 'utf-8',
45         },
46         #打印到文件的日志:收集错误及以上的日志
47         'error': {
48             'level': 'ERROR',
49             'class': 'logging.handlers.RotatingFileHandler',  # 保存到文件，自动切
50             'filename': os.path.join(BASE_LOG_DIR, "xxx_err.log"),  # 日志文件
51             'maxBytes': 1024 * 1024 * 5,  # 日志大小 5M
52             'backupCount': 5,
53             'formatter': 'standard',
54             'encoding': 'utf-8',
55         },
56         #打印到文件的日志
57         'collect': {
58             'level': 'INFO',
59             'class': 'logging.handlers.RotatingFileHandler',  # 保存到文件，自动切
60             'filename': os.path.join(BASE_LOG_DIR, "xxx_collect.log"),
61             'maxBytes': 1024 * 1024 * 5,  # 日志大小 5M
62             'backupCount': 5,
63             'formatter': 'collect',
64             'encoding': "utf-8"
65         }
66     },
67     'loggers': {
68         #logging.getLogger(__name__)拿到的logger配置
69         '': {
70             'handlers': ['default', 'console', 'error'],
71             'level': 'DEBUG',
72             'propagate': True,
73         },
74         #logging.getLogger('collect')拿到的logger配置
75         'collect': {
76             'handlers': ['console', 'collect'],
77             'level': 'INFO',
78         }
79     },
80 }
81 
82 
83 # -----------
84 # 用法:拿到俩个logger
85 
86 logger = logging.getLogger(__name__) #线上正常的日志
87 collect_logger = logging.getLogger("collect") #领导说,需要为领导们单独定制领导们看的日志

django的logging配置文件

详细解释：

 1 import logging
 2 '''
 3 一:如果不指定filename,则默认打印到终端
 4 二:指定日志级别:
 5     指定方式:
 6         1:level=10
 7         2:level=logging.ERROR
 8 
 9     日志级别种类:
10         CRITICAL = 50
11         FATAL = CRITICAL
12         ERROR = 40
13         WARNING = 30
14         WARN = WARNING
15         INFO = 20
16         DEBUG = 10
17         NOTSET = 0
18 
19 三:指定日志级别为ERROR,则只有ERROR及其以上级别的日志会被打印
20 '''
21 
22 
23 logging.basicConfig(filename='access.log',
24                     format='%(asctime)s - %(name)s - %(levelname)s -%(module)s:  %(message)s',
25                     datefmt='%Y-%m-%d %H:%M:%S %p',
26                     level=10)
27 
28 logging.debug('debug')
29 logging.info('info')
30 logging.warning('warning')
31 logging.error('error')
32 logging.critical('critical')
33 logging.log(10,'log') #如果level=40,则只有logging.critical和logging.error的日志会被打印

View Code

可在logging.basicConfig()函数中通过具体参数来更改logging模块默认行为，可用参数有
filename：用指定的文件名创建FileHandler（后边会具体讲解handler的概念），这样日志会被存储在指定的文件中。
filemode：文件打开方式，在指定了filename时使用这个参数，默认值为“a”还可指定为“w”。
format：指定handler使用的日志显示格式。
datefmt：指定日期时间格式。
level：设置rootlogger（后边会讲解具体概念）的日志级别
stream：用指定的stream创建StreamHandler。可以指定输出到sys.stderr,sys.stdout或者文件，默认为sys.stderr。若同时列出了filename和stream两个参数，则stream参数会被忽略

查看详细：http://blog.csdn.net/zyz511919766/article/details/25136485

日志格式:

 1 #_*_coding:utf-8_*_
 2 __author__ = 'Linhaifeng'
 3 
 4 
 5 import logging
 6 formater=logging.Formatter('%(asctime)s - %(name)s - %(levelname)s -%(module)s:  %(message)s',
 7                     datefmt='%Y-%m-%d %H:%M:%S %p',)
 8 fh=logging.FileHandler('aaaaaaaaaaaa.log')
 9 ch=logging.StreamHandler()
10 
11 fh.setFormatter(formater)
12 ch.setFormatter(formater)
13 
14 
15 log1=logging.getLogger()
16 log1.setLevel(logging.ERROR)
17 
18 
19 log1.addHandler(fh)
20 log1.addHandler(ch)
21 
22 log1.debug('deubug')
23 log1.info('info')
24 log1.warning('warn')
25 log1.error('erro')
26 log1.critical('critical')
27 
28 既打印到终端又打印到文件

既打印到终端又打印到文件

二、time模块

在python中，通常有这几种方式来表示时间：

时间戳（timestamp）：通常来说，时间戳表示的是从1970年1月1日00:00:00开始按秒计算的偏移量。我们运行“type（time.time()）”，返回的是float类型。
格式化的时间字符串（Format String）
结构化的时间（struct_time）：struct_time元组共有9个元素（年，月，日，时，分，秒，一周中第几天，一年中第几天，夏令时）

1 import time
2 #-----------------------------------------------------------------------------
3 print(time.time())   #时间戳 1502204962.2637985
4 print(time.strftime("%Y-%m-%d %X")) #格式化时间字符串  “2017-08-08 23:10:32”
5 print(time.localtime()) #本地时区的结构化时间struct_time
6 print(time.gmtime())    #UTC标准时区结构化时间struct_time

其中计算机认识的时间只能是‘时间戳’格式，而程序员可处理的或者说人类能看懂的时间有：‘格式化的时间字符串’，‘结构化的时间’，于是有了下图的转换关系

 1 #--------------------------------按图1进行转换---------------------------------#
 2 #strftime(format[,t])：把一个代表时间的元组或者struct_time（如由time.localtime()和time.gmtime()返回）转化为格式化的时间字符串，
 3 # 如果t未指定，将传入time.localtime()。如果元组中任何一个元素越界，ValueError的错误将会被抛出。
 4 print(time.strftime('%Y-%m-%d %X',time.localtime()))
 5 print(time.strftime('%Y-%m-%d %X'))
 6 print(time.strftime('%Y-%m-%d %X',time.gmtime()))
 7 print(time.localtime().tm_mon)
 8 
 9 # time.strptime(string[,format])
10 # 把一个格式化时间字符串转化为struct_time，实际上它和strftime（）是逆操作
11 print(time.strptime('2017-08-08 15:40:43','%Y-%m-%d %X'))
12 
13 #time.struct_time(tm_year=2017, tm_mon=8, tm_mday=8, tm_hour=15, tm_min=40, tm_sec=43, tm_wday=1, tm_yday=220, tm_isdst=-1)
14 #在这个函数中，format默认为："%a %b %d %H:%M:%S %Y"。

1 #--------------------------------按图2转换时间-------------------------------#
2 #asctime([t])：把一个表示时间的元组或者struct_time表示为这种形式：'Wed Aug  9 00:09:21 2017'
3 #如果没有参数，将会将time.localtime()作为参数传入
4 print(time.asctime())
5 
6 #ctime([secs]):把一个时间戳（按秒计算的浮点数）转化为time.asctime()的形式。如果参数未给或者
7 #为None的时候，将会默认time.time()为参数，它的作用相当于time.asctime(time.localtime(secs))。
8 print(time.ctime())   #Wed Aug  9 00:15:18 2017
9 print(time.ctime(time.time())) #Wed Aug  9 00:16:39 2017

1 #------------------其他用法---------------------#
2 time.sleep(3)
3 #线程推迟指定的时间运行，单位为秒

三、os模块

os模块是与操作系统交互的一个接口

 1 os.getcwd() 获取当前工作目录，即当前python脚本工作的目录路径
 2 os.chdir("dirname")  改变当前脚本工作目录；相当于shell下cd
 3 os.curdir  返回当前目录: ('.')
 4 os.pardir  获取当前目录的父目录字符串名：('..')
 5 os.makedirs('dirname1/dirname2')    可生成多层递归目录
 6 os.removedirs('dirname1')    若目录为空，则删除，并递归到上一级目录，如若也为空，则删除，依此类推
 7 os.mkdir('dirname')    生成单级目录；相当于shell中mkdir dirname
 8 os.rmdir('dirname')    删除单级空目录，若目录不为空则无法删除，报错；相当于shell中rmdir dirname
 9 os.listdir('dirname')    列出指定目录下的所有文件和子目录，包括隐藏文件，并以列表方式打印
10 os.remove()  删除一个文件
11 os.rename("oldname","newname")  重命名文件/目录
12 os.stat('path/filename')  获取文件/目录信息
13 os.sep    输出操作系统特定的路径分隔符，win下为"\",Linux下为"/"
14 os.linesep    输出当前平台使用的行终止符，win下为"\r\n",Linux下为"\n"
15 os.pathsep    输出用于分割文件路径的字符串 win下为;,Linux下为:
16 os.name    输出字符串指示当前使用平台。win->'nt'; Linux->'posix'
17 os.system("bash command")  运行shell命令，直接显示
18 os.environ  获取系统环境变量
19 os.path.abspath(path)  返回path规范化的绝对路径
20 os.path.split(path)  将path分割成目录和文件名二元组返回
21 os.path.dirname(path)  返回path的目录。其实就是os.path.split(path)的第一个元素
22 os.path.basename(path)  返回path最后的文件名。如果path以/或\结尾，那么就会返回空值。即os.path.split(path)的第二个元素
23 os.path.exists(path)  如果path存在，返回True；如果path不存在，返回False
24 os.path.isabs(path)  如果path是绝对路径，返回True
25 os.path.isfile(path)  如果path是一个存在的文件，返回True。否则返回False
26 os.path.isdir(path)  如果path是一个存在的目录，则返回True。否则返回False
27 os.path.join(path1[, path2[, ...]])  将多个路径组合后返回，第一个绝对路径之前的参数将被忽略
28 os.path.getatime(path)  返回path所指向的文件或者目录的最后存取时间
29 os.path.getmtime(path)  返回path所指向的文件或者目录的最后修改时间
30 os.path.getsize(path) 返回path的大小

os模块

在Linux和Mac平台上，该函数会原样返回path，在windows平台上会将路径中所有字符转换为小写，并将所有斜杠转换为反斜杠。
>>> os.path.normcase('c:/windows\system32\')   
'c:\windows\system32\'   
   

规范化路径，如..和/
>>> os.path.normpath('c://windows\System32\../Temp/')   
'c:\windows\Temp'   

>>> a='/Users/jieli/test1/\a1/\\aa.py/../..'
>>> print(os.path.normpath(a))
/Users/jieli/test1

print(os.path.join('C:\','a','b','c','d.txt'))
print(os.path.join('C:\','a','b','D:\','c','d.txt'))
print(os.path.normcase('c:/wiNdows\system32\')  )
print(os.path.normpath('c://wIndows\System32\../Temp/')  )
a='/Users/jieli/test1/\a1/\\aa.py/../..'
print(os.path.normpath(a))

 1 os路径处理
 2 #方式一：推荐使用
 3 import os
 4 #具体应用
 5 import os,sys
 6 possible_topdir = os.path.normpath(os.path.join(
 7     os.path.abspath(__file__),
 8     os.pardir, #上一级
 9     os.pardir,
10     os.pardir
11 ))
12 sys.path.insert(0,possible_topdir)
13 
14 
15 #方式二：不推荐使用
16 os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

View Code

四、sys模块

1 sys.argv           命令行参数List，第一个元素是程序本身路径
2 sys.exit(n)        退出程序，正常退出时exit(0)
3 sys.version        获取Python解释程序的版本信息
4 sys.maxint         最大的Int值（仅Python 2可用；Python 3中已移除，请改用sys.maxsize）
5 sys.path           返回模块的搜索路径，初始化时使用PYTHONPATH环境变量的值
6 sys.platform       返回操作系统平台名称

1 import sys
2 import time
3 for i in range(100):
4     sys.stdout.write('\r%s' % ('#' * i))
5     sys.stdout.flush()
6     time.sleep(0.5)

实现进度条一：sys.stdout

1 import sys
2 import time
3 for i in  range(100):
4     time.sleep(0.5)
5     print('\r%s' %('#'*i),end='',file=sys.stdout,flush=True)

进度条实现二：print

 1 #==============知识储备====================#
 2 #指定宽度
 3 print('[%-10.3f]' %3.22)  #总宽度为10，保留3位小数点
 4 #打印结果
 5 #[3.220     ]
 6 
 7 #打印%号，用%%
 8 width=10
 9 print('[%%-%ds]' %width)
10 #打印结果
11 #[%-10s]
12 
13 #嵌套的%
14 width=10
15 print(('[%%-%ds]' %width) %('hello'))
16 #[hello     ]
17 
18 #=================实现打印进度条函数==================#
19 import sys
20 import time
21 def progress(percent,width=50):
22     if percent >=100:
23         percent=100
24     show_str=('[%%-%ds]' %width) %(int(width*percent/100)*'#')  #字符串拼接的嵌套使用
25     print("\r%s %d%%"  %(show_str,percent),end='',file=sys.stdout,flush=True)
26 
27 #=====================应用=============================#
28 data_size=330333
29 recv_size=0
30 
31 while recv_size < data_size:
32     time.sleep(0.5)       #模拟数据的传输延迟
33     recv_size+=1024       #每次收1024
34     recv_per=int(100*(recv_size/data_size))   #接收的比例
35     progress(recv_per,width=30)             #进度条的宽度30

进度条应用

五、random模块

 1 import random
 2 print(random.random())  #[0,1) ----float  大于等于0且小于1之间的小数
 3 print(random.randint(1,3))    #[1,3]    大于等于1且小于等于3之间的整数
 4 print(random.randrange(1,3))  #[1,3)    大于等于1且小于3之间的整数
 5 print(random.choice([1,'23',[4,5]]))    #1或者23或者[4,5]
 6 print(random.sample([1,'23',[4,5],[10,11]],2))  #从指定序列中随机获取指定长度的片断。sample函数不会修改原有序列。
 7 print(random.uniform(1,3))    #大于1小于3的小数，如：2.9896973832195934
 8 
 9 item=[1,3,5,7,9]
10 random.shuffle(item)   #打乱item的顺序，相当于'洗牌'
11 print(item)

1 def make_code(n):
2     res=''
3     for i in range(n):
4         s1=chr(random.randint(65,90))
5         s2=str(random.randint(0,10))
6         res+=random.choice([s1,s2])
7     return res
8 
9 print(make_code(9))

生成随机码

六、序列化 json & pickle 模块

eval内置方法可以将一个字符串转换成python对象，不过，eval方法是有局限性的，对于普通的数据类型，json.loads和eval都能用，但遇到特殊类型的时候，eval就不管用了，所以eval的重点还是通常用来执行一个字符串表达式，并返回表达式的值。

1 import json
2 x="[null,true,false,1]"
3 # print(eval(x))     #报错，无法解析null类型，而json就可以
4 print(json.loads(x))

什么是序列化？

把对象（变量）从内存中变成可存储或传输的过程称为序列化，在python中叫pickling，在其他语言中也被称之为serialization，marshalling，flattening等等，都是一个意思。

为什么要序列化？

1、持久保持状态

2、跨平台数据交互

序列化之后，不仅可以把序列化后的内容写入磁盘，还可以通过网络传输到别的机器上，如果收发的双方约定好使用一种序列化的格式，那么便打破了平台/语言差异化带来的限制，实现了跨平台数据交互。

反过来，把变量内容从序列化的对象重新读到内存里称之为反序列化，即unpickling

1、json

要在不同的编程语言之间传递对象，就必须把对象序列化为标准格式，比如xml，但更好的方法是序列化为json，因为json表示出来就是一个字符串，可以被所有语言读取，也可以方便地存储到磁盘或者通过网络传输。json不仅是标准格式，并且比xml更快，而且可以直接在web页面中读取，非常方便。

json表示的对象就是标准的javaScript语言的对象，json和python内置的数据类型对应关系如下：

 1 import json
 2 dic={'name':'alvin','age':23,'sex':'male'}
 3 print(type(dic))  #<class 'dict'>
 4 
 5 j=json.dumps(dic)
 6 print(type(j))    #<class 'str'>
 7 
 8 f=open('test','w')
 9 f.write(j)           #-----等价于json.dump(dic,f)
10 f.close()
11 #---------------------反序列化
12 
13 f=open('test')
14 data=json.loads(f.read())  #等价于data=json.load(f)
15 print(data)
16 print(type(data))
17 
18 dic={'name':'egon','age':18}
19 json.dump(dic,open('b.json','w'))
20 print(json.load(open('b.json','r'))['name'])

 1 import json
 2 #dct="{'1':111}"#json 不认单引号
 3 #dct=str({"1":111})#报错,因为生成的数据还是单引号:{'1': 111}
 4 
 5 dct='{"1":"111"}'
 6 print(json.loads(dct))
 7 
 8 #conclusion:
 9 #        无论数据是怎样创建的，只要满足json格式，就可以json.loads出来,不一定非要dumps的数据才能loads
10 
11  注意点

注意点

2、pickle

 1 import pickle
 2 dic={'name':'alvin','age':23,'sex':'male'}
 3 print(type(dic)) #<class 'dict'>
 4 
 5 j=pickle.dumps(dic)
 6 print(type(j))       #<class 'bytes'>
 7 
 8 f=open('test_pickle','wb')  #注意w写入的是str，wb是写入bytes
 9 f.write(j)      #---------------等价于pickle.dump(dic,f)
10 f.close()
11 
12 #------------------------------反序列化
13 f=open('test_pickle','rb')
14 data=pickle.loads(f.read())      #等价于data=pickle.load(f)
15 print(data['name'])

七、shelve模块

shelve模块比pickle模块简单，只有一个open函数，返回类似字典的对象，可读，可写，key必须为字符串，而值可以是python所支持的数据类型

 1 import shelve
 2 #----------------------------------shelve序列化
 3 f=shelve.open(r'shelve.shl')
 4 f['stu1_info']={'name':'fang','age':28}   #可以直接以字典的方式写入
 5 f['stu2_info']={'name':'test','age':28}
 6 f.close()
 7 
 8 #-----------------------------------shelve反序列化
 9 odj=shelve.open(r'shelve.shl')
10 print(odj['stu1_info']['name'])

八、re模块

1、什么是正则？

正则就是用一些具有特殊含义的符号组合到一起（称为正则表达式）来描述字符或者字符串的方法，或者说：正则就是用来描述一类事物的规则。在python中，它内嵌于语言之中，并通过re模块实现；正则表达式模式被编译成一系列的字节码，然后由用c编写的匹配引擎执行。

2、常用匹配模式（元字符）

模式	描述
\w	匹配字母数字下划线
\W	匹配非字母数字下划线
\s	匹配任意空白字符，等价于[ \t\n\r\f]
\S	匹配任意非空字符
\d	匹配任意数字，等价于[0-9]
\D	匹配任意非数字
\A	匹配字符串开始
\Z	匹配字符串结束，如果是存在换行，只匹配到换行前结束字符串
\z	匹配字符串结束
\G	匹配最后匹配完成的位置
\n	匹配一个换行符
\t	匹配一个制表符
^	匹配字符串的开头
$	匹配字符串的结尾
.	匹配任意字符，除了换行符，当re.DOTALL标记被指定时，则可以匹配包括换行符的任意字符。
[....]	用来表示一组字符，单独列出：[amk]匹配'a'，'m'或'k'
[^...]	不在[]中的字符:[^abc]匹配除了a,b，c之外的字符。
*	匹配0个或多个的表达式。
+	匹配1个或多个的表达式。
?	匹配0个或1个由前面的正则表达式定义的片段，非贪婪方式。
{n}	精准匹配n个前面表达式
{n,m}	匹配n到m次由前面的正则表达式定义的片段，贪婪方式。
a|b	匹配a或b。
()	匹配括号内的表达式，也表示一个组。

示例：

 1 #一对一的匹配
 2 'hello'.replace(old,new)
 3 'hello'.find('pattern')
 4 print('hello'.find('pattern'))
 5 
 6 #正则匹配
 7 import re
 8 #\w与\W
 9 print(re.findall('\w','hello word 123'))         #['h', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'd', '1', '2', '3']
10 print(re.findall('\W','hello word 123 \n \t'))  #[' ', ' ', ' ', '\n', ' ', '\t']
11 
12 #\s与\S  #\n \t都是空,都可以被\s匹配
13 print(re.findall('\s','hello word 123 \n \t'))  #[' ', ' ', ' ', '\n', ' ', '\t']
14 print(re.findall('\S','hello word 123 \n \t'))  #['h', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'd', '1', '2', '3']
15 
16 #\n与\t
17 print(re.findall('\n','hello word 123 \n \t'))   #['\n']
18 print(re.findall('\t','hello word 123 \n \t'))   #['\t']
19 
20 #\d与\D
21 print(re.findall('\d','hello word 123 \n \t'))   #['1', '2', '3']
22 print(re.findall('\D','hello word 123 \n \t'))   #['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'd', ' ', ' ', '\n', ' ', '\t']
23 
24 #\A与\Z
25 print(re.findall('\Ahe','hello word 123 \n \t'))  #['he']  \A == ^
26 print(re.findall('123\Z','hello word 123'))        #['123']  \Z == $
27 
28 #^与$
29 print(re.findall('^h','hello word 123'))          #['h']
30 print(re.findall('3$','hello word 123'))          #['3']
31 
32 #重复匹配： |.| * | ? |.* |.* ? | + | {n, m} |
33 # . 本身代表任意一个字符
34 print(re.findall('a.b','a1b a*b a b aaab')) #['a1b', 'a*b', 'a b', 'aab']
35 print(re.findall('a.b','a\nb')) #[]
36 print(re.findall('a.b','a\nb',re.S)) #['a\nb']       #匹配空字符
37 print(re.findall('a.b','a\nb',re.DOTALL)) #['a\nb']  #匹配空字符
38 
39 #[]内部可以有多个字符，但是本身只配多个字符中的一个
40 print(re.findall('a[0-9][0-9]c','a a12c a1c a*c a2c a c a\nc',re.S))
41 print(re.findall('a[a-zA-Z]c','aac abc aAc a12c a1c a*c a2c a c a\nc',re.S))
42 print(re.findall('a[^a-zA-Z]c','aac abc aAc a12c a1c a*c a2c a c a\nc',re.S))
43 print(re.findall('a[+/*-]c','a-c a+c a/c aac abc aAc a12c a1c a*c a2c a c a\nc',re.S))
44 
45 #\:转义
46 print(re.findall(r'a\\c','a\c abc')) #rawstring
47 
48 #? * + {}：左边有几个字符，如果有的话，贪婪匹配
49 # *左边那一个字符有0个或者无穷个
50 print(re.findall('ab*','a ab abb abbb abbbb bbbbbb'))  #['a', 'ab', 'abb', 'abbb', 'abbbb']
51 print(re.findall('ab{0,}','a ab abb abbb abbbb bbbbbb')) #['a', 'ab', 'abb', 'abbb', 'abbbb']
52 
53 #?左边那一个字符有0个或者1个
54 print(re.findall('ab?','aab a ab aaaa'))  #['a', 'ab', 'a', 'ab', 'a', 'a', 'a', 'a']
55 
56 #+左边那一个字符有1个或者无穷个
57 print(re.findall('ab+','a ab abb abbb abbbb bbbbbb'))  #['ab', 'abb', 'abbb', 'abbbb']
58 print(re.findall('ab{1,}','a ab abb abbb abbbb bbbbbb')) #['ab', 'abb', 'abbb', 'abbbb']
59 
60 #{n,m}左边的字符有n-m次
61 print(re.findall('ab{3}','a ab abb abbb abbbb bbbbbb')) #['abbb', 'abbb']
62 print(re.findall('ab{2,3}','a ab abb abbb abbbb bbbbbb')) #['abb', 'abbb', 'abbb']
63 
64 
65 # .* .*? 匹配所有
66 #.*贪婪匹配
67 print(re.findall('a.*c','a123c456c'))    #['a123c456c']
68 #.*?非贪婪匹配
69 print(re.findall('a.*?c','a123c456c'))  #['a123c']
70 
71 #|
72 print(re.findall('company|companies','Too many companies have gone bankrupt, and the next one is my company')) #['companies', 'company']
73 print(re.findall('compan|companies','Too many companies have gone bankrupt, and the next one is my company'))  #['compan', 'compan']
74 
75 #():分组，只返回（）内的
76 print(re.findall('ab','abababab123'))  #['ab', 'ab', 'ab', 'ab']
77 print(re.findall('(ab)','abababab123'))       #['ab', 'ab', 'ab', 'ab']
78 print(re.findall('(a)b','abababab123'))      #['a', 'a', 'a', 'a']
79 print(re.findall('a(b)','abababab123'))      #['b', 'b', 'b', 'b']
80 print(re.findall('(ab)+','abababab123'))     #['ab']
81 print(re.findall('(?:ab)+','abababab123'))  #['abababab']
82 
83 print(re.findall('compan(y|ies)','Too many companies have gone bankrupt, and the next one is my company'))    #['ies', 'y']
84 print(re.findall('compan(?:y|ies)','Too many companies have gone bankrupt, and the next one is my company'))  #['companies', 'company']

 1 # ===========================re模块提供的方法介绍===========================
 2 import re
 3 #1
 4 print(re.findall('e','alex make love') )   #['e', 'e', 'e'],返回所有满足匹配条件的结果,放在列表里
 5 #2#   re.search
 6 print(re.search('e','alex make love').group()) #e,只到找到第一个匹配然后返回一个包含匹配信息的对象,该对象可以通过调用group()方法得到匹配的字符串,如果字符串没有匹配，则返回None。
 7 print(re.search('ab','abababab123').group()) #匹配就不找了
 8 print(re.search('ab','12aasssdddssssssss3'))  #匹配不到返回None
 9 print(re.search('ab','12aasssdddsssssssab3sssssss').group())
10 #3  re.match
11 print(re.match('e','alex make love'))    #None,同search,不过在字符串开始处进行匹配,完全可以用search+^代替match
12 print(re.match('ab','123ab456')) #print(re.search('^ab','123ab456'))
13 #4 re.split 切割 re.split(pattern, string, maxsplit=0) maxsplit=1分离一次，默认为0，不限制次数
14 print(re.split('[ab]','abcd'))     #['', '', 'cd']，先按'a'分割得到''和'bcd',再对''和'bcd'分别按'b'分割
15 
16 #5 re.sub 替换
17 print('===>',re.sub('a','A','alex make love')) #===> Alex mAke love，不指定n，默认替换所有
18 print('===>',re.sub('a','A','alex make love',1)) #===> Alex make love
19 print('===>',re.sub('a','A','alex make love',2)) #===> Alex mAke love
20 print('===>',re.sub('^(\w+)(.*?\s)(\w+)(.*?\s)(\w+)(.*?)$',r'\5\2\3\4\1','alex make love')) #===> love make alex
21 
22 print('===>',re.subn('a','A','alex make love')) #===> ('Alex mAke love', 2),结果带有总共替换的个数
23 
24 
25 #6 re.compile 编译正则表达式
26 obj=re.compile('\d{2}')
27 
28 print(obj.search('abc123eeee').group()) #12
29 print(obj.findall('abc123eeee')) #['12'],重用了obj

练习：

1 #计算器作业参考：http://www.cnblogs.com/wupeiqi/articles/4949995.html
2 expression='1-2*((60+2*(-3-40.0/5)*(9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))'
3 
4 content=re.search('\(([-+*/]*\d+\.?\d*)+\)',expression).group() #(-3-40.0/5)

作业：

开发一个简单的python计算器

1 实现加减乘除及括号优先级解析
2 用户输入 1 - 2 * ( (60-30 +(-40/5) * (9-2*5/3 + 7 /3*99/4*2998 +10 * 568/14 )) - (-4*3)/ (16-3*2) )等类似公式后，必须自己解析里面的(),+,-,*,/符号和公式(不能调用eval等类似功能偷懒实现)，运算后得出结果，结果必须与真实的计算器所得出的结果一致

九、shutil模块

高级的文件、文件夹、压缩包处理模块

shutil.copyfileobj(fsrc,fdst [,length])

将文件内容拷贝到另一个文件中

1 import shutil
2 shutil.copyfileobj(open('a.xml','r'),open('b.xml','w'))

shutil.copyfile(src,dst)

拷贝文件

1 shutil.copyfile('f1.log','f2.log')   #目标文件无需存在

shutil.copymode(src,dst)

仅拷贝权限，内容，组，用户均不变。

1 shutil.copymode('f1.log','f2.log')   #目标文件必须存在

shutil.copystat(src,dst)

仅拷贝状态的信息，包括：mode bits，atime，mtime，flags

1 shutil.copystat('f1.log','f2.log')   #目标文件必须存在

shutil.copy(src,dst)

拷贝文件和权限

1 shutil.copy('f1.log','f2.log')

shutil.copy2(src,dst)

拷贝文件和状态信息

1 shutil.copy2('f1.log','f2.log')

shutil.ignore_patterns(*patterns)

shutil.copytree(src,dst,symlinks=False,ignore=None)

递归的去拷贝文件夹

1 shutil.copytree('a','b',ignore=shutil.ignore_patterns('b.xml')) #目标目录不能存在，注意对a目录父目录要有可写权限，ignore的意思是排除

shutil.copytree('a','b',symlinks=True,ignore=shutil.ignore_patterns('b.xml'))
'''
通常的拷贝都把软连接拷贝成硬链接，即对待软链接来说，创建新的文件
'''

拷贝软链接

shutil.rmtree(path[,ignore_errors[,onerror]])

递归的去删除文件

shutil.rmtree('b')

shutil.move(src,dst)

递归的去移动文件，它类似mv命令，其实就是重命名

shutil.move('a','c')

shutil.make_archive(base_name,format,.....)

创建压缩包并返回文件路径，例如：zip、tar

base_name：压缩包的文件名，也可以是压缩包的路径。只是文件名时，则保存至当前目录，否则保存至指定路径，如data_bak =====》保存至当前路径，如：/tmp/data_bak ==》保存至/tmp/
format：压缩包种类，zip，tar，bztar，gztar
root_dir：要压缩的文件夹路径（默认当前目录）
owner：用户，默认当前用户
group：组，默认当前组
logger：用于记录日志，通常是logging.Logger对象

1 #将a下的文件打包放置当前程序目录
2 ret=shutil.make_archive('data_bak','gztar',root_dir='a')
3 
4 #将a下的文件打包放置/tmp/目录
5 ret=shutil.make_archive('/tmp/data_bak','gztar',root_dir='a')

shutil对压缩包的处理是调用ZipFile和TarFile两个模块来进行的，详细：

 1 import zipfile
 2 #压缩
 3 z= zipfile.ZipFile('data_bak.zip','w')
 4 z.write('f2.log')  #会把压缩包里的文件情况在写入
 5 z.close()
 6 
 7 #解压
 8 z=zipfile.ZipFile('data_bak.zip','r')
 9 z.extractall(path='.')
10 z.close()

zipfile压缩解压缩

 1 import tarfile
 2 #压缩
 3 t=tarfile.open('data_bak.tar.gz','w')
 4 t.add('s2.py')
 5 t.add('s1.py')
 6 t.close()
 7 
 8 #解压
 9 t=tarfile.open('data_bak.tar.gz','r')
10 t.extractall('/data_bak')
11 t.close()

tarfile压缩解压缩

十、xml模块

xml是实现不同语言或程序之间进行数据交换的协议，跟json差不多，但json使用起来更简单，不过，在json还没有诞生的年代，大家只能选择用xml，至今很多传统公司如金融行业的很多系统接口还是主要是xml。

xml的格式如下，就是通过<>节点来区别数据结构的：

 1 <?xml version="1.0"?>
 2 <data>
 3     <country name="Liechtenstein">
 4         <rank updated="yes">2</rank>
 5         <year>2008</year>
 6         <gdppc>141100</gdppc>
 7         <neighbor name="Austria" direction="E"/>
 8         <neighbor name="Switzerland" direction="W"/>
 9     </country>
10     <country name="Singapore">
11         <rank updated="yes">5</rank>
12         <year>2011</year>
13         <gdppc>59900</gdppc>
14         <neighbor name="Malaysia" direction="N"/>
15     </country>
16     <country name="Panama">
17         <rank updated="yes">69</rank>
18         <year>2011</year>
19         <gdppc>13600</gdppc>
20         <neighbor name="Costa Rica" direction="W"/>
21         <neighbor name="Colombia" direction="E"/>
22     </country>
23 </data>

xml数据

xml协议在各个语言里都是支持的，在python中可以用以下模块操作xml

1 # print(root.iter('year')) #全文搜索
2 # print(root.find('country')) #在root的子节点找，只找一个
3 # print(root.findall('country')) #在root的子节点找，找所有

 1 import xml.etree.ElementTree as ET
 2 
 3 tree = ET.parse('a.xml')
 4 root=tree.getroot()  #查看a.xml 里的根
 5 print(root.tag)
 6 
 7 #遍历xml文档
 8 for child in root:
 9     print('=====>',child.tag)
10     for i in child:
11         print(i.tag,i.attrib,i.text)
12 
13 #查找element元素的三种方式
14 years=root.iter('year') #扫描整个xml文档树，找到所有
15 for i in years:
16     print(i)
17 
18 res1=root.find('country') #谁来调，就从谁下一层开始找,只找一个
19 print(res1)
20 
21 res2=root.findall('country') #谁来调，就从谁下一层开始找,找所有
22 print(res2)
23 
24 #修改
25 years=root.iter('year') #扫描整个xml文档树，找到所有
26 for year in years:
27     year.text=str(int(year.text)+1)
28     year.set('updated','yes')
29     year.set('version','1.0')
30 tree.write('a.xml')
31 
32 #删除
33 for county in root.iter('country'):
34     # print(county.tag)
35     rank=county.find('rank')
36     if int(rank.text) > 10:
37         county.remove(rank)
38 tree.write('a.xml')
39 
40 #增加节点
41 for county in root.iter('country'):
42     e=ET.Element('egon')
43     e.text='hello'
44     e.attrib={'age':'18'}
45     county.append(e)
46 tree.write('a.xml')

自己创建xml文档

 1 import xml.etree.ElementTree as ET
 2  
 3  
 4 new_xml = ET.Element("namelist")
 5 name = ET.SubElement(new_xml,"name",attrib={"enrolled":"yes"})
 6 age = ET.SubElement(name,"age",attrib={"checked":"no"})
 7 sex = ET.SubElement(name,"sex")
 8 sex.text = '33'
 9 name2 = ET.SubElement(new_xml,"name",attrib={"enrolled":"no"})
10 age = ET.SubElement(name2,"age")
11 age.text = '19'
12  
13 et = ET.ElementTree(new_xml) #生成文档对象
14 et.write("test.xml", encoding="utf-8",xml_declaration=True)
15  
16 ET.dump(new_xml) #打印生成的格式

View Code

十一、configparser模块

配置文件如下：

 1 # 注释1
 2 ; 注释2
 3 
 4 [section1]
 5 k1 = v1
 6 k2:v2
 7 user=egon
 8 age=18
 9 is_admin=true
10 salary=31
11 
12 [section2]
13 k1 = v1

读取

 1 import configparser
 2 config=configparser.ConfigParser()
 3 config.read('a.cfg')
 4 
 5 #查看所有的标题
 6 res=config.sections()        #['section1', 'section2']
 7 print(res)
 8 
 9 #查看标题section1下所有key=value的key #print(config.options(config.sections()[0]))
10 options=config.options('section1')
11 print(options)     #['k1', 'k2', 'user', 'age', 'is_admin', 'salary']
12 
13 #查看标题section1下所有key=value的(key,value)格式
14 item_list=config.items('section1')
15 print(item_list)  #[('k1', 'v1'), ('k2', 'v2'), ('user', 'egon'), ('age', '18'), ('is_admin', 'true'), ('salary', '31')]
16 
17 #查看某个标题下的某个配置项的值===》字符串格式
18 val=config.get('section1','user')
19 print(val)  #egon
20 
21 #查看某个标题下的某个配置项的值===》整数格式
22 val=config.getint('section1','age')
23 print(val)   #18
24 
25 #查看某个标题下的某个配置项的布尔值
26 val=config.getboolean('section1','is_admin')
27 print(val) #True
28 
29 #查看标题section1下salary的值=>浮点型格式
30 val=config.getfloat('section1','salary')
31 print(val)  #31.0

修改

import configparser

config=configparser.ConfigParser()
config.read('a.cfg')


#删除整个标题section2
config.remove_section('section2')

#删除标题section1下的某个k1和k2
config.remove_option('section1','k1')
config.remove_option('section1','k2')

#判断是否存在某个标题
print(config.has_section('section1'))

#判断标题section1下是否有user
print(config.has_option('section1','user'))


#添加一个标题
config.add_section('egon')

#在标题egon下添加name=egon,age=18的配置
config.set('egon','name','egon')
config.set('egon','age',18) #报错,必须是字符串


#最后将修改的内容写入文件,完成最终的修改
config.write(open('a.cfg','w'))

十二、hashlib模块

hash是一种算法；hashlib模块在3.x里代替了md5模块和sha模块，主要提供SHA1，SHA224，SHA256，SHA384，SHA512，MD5算法。

三个特点：

1、内容相同则hash运算结果相同，内容稍微改变则hash值也随之改变

2、不可逆推

3、相同算法：无论校验多长的数据，得到的哈希值长度固定。

 1 import hashlib
 2  
 3 m=hashlib.md5()# m=hashlib.sha256()
 4  
 5 m.update('hello'.encode('utf8'))
 6 print(m.hexdigest())  #5d41402abc4b2a76b9719d911017c592
 7  
 8 m.update('alvin'.encode('utf8'))
 9  
10 print(m.hexdigest())  #92a7e713c30abbb0319fa07da2a5c4af
11  
12 m2=hashlib.md5()
13 m2.update('helloalvin'.encode('utf8'))
14 print(m2.hexdigest()) #92a7e713c30abbb0319fa07da2a5c4af
15 
16 '''
17 注意：把一段很长的数据update多次，与一次update这段长数据，得到的结果一样
18 但是update多次为校验大文件提供了可能。
19 '''

以上加密算法虽然非常厉害，但有时候存在缺陷，即：通过撞库可以反解。所以，有必要在加密算法中添加自定义key再来做加密

1 import hashlib
2 
3 m=hashlib.md5('baiducom'.encode('utf-8')) #15f1c4faad7a036902d4e3130dbec759
4 m.update('hello'.encode('utf-8'))  
5 print(m.hexdigest())

 1 import hashlib
 2 passwds=[
 3     'alex3714',
 4     'alex1313',
 5     'alex94139413',
 6     'alex123456',
 7     '123456alex',
 8     'a123lex',
 9     ]
10 def make_passwd_dic(passwds):
11     dic={}
12     for passwd in passwds:
13         m=hashlib.md5()
14         m.update(passwd.encode('utf-8'))
15         dic[passwd]=m.hexdigest()
16     return dic
17 
18 def break_code(cryptograph,passwd_dic):
19     for k,v in passwd_dic.items():
20         if v == cryptograph:
21             print('密码是===>\033[46m%s\033[0m' %k)
22 
23 cryptograph='aee949757a2e698417463d47acac93df'
24 break_code(cryptograph,make_passwd_dic(passwds))
25 
26 模拟撞库破解密码

模拟撞库破解密码

python还有一个hmac模块，它内部对我们创建的key和内容进行进一步的处理然后再加密（注意：从Python 3.8起，hmac.new必须显式指定digestmod参数，例如digestmod='md5'，否则会报错）：

1 import hmac
2 h=hmac.new('baiducom'.encode('utf-8'))
3 h.update('hello'.encode('utf-8'))
4 print(h.hexdigest())   #ad65bcafe954d54650e5a1911c64d7c5

 1 #要想保证hmac最终结果一致，必须保证：
 2 #1:hmac.new括号内指定的初始key一样
 3 #2:无论update多少次，校验的内容累加到一起是一样的内容
 4 
 5 import hmac
 6 
 7 h1=hmac.new(b'egon')
 8 h1.update(b'hello')
 9 h1.update(b'world')
10 print(h1.hexdigest())
11 
12 h2=hmac.new(b'egon')
13 h2.update(b'helloworld')
14 print(h2.hexdigest())
15 
16 h3=hmac.new(b'egonhelloworld')
17 print(h3.hexdigest())
18 
19 '''
20 f1bf38d054691688f89dcd34ac3c27f2
21 f1bf38d054691688f89dcd34ac3c27f2
22 bcca84edd9eeb86f30539922b28f3981
23 '''

hmac初始值必须一致

十三、subprocess模块

 1 import  subprocess
 2 
 3 '''
 4 sh-3.2# ls /Users/egon/Desktop |grep txt$
 5 mysql.txt
 6 tt.txt
 7 事物.txt
 8 '''
 9 
10 res1=subprocess.Popen('ls /Users/jieli/Desktop',shell=True,stdout=subprocess.PIPE)
11 res=subprocess.Popen('grep txt$',shell=True,stdin=res1.stdout,
12                  stdout=subprocess.PIPE)
13 
14 print(res.stdout.read().decode('utf-8'))
15 
16 
17 #等同于上面,但是上面的优势在于,一个数据流可以和另外一个数据流交互,可以通过爬虫得到结果然后交给grep
18 res1=subprocess.Popen('ls /Users/jieli/Desktop |grep txt$',shell=True,stdout=subprocess.PIPE)
19 print(res1.stdout.read().decode('utf-8'))
20 
21 
22 #windows下:
23 # dir | findstr 'test*'
24 # dir | findstr 'txt$'
25 import subprocess
26 res1=subprocess.Popen(r'dir C:\Users\Administrator\PycharmProjects\test\函数备课',shell=True,stdout=subprocess.PIPE)
27 res=subprocess.Popen('findstr test*',shell=True,stdin=res1.stdout,
28                  stdout=subprocess.PIPE)
29 
30 print(res.stdout.read().decode('gbk')) #subprocess使用当前系统默认编码，得到结果为bytes类型，在windows下需要用gbk解码

第6章 常用模块

第6章常用模块