16.常用模块【logging/*re】

logging
序列化模块***
json
pickle
re正则
元字符
字符集
分组()
命名分组
管道符 |
反斜杠
方法
re.finditer
re.search
re.match
re.split
re.sub
re.compile

logging

等级：

debug
info
warning
error
critical

默认情况下，只有warning及以上等级（warning、error、critical）的日志才会打印反馈

>>> logging.debug('debug------')
>>>
>>> s = logging.debug('debug------')
>>> print(s)
None
>>> s = logging.warning('debug------')
WARNING:root:debug------
>>> s = logging.error('debug------')
ERROR:root:debug------
>>> s = logging.critical('debug------')
CRITICAL:root:debug------

配置的两种方式：
config：

import logging
logging.basicConfig(level = logging.DEBUG,
format = '%(asctime)s-[%(lineno)s]-%(message)s',
datefmt = '%Y-%m-%d %H:%M:%S',
filename = 'path',
filemode = 'a')

num = 1000
logging.info('cost %s'%num)

logger:

def get_logger():
logger = logging.getLogger()

#文件显示指向
fh = logging.FileHandler('logger2')
#屏幕显示指向
sh = logging.StreamHandler()

#添加日志流
logger.addHandler(fh)
#添加日志流
logger.addHandler(sh)

#设定输出等级
logger.setLevel(logging.DEBUG)
#能单独设置fh或者sh的



#设置日志输出格式
fm = logging.Formatter('%(asctime)s-[%(lineno)s]-%(levelname)s-%(message)s')

#给文件流设置日志输出格式
fh.setFormatter(fm)
sh.setFormatter(fm)

序列化模块***

json

d = {'scott':['student','23'],'jerry':['engneer','22']}
#将传入的数据转换成json字符串
s = json.dumps(d)
#注意此时序列化之后的s的数据类型
print(type(s))
print(s)

with open('new.txt','w') as f:
    f.write(s)

f1 = open('new.txt')
data = f1.read()

#反序列化操作
data2 = json.loads(data)
#此时反序列化之后的数据类型
print(type(data2))
print(data2)

dump方式

f = open('new2','w')

json.dump(d,f)
# 1.将d转为字符串，2.将数据写入f
f.close()

i = 10
s = 'hello'
t = (1,4,2)
l = [3,4,7]
d = {'name':'scott'}

json_str1 = json.dumps(i)
json_str2 = json.dumps(s)
json_str3 = json.dumps(t)
json_str4 = json.dumps(l)
json_str5 = json.dumps(d)

print(json.loads(json_str3))
#
# print(json_str1)
# print(json_str2)
# print(json_str3)
# print(json_str4)
# print(json_str5)

# json类型中没有元组类型，表现出来都是list
# 再反序列化出来后，原来的tuple会变成list

练习

import json
d = {'info':{'name':'scott','age':'24'}}

f = open('new3.txt')
data = f.read()

json.loads(data)

# 符合json字符串的格式的数据，就可以取出；注意json字典类型中key必须使用双引号

pickle

只能在两个python程序间传递数据类型

import json
import datetime


print(datetime.datetime.now())
t = datetime.datetime.now()

d = {'data':t}

# 使用json会报错，json没有time类型
json.dump(d,open('new4','w'))

# pickle 能支持所有的python类型交换
import pickle
s = pickle.dumps(d)
print(s)
print(type(s))
f = open('new5','wb')

f.write(s)
f.close()

f = open('new5','rb')
data = pickle.loads(f.read())

print(data)

re正则

只针对字符串数据类型进行操作

对字符串的模糊匹配

>>> 'hello python java php c go'.replace('p','P')
'hello Python java PhP c go'
>>> 'hello python java php c go'.find('p')
6

使用正则：

>>> import re
>>> re.findall('\d','fewhkhu3gk2hk6k3k4534523kgkgug')
['3', '2', '6', '3', '4', '5', '3', '4', '5', '2', '3']
>>> re.findall('\d+','fewhkhu3gk2hk6k3k4534523kgkgug')
['3', '2', '6', '3', '4534523']

重点就是"\d+"这个匹配规则
这个规则就是通过元字符来实现

我们平时默认使用的匹配方式就是贪婪匹配
单次获得匹配结果，按照所能获得最多值来进行匹配

如果想要获得“非贪婪匹配的结果”，可以在量词（如+、*）的后面写一个?

>>> re.findall('\d+','43434fefsesg64353fes512fes43f454fe')
['43434', '64353', '512', '43', '454']
>>> re.findall('\d+?','43434fefsesg64353fes512fes43f454fe')
['4', '3', '4', '3', '4', '6', '4', '3', '5', '3', '5', '1', '2', '4', '3', '4', '5', '4']

元字符

.
匹配任何一个除了换行符以外的字符，其他例如空格' '、制表符等都当作一个字符看

>>> re.findall('p....n','hello python')
['python']

>>> re.findall('p....n','hello python pyhjhekhuehfwn')
['python']

*
[0,无穷次]；将前面的符号匹配零到无穷次，可以搭配.使用

>>> re.findall('p.*n','hello python pyhjhekhuehfwn')
['python pyhjhekhuehfwn']

>>> re.findall('ab*c','abbbbbbcbbbbbbc')
['abbbbbbc']

>>> re.findall('ab*c','aaaaaaaaaaac')
['ac']

+
[1,无穷次]

>>> re.findall('\d+','fewhkhu3gk2hk6k3k4534523kgkgug')
['3', '2', '6', '3', '4534523']

>>> re.findall('ab+c','aaaaaaaaaaac')
[]

?
[0,1]

>>> re.findall('ab?c','abbbbbbbbbbbbbbc')
[]
>>> re.findall('ab?c','abc')
['abc']
>>> re.findall('ab?c','accccccccccccccccccc')
['ac']

{}
{n,m}

>>> re.findall('ab{0,}c','abbbbbbbbbbbbbbc')
['abbbbbbbbbbbbbbc']
>>> re.findall('ab{5,30}c','abbbbbbbbbbbbbbc')
['abbbbbbbbbbbbbbc']

字符集

表示一个‘或’的关系

>>> re.findall('a[bd]c','abcbbbqwebherwtehrbbbadc')
['abc', 'adc']

在字符集[]当中* + .等符号都变成了普通符号
除了- ^

>>> re.findall('a[*]c','abcbbbqwebherwtehrbbbadc')
[]
>>> re.findall('a[*]c','a*cbcbbbqwebherwtehrbbbadc')
['a*c']

>>> re.findall('a[1-9]c','a43434cbcbbbqwebherwtehrbbbadc')
[]
>>> re.findall('a[1-9]c','a4cbcbbbqwebherwtehrbbbadc')
['a4c']
>>> re.findall('a[1-9]*c','a4434646821cbcbbbqwebherwtehrbbbadc')
['a4434646821c']

^ 开始匹配

>>> re.findall('^ac','a4434646821acbcbbbqwebherwtehrbbbadc')
[]
>>> re.findall('^ac','ac4434646821cbcbbbqwebherwtehrbbbadc')
['ac']

但在字符集当中[^]表示取反

>>> re.findall('[^4]','ac4434646821cbcbbbqwebherwtehrbbbabc')
['a', 'c', '3', '6', '6', '8', '2', '1', 'c', 'b', 'c', 'b', 'b', 'b', 'q', 'w', 'e', 'b', 'h', 'e', 'r', 'w', 't', 'e', 'h', 'r', 'b', 'b', 'b', 'a', 'b', 'c']

>>> re.findall('[^\d]','ac4434646821cbcbbbqwebherwtehrbbbabc')
['a', 'c', 'c', 'b', 'c', 'b', 'b', 'b', 'q', 'w', 'e', 'b', 'h', 'e', 'r', 'w', 't', 'e', 'h', 'r', 'b', 'b', 'b', 'a', 'b', 'c']

>>> re.findall('[^\d].','ac4434646821cbcbbbqwebherwtehrbbbabc')
['ac', 'cb', 'cb', 'bb', 'qw', 'eb', 'he', 'rw', 'te', 'hr', 'bb', 'ba', 'bc']
>>> re.findall('[^\d]*','ac4434646821cbcbbbqwebherwtehrbbbabc')
['ac', '', '', '', '', '', '', '', '', '', '', 'cbcbbbqwebherwtehrbbbabc', '']
>>> re.findall('[^\d]+','ac4434646821cbcbbbqwebherwtehrbbbabc')
['ac', 'cbcbbbqwebherwtehrbbbabc']

$ 结尾匹配

>>> re.findall('abc$','ac4434646821cbcbbbqwebherwtehrbbbabc')
['abc']
>>> re.findall('cabc$','ac4434646821cbcbbbqwebherwtehrbbbabc')
[]

注意，要严格匹配规则

分组`()`

>>> re.findall('ad+','addddddddd')
['addddddddd']
>>> re.findall('(ad)+','addddddddd')
['ad']
>>> re.findall('(ad)+scott','adddddscottddddfesgeg')
[]
>>> re.findall('(ad+)+scott','adddddscottddddfesgeg')
['addddd']
#涉及到分组的时候，优先获得分组内的内容，如果要取消分组内的特权，需要用到`?:`语法
>>> re.findall('(\d)+scott','addddd354331351scottddddfesgeg')
['1']
>>> re.findall('(?:\d)+scott','addddd354331351scottddddfesgeg')
['354331351scott']

命名分组

re.findall(r'\w+articals\d{4}','scott45articals1234')

>>> re.findall(r'(\w+)\.articals\.(\d{4})','scott45.articals.1234')
[('scott45', '1234')]


>>> re.search(r'(?P<author>\w+)\.articals\.(?P<id>\d{4})','scott45.articals.1234')
<_sre.SRE_Match object; span=(0, 21), match='scott45.articals.1234'>

>>> ret = re.search(r'(?P<author>\w+)\.articals\.(?P<id>\d{4})','scott45.articals.1234')
>>> ret.group('id')
'1234'

管道符 `|`

>>> re.findall('www.(?:oldboy|baidu).com','www.oldboy.com')
['www.oldboy.com']

反斜杠

能让普通符号变成有特殊功能的符号
\d 匹配任何十进制数；相当于[0-9]
\D 匹配任何非数字字符；相当于[^0-9]
\s 匹配任何空白字符‘ ’
\S 匹配任何非空白字符
\w 匹配任意一个数字、字母或者下划线，无法获得其他特殊符号或者空格
\b 匹配单词边界

>>> re.findall('\bI','hello I am LIA')
[]
>>> re.findall(r'\bI','hello I am LIA')
['I']
>>> re.findall(r'\bI','hello:I:am:LIA')
['I']

使用r将字符串变为原生字符串

>>> re.findall(r'\w+\\articals\\\d{4}',r'scott45\articals\1234')
['scott45\\articals\\1234']

让特殊符号变为普通无功能的符号
\.
\*

获取运算式中的乘法运算

>>> re.findall('\d\*\d','2*6+7*45+1.4*3-8/4')
['2*6', '7*4', '4*3']
>>> re.findall('\d+\.?\d*\*\d+\.?\d*','2*6+7*45+1.4*3-8/4')
['2*6', '7*45', '1.4*3']
# 关键在于抓取一个浮点数的模型`\d+\.?\d*`

方法

re.finditer

返回一个迭代器
re.finditer('\d','ad324das65')
想要取出值的话，需要遍历这个迭代器，并对其中每个匹配对象调用.group()

re.search

匹配到第一个结果后，就不再往下匹配；匹配不到的时候会返回None

>>> re.search('\d+','ad324das65')
<_sre.SRE_Match object; span=(2, 5), match='324'>
# span中的信息表示匹配对象的位置
>>> re.search('\d+','ad324das65').group()
'324'
# 用.group()获取结果

re.match

match只在字符串开始的位置匹配，如果没有，就不返回任何结果

>>> re.match('\d+','464684ad324das65').group()
'464684'
>>> re.match('\d+','ad324das65').group()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
# 不能使用group方法
>>> re.match('\d+','ad324das65')

re.split

模糊定义分隔符，对字符串进行分割

>>> re.split('\d+','fhdfs153434efsfes13se1fse534sfe')
['fhdfs', 'efsfes', 'se', 'fse', 'sfe']
# 第三个参数定义分割次数，次数耗尽后一次性返回后面剩余的字符
>>> re.split('\d+','fhdfs153434efsfes13se1fse534sfe',3)
['fhdfs', 'efsfes', 'se', 'fse534sfe']
# 两个分隔符相连的情况，会打出一个空字符串
>>>re.split('d','fhdfs153434efddsfes13se1fse534sfe',3)
['fh', 'fs153434ef', '', 'sfes13se1fse534sfe']

re.sub

替换
re.sub(规则，替换内容，原字符串)

>>> re.sub('\d','*','fhdfs153434efddsfes13se1fse534sfe')
'fhdfs******efddsfes**se*fse***sfe'
# subn会多返回一个替换次数的结果
>>> re.subn('\d','*','fhdfs153434efddsfes13se1fse534sfe')
('fhdfs******efddsfes**se*fse***sfe', 12)

re.compile

直接定义规则，之后对多个字符串进行操作的时候会提升便利的程度

>>> ret = re.compile('\d+')
>>> ret.findall('fhdfs153434efddsfes13se1fse534sfe')
['153434', '13', '1', '534']

<wiz_tmp_tag id="wiz-table-range-border" contenteditable="false" style="display: none;">