二十七、正则表达式补充


import re

'''
正则表达式:
re.match:从头匹配
re.search:浏览全部字符串,匹配第一个符合规则的字符串
re.findall():将匹配到的所有内容都放置在一个列表中
re.finditer():与findall类似,但返回迭代器,每个元素是一个匹配对象
re.split():按正则匹配到的内容分割字符串
re.sub():替换字符串中的匹配项
'''

'''
1.match
'''
# Target text shared by the re.match() examples.
origin = "hello tom bcd tom lge tom acd 19"
# re.match() only matches at the start of the string. The raw string keeps
# the backslash of \w (word character) intact for the regex engine.
r = re.match(r"h\w+", origin)
print(r.group())  # the whole matched text
print(r.groups())  # tuple of all captured groups (none in this pattern)
print(r.groupdict())  # dict of all named groups (none in this pattern)
print("-------------------------match1----------------------------")
---------------------------------------------------------------------
hello
()
{}
-------------------------match1----------------------------
---------------------------------------------------------------------

# Wrapping the whole pattern in parentheses makes the full match available
# as capture group 1 as well.
r = re.match(r"(h\w+)", origin)
print(r.group())  # the whole matched text
print(r.groups())  # one captured group
print(r.groupdict())  # no named groups, so an empty dict
print("--------------------------match2---------------------------")
---------------------------------------------------------------------
hello
('hello',)
{}
--------------------------match2---------------------------
---------------------------------------------------------------------

# Two capture groups: the leading "h" and the remaining word characters.
r = re.match(r"(h)(\w+)", origin)
print(r.group())  # the whole matched text
print(r.groups())  # both captured groups, in order
print(r.groupdict())  # no named groups, so an empty dict
print("--------------------------match3---------------------------")
---------------------------------------------------------------------
hello
('h', 'ello')
{}
--------------------------match3---------------------------
---------------------------------------------------------------------

# (?P<name>...) creates named groups; they appear both positionally in
# groups() and by name in groupdict().
r = re.match(r"(?P<n1>h)(?P<n2>\w+)", origin)
print(r.group())  # the whole matched text
print(r.groups())  # captured groups, named or not
print(r.groupdict())  # only the named groups, keyed by name
print("--------------------------match4---------------------------")

---------------------------------------------------------------------


hello
('h', 'ello')
{'n1': 'h', 'n2': 'ello'}
--------------------------match4---------------------------
---------------------------------------------------------------------

'''
2.search:全字符串匹配
'''
origin = "hello tom bcd tom lge tom acd 19"
# re.search() scans the whole string and returns the first match.
# Group 1 is the first "t"-word; the named group is the final digit,
# because \d is anchored to the end of the string by $.
r = re.search(r"(t\w+).*(?P<name>\d)$", origin)
print(r.group())  # the whole matched text
print(r.groups())  # all captured groups
print(r.groupdict())  # only the named groups
print("--------------------------search1---------------------------")
---------------------------------------------------------------------

tom bcd tom lge tom acd 19
('tom', '9')
{'name': '9'}
--------------------------search1---------------------------
---------------------------------------------------------------------

origin = "hello tom bcd tom lge tom acd 19"
# Only the part inside the parentheses is captured, so the leading "t" is
# part of the match but not part of group 1.
r = re.search(r"t(\w+)", origin)
print(r.group())  # the whole matched text
print(r.groups())  # all captured groups
print(r.groupdict())  # no named groups, so an empty dict
print("--------------------------search2---------------------------")
---------------------------------------------------------------------
tom
('om',)
{}
--------------------------search2---------------------------
---------------------------------------------------------------------

'''
3.findall:匹配到的字符串放到列表(分组和不分组)
分组提取:从左到右,从外到内,有几个括号就取几次
'''
# Without groups, findall() returns the full text of every non-overlapping
# match: digits, one word character, digits.
r = re.findall(r"\d+\w\d+", "a2b3c4d5")
print(r)
print("--------------------------findall1---------------------------")
---------------------------------------------------------------------

['2b3', '4d5']
--------------------------findall1---------------------------
---------------------------------------------------------------------

# An empty pattern matches the zero-width gap before every character and at
# the very end, so an 8-character string yields 9 empty matches.
r = re.findall(pattern="", string="a2b3c4d5")
print(r)
print("--------------------------findall2---------------------------")
---------------------------------------------------------------------
['', '', '', '', '', '', '', '', '']
--------------------------findall2---------------------------
---------------------------------------------------------------------

origin = "hello tomm bcd tomm lge tomm acd 19"
# With groups in the pattern, each findall() item is the tuple that
# search().groups() would return for that match.
r = re.findall(r"(t)(\w+)(m)", origin)
print(r)
print("--------------------------findall3---------------------------")
---------------------------------------------------------------------
[('t', 'om', 'm'), ('t', 'om', 'm'), ('t', 'om', 'm')]
--------------------------findall3---------------------------
---------------------------------------------------------------------

origin = "hello tomm bcd tomm lge tomm acd 19"
# Groups are numbered by their opening parenthesis, left to right, so the
# outer group (the whole "tomm") comes first, then the inner ones.
r = re.findall(r"((t)(\w+)(m))", origin)
print(r)
print("--------------------------findall4---------------------------")
---------------------------------------------------------------------
[('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm')]
--------------------------findall4---------------------------
---------------------------------------------------------------------

origin = "hello tomn bcd tomn lge tomn acd 19"
# A nested group is reported right after the group that encloses it:
# (\w+(m)) yields "om" followed by its inner "m".
r = re.findall(r"(t)(\w+(m))(n)", origin)
print(r)
print("--------------------------findall5---------------------------")
---------------------------------------------------------------------
[('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n')]
--------------------------findall5---------------------------
---------------------------------------------------------------------

'''
4.finditer():返回迭代器
'''
origin = "hello tomn bcd tomn lge tomn acd 19"
# finditer() returns an iterator of match objects, so each hit offers the
# same group()/groups()/groupdict() accessors as a search() result.
r = re.finditer(r"(t)(\w+(m))(?P<name>n)", origin)
print(r)
for i in r:
    print(r)  # the iterator object itself, printed once per match
    print(i.group())  # the whole matched text
    print(i.groups())  # all captured groups
    print(i.groupdict())  # only the named groups
print("--------------------------finditer1---------------------------")
---------------------------------------------------------------------

<callable_iterator object at 0x00000000025CF940>
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
--------------------------finditer1---------------------------
---------------------------------------------------------------------
'''
5.re.split():分割
split(pattern, string, maxsplit=0, flags=0):
pattern:正则
string:字符串
maxsplit:最大分割次数
flags:标志位,用于控制正则表达式的匹配方式,如:是否区分大小写,多行匹配等等
'''
origin = "hello tomn bcd tomn lge tomn acd 19"
# str.split() cuts on a literal substring.
print(origin.split("t"))
# re.split() cuts on a pattern; maxsplit=1 stops after the first cut.
# maxsplit is passed by keyword because positional use is deprecated in
# recent Python versions.
r = re.split(r"t\w+", origin, maxsplit=1)
print(r)
print("--------------------------split1---------------------------")
---------------------------------------------------------------------
['hello ', 'omn bcd ', 'omn lge ', 'omn acd 19']
['hello ', ' bcd tomn lge tomn acd 19']
--------------------------split1---------------------------
---------------------------------------------------------------------

# When the pattern contains a capture group, the text that was split on is
# kept in the result list instead of being discarded.
r = re.split(r"(t\w+)", origin, maxsplit=1)
print(r)
print("--------------------------split2---------------------------")
---------------------------------------------------------------------
['hello ', 'tomn', ' bcd tomn lge tomn acd 19']
--------------------------split2---------------------------
---------------------------------------------------------------------

# Put the parentheses after "t" and before "n" to drop both letters: only
# the captured middle part is kept in the result list.
r = re.split(r"t(\w+)n", origin, maxsplit=1)
print(r)
print("--------------------------split3---------------------------")

---------------------------------------------------------------------
['hello ', 'om', ' bcd tomn lge tomn acd 19']
--------------------------split3---------------------------
---------------------------------------------------------------------
'''
计算器
'''
source = "1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))"


def func(x):
    """Placeholder evaluator: stands in for real arithmetic and returns 1.

    Args:
        x: The parenthesis-free expression text to evaluate (ignored here).

    Returns:
        Always 1.
    """
    return 1


# Repeatedly replace the first innermost parenthesised expression (one with
# no nested parentheses inside) by its evaluated value until none are left.
while True:
    print(source)
    # \( and \) match literal parentheses; the capture group keeps only the
    # text between them, so a successful split yields exactly three parts.
    result = re.split(r"\(([^()]+)\)", source, maxsplit=1)
    if len(result) == 3:
        before, content, after = result
        r = func(content)
        new_source = before + str(r) + after
        source = new_source
    else:
        # No parentheses remain: evaluate the flat expression and stop.
        m = func(source)
        print(m)
        break
---------------------------------------------------------------------
1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*1)-(-4*3)/(16-3*2))
1-2*(1-(-4*3)/(16-3*2))
1-2*(1-1/(16-3*2))
1-2*(1-1/1)
1-2*1
1
---------------------------------------------------------------------

'''
6.re.sub():
sub(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项
pattern:正则
repl:指定替换后的字符串
string:要替换的字符串
count:替换次数,默认所有
flags:标志位
subn(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项,并返回替换次数
'''
origin = "fsd2agds3gsd4gsdga5gas7g8a8sdf"
# Replace only the first two runs of digits. count is passed by keyword
# because positional use is deprecated in recent Python versions.
r = re.sub(r"\d+", "OOO", origin, count=2)
print(r)
# subn() replaces every run of digits and also returns how many
# replacements were made, as a (new_string, count) tuple.
r = re.subn(r"\d+", "OOO", origin)
print(r)
---------------------------------------------------------------------
fsdOOOagdsOOOgsd4gsdga5gas7g8a8sdf
('fsdOOOagdsOOOgsdOOOgsdgaOOOgasOOOgOOOaOOOsdf', 7)
---------------------------------------------------------------------

============================================================================================================
原文地址:https://www.cnblogs.com/chushujin/p/9408350.html