二十七、正则表达式补充


import re

'''
正则表达式：
re.match:从头匹配
re.search:浏览全部字符串，匹配第一个符合规则的字符串
re.findall():将匹配到的所有内容都放置在一个列表中
re.finditer():与findall类似，但返回迭代器
re.split():按正则匹配到的内容分割字符串
re.sub():替换字符串中的匹配项
'''

'''
1.match
'''
# re.match only matches at the start of the string. r"h\w+" is "h" followed
# by one or more word characters, i.e. the leading word of `origin`.
# The pattern is a raw string so that "\w" reaches the regex engine intact.
origin = "hello tom bcd tom lge tom acd 19"
r = re.match(r"h\w+", origin)
print(r.group())      # the whole matched text
print(r.groups())     # tuple of captured groups (empty: no groups in the pattern)
print(r.groupdict())  # dict of named groups (empty: no named groups)
print ("-------------------------match1----------------------------")
---------------------------------------------------------------------
hello
()
{}
-------------------------match1----------------------------

---------------------------------------------------------------------


# Same match as before, but the whole pattern is wrapped in one capturing
# group, so groups() now returns a 1-tuple holding the matched word.
# Raw string keeps the "\w" escape intact.
r = re.match(r"(h\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # the single captured group
print(r.groupdict())  # still empty: the group is not named
print ("--------------------------match2---------------------------")

---------------------------------------------------------------------
hello
('hello',)
{}
--------------------------match2---------------------------

---------------------------------------------------------------------


# Two capturing groups: the leading "h" and the remaining word characters.
# groups() returns one entry per group, in left-to-right order.
# Raw string keeps the "\w" escape intact.
r = re.match(r"(h)(\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # both captured groups
print(r.groupdict())  # still empty: neither group is named
print ("--------------------------match3---------------------------")

---------------------------------------------------------------------
hello
('h', 'ello')
{}
--------------------------match3---------------------------

---------------------------------------------------------------------


# Named groups: (?P<name>...) captures like a normal group and additionally
# exposes the capture under `name` in groupdict().
# Raw string keeps the "\w" escape intact.
r = re.match(r"(?P<n1>h)(?P<n2>\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # captures in positional order
print(r.groupdict())  # the same captures keyed by group name (n1, n2)
print ("--------------------------match4---------------------------")

---------------------------------------------------------------------


hello
('h', 'ello')
{'n1': 'h', 'n2': 'ello'}
--------------------------match4---------------------------

---------------------------------------------------------------------


'''
2.search:全字符串匹配
'''
# re.search scans the whole string and returns the first location that
# matches. Here: a word starting with "t", then anything, then a final
# digit (named group "name") anchored at the end of the string.
# Raw string keeps the "\w" and "\d" escapes intact.
origin = "hello tom bcd tom lge tom acd 19"
r = re.search(r"(t\w+).*(?P<name>\d)$", origin)
print(r.group())      # the whole matched text
print(r.groups())     # all captured groups, named or not
print(r.groupdict())  # only the named groups
print ("--------------------------search1---------------------------")

---------------------------------------------------------------------

tom bcd tom lge tom acd 19
('tom', '9')
{'name': '9'}
--------------------------search1---------------------------

---------------------------------------------------------------------


# The group sits after the literal "t", so the match is the full word but
# the captured group excludes the leading "t".
# Raw string keeps the "\w" escape intact.
origin = "hello tom bcd tom lge tom acd 19"
r = re.search(r"t(\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # the captured part after "t"
print(r.groupdict())  # empty: the group is not named
print ("--------------------------search2---------------------------")

---------------------------------------------------------------------
tom
('om',)
{}
--------------------------search2---------------------------

---------------------------------------------------------------------


'''
3.findall:匹配到的字符串放到列表(分组和不分组)
分组提取：从左到右，从外到内,有几个括号就取几次
'''
# Without groups, findall returns the list of non-overlapping full matches:
# digits, one word character, digits.
# Raw string keeps the "\d" and "\w" escapes intact.
r = re.findall(r"\d+\w\d+", "a2b3c4d5")
print(r)
print ("--------------------------findall1---------------------------")

---------------------------------------------------------------------

['2b3', '4d5']
--------------------------findall1---------------------------

---------------------------------------------------------------------


# An empty pattern produces a zero-width match at every position of the
# 8-character input (before each character and at the end), so the result
# is a list of empty strings.
r = re.compile("").findall("a2b3c4d5")
print(r)
print ("--------------------------findall2---------------------------")

---------------------------------------------------------------------
['', '', '', '', '', '', '', '', '']
--------------------------findall2---------------------------

---------------------------------------------------------------------


# With capturing groups, findall returns one tuple per match containing the
# groups (the same values groups() would give for that match).
# Raw string keeps the "\w" escape intact.
origin = "hello tomm bcd tomm lge tomm acd 19"
r = re.findall(r"(t)(\w+)(m)", origin)
print(r)
print ("--------------------------findall3---------------------------")

---------------------------------------------------------------------
[('t', 'om', 'm'), ('t', 'om', 'm'), ('t', 'om', 'm')]
--------------------------findall3---------------------------

---------------------------------------------------------------------


# Groups are numbered by their opening parenthesis, left to right and
# outside-in: the outer group (whole word) comes first, then the inner ones.
# Raw string keeps the "\w" escape intact.
origin = "hello tomm bcd tomm lge tomm acd 19"
r = re.findall(r"((t)(\w+)(m))", origin)
print(r)
print ("--------------------------findall4---------------------------")

---------------------------------------------------------------------
[('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm')]
--------------------------findall4---------------------------

---------------------------------------------------------------------


# Nested group: the second group is (\w+(m)) and the third is the (m) nested
# inside it, so each tuple holds "t", the second group, its inner "m", and "n".
# Raw string keeps the "\w" escape intact.
origin = "hello tomn bcd tomn lge tomn acd 19"
r = re.findall(r"(t)(\w+(m))(n)", origin)
print(r)
print ("--------------------------findall5---------------------------")

---------------------------------------------------------------------
[('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n')]
--------------------------findall5---------------------------

---------------------------------------------------------------------


'''
4.finditer()：返回迭代器
'''
# finditer returns an iterator that yields one match object per match, so
# group()/groups()/groupdict() are available for every occurrence.
# Raw string keeps the "\w" escape intact.
origin = "hello tomn bcd tomn lge tomn acd 19"
r = re.finditer(r"(t)(\w+(m))(?P<name>n)", origin)
print(r)
for i in r:
    print(r)              # prints the iterator object itself, not the match
    print(i.group())      # the whole matched text
    print(i.groups())     # all captured groups
    print(i.groupdict())  # only the named group
print ("--------------------------finditer1---------------------------")

---------------------------------------------------------------------

<callable_iterator object at 0x00000000025CF940>
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
<callable_iterator object at 0x00000000025CF940>
tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
--------------------------finditer1---------------------------

---------------------------------------------------------------------

'''
5.re.split():分割
split(pattern, string, maxsplit=0, flags=0):
pattern:正则
string：字符串
maxsplit：最大分割次数
flags：标志位，用于控制正则表达式的匹配方式，如：是否区分大小写，多行匹配等等
'''
# str.split cuts on a fixed substring; re.split cuts on a regex match.
# maxsplit=1 stops after the first split. It is passed by keyword because
# passing it positionally is deprecated in recent Python versions.
# Raw string keeps the "\w" escape intact.
origin = "hello tomn bcd tomn lge tomn acd 19"
print(origin.split("t"))
r = re.split(r"t\w+", origin, maxsplit=1)
print(r)
print ("--------------------------split1---------------------------")

---------------------------------------------------------------------
['hello ', 'omn bcd ', 'omn lge ', 'omn acd 19']
['hello ', ' bcd tomn lge tomn acd 19']
--------------------------split1---------------------------

---------------------------------------------------------------------


# When the pattern contains a capturing group, the text captured by the
# group is kept in the result list between the two pieces.
# Raw string keeps the "\w" escape intact; maxsplit is passed by keyword.
r = re.split(r"(t\w+)", origin, maxsplit=1)
print(r)
print ("--------------------------split2---------------------------")

---------------------------------------------------------------------
['hello ', 'tomn', ' bcd tomn lge tomn acd 19']
--------------------------split2---------------------------

---------------------------------------------------------------------


# Placing the group after "t" and before "n" keeps only the text between
# them: the separator element in the result excludes the "t" and the "n".
# Raw string keeps the "\w" escape intact; maxsplit is passed by keyword.
r = re.split(r"t(\w+)n", origin, maxsplit=1)
print(r)
print ("--------------------------split3---------------------------")

---------------------------------------------------------------------
['hello ', 'om', ' bcd tomn lge tomn acd 19']
--------------------------split3---------------------------

---------------------------------------------------------------------

'''
计算器
'''
source="1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))"


def func(x):
    """Stand-in for evaluating the parenthesis-free expression `x`.

    It ignores its argument and always returns 1; it only exists so the
    reduction loop below can be demonstrated.
    """
    return 1


# Repeatedly collapse the first innermost "( ... )" into the value returned
# by func() until no parentheses are left.
while True:
    print(source)
    # \( and \) match literal parentheses; the group captures the content
    # between them, which may not itself contain a parenthesis. With one
    # capturing group and maxsplit=1, a successful split yields exactly
    # [before, content, after]; no match yields a 1-element list.
    result = re.split(r"\(([^()]+)\)", source, maxsplit=1)
    if len(result) == 3:
        before, content, after = result
        r = func(content)
        source = before + str(r) + after
    else:
        # No parentheses remain: handle the flat expression and stop.
        m = func(source)
        print(m)
        break

---------------------------------------------------------------------
1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*1)-(-4*3)/(16-3*2))
1-2*(1-(-4*3)/(16-3*2))
1-2*(1-1/(16-3*2))
1-2*(1-1/1)
1-2*1
1

---------------------------------------------------------------------


'''
6.re.sub():
sub(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项
pattern:正则
repl：指定替换后的字符串
string：要替换的字符串
count：替换次数，默认所有
flags：标志位
subn(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项,并返回替换次数
'''
# re.sub replaces matches of the pattern; count limits how many matches are
# replaced (here only the first two runs of digits). re.subn does the same
# but returns a (new_string, number_of_replacements) tuple.
# Raw string keeps the "\d" escape intact; count is passed by keyword
# because passing it positionally is deprecated in recent Python versions.
origin = "fsd2agds3gsd4gsdga5gas7g8a8sdf"
r = re.sub(r"\d+", "OOO", origin, count=2)
print(r)
r = re.subn(r"\d+", "OOO", origin)
print(r)

---------------------------------------------------------------------
fsdOOOagdsOOOgsd4gsdga5gas7g8a8sdf
('fsdOOOagdsOOOgsdOOOgsdgaOOOgasOOOgOOOaOOOsdf', 7)

---------------------------------------------------------------------


============================================================================================================