正则表达式

 1 >>> re.findall(r'http://www|www.[a-z0-9-]*.[a-z]{2,3}','www.baidu.com')
 2 ['www.baidu.com']
 3 >>> re.findall(r'http://www|www.[a-z0-9-]*.[a-z]{2,3}','www.baidu.cn')
 4 ['www.baidu.cn']
 5 >>> re.findall(r'[0-9]{3}\.[0-9.]{2}\.[0-9]{1}','127.0.0.1')
 6 []
 7 >>> re.findall(r'[0-9]{3}\.[0-9]\.{2}\.[0-9]{1}','127.0.0.1')
 8 []
 9 >>> re.findall(r'[0-9]{3}\.[0-9]{1}\.[0-9]{1}\.[0-9]{1}','127.0.0.1')  #0到9匹配三次,0到9匹配一次
10 ['127.0.0.1']
11 >>> s="Life can be good"
12 >>> re.match(r'[a-z]{3}',s)
13 >>> re.match(r'[a-z]{3}',s)
14 >>> re.findall(r'[a-z]{3}',s)
15 ['ife', 'can', 'goo']
1>>> re.findall(r'[a-z]{3}',s)     对一个字符串进行匹配
['ife', 'can', 'goo']
2>>> re.split('b',s,2)
['life can ', 'e ', 'ad']        将一个字符串分割
3>>> r=re.compile(r'\w.\sg')      将一个规则编译成compile对象
>>> s="life can be good"
>>> r.findall(s)
总结:正则表达式的原理:要匹配的内容,要匹配的次数,还有开头,结尾等一些特殊情况,
这样就能从网页的一堆字符串中找出符合规则的内容
 1 >>> re.sub('bad','good','life can be bad')
 2 'life can be good'
 3 >>> re.split('b',s)
 4 ['Life can ', 'e good']
 5 >>> re.split('b',s,2)        split()  将一个字符串分割成几部分
 6 ['Life can ', 'e good']
 7 >>> s="life can be bad"
 8 >>> re.split('b',s,2)
 9 ['life can ', 'e ', 'ad']
10 >>> re.findall(r'\\bg.*?\\b',s)  #原始字符串中的\\b匹配的是字面的反斜杠加b,不是单词边界,所以结果为空
11 []
12 >>> re.findall(r'\\bg.+?\\b',s)
13 []
14 >>> s="python can run on windows"
15 >>> re.findall(r'\\bo.+?\\b',s)
16 []
17 >>> re.findall(r'\\bo.+?\\b',s)
18 []
19 >>> re.findall(r'\\s',s)
20 []
21 >>> re.findall(r'\s',s)
22 [' ', ' ', ' ', ' ']
23 >>> re.findall(r'\\b.+?.\\b',s)
24 []
25 >>> re.findall(r'\\b.+?\\b',s)
26 []
27 >>> re.findall(r'\\b\w.+?\\b',s)
28 []
29 >>> re.findall(r'\b\w.+?\b',s)
30 ['python', 'can', 'run', 'on', 'windows']
31 >>> re.findall(r'\b\w.+?\b',s)
32 ['python', 'can', 'run', 'on', 'windows']
33 >>> re.findall(r'\b.+?\b',s)
34 ['python', ' ', 'can', ' ', 'run', ' ', 'on', ' ', 'windows']
35 >>> r=re.compile(r'\w.\sg')
36 >>> s="life can be good"
37 >>> r.findall(s)
38 ['be g']
39 >>> s="life can be3g"
40 >>> r.findall(s)
41 []
42 >>> s="life can becc good"
43 >>> r.findall(s)
44 ['cc g']
45 >>> re.findall(r'\w.\dg','life can be9g')
46 ['be9g']
原文地址:https://www.cnblogs.com/caojunjie/p/6725903.html