# Python_正则表达式二 (Python regular expressions, part 2)

import re

  1 '''
  2 正则表达式对象的sub(repl,string[,count=0])和subn(repl,string[,count=0])方法用来实现字符串替换功能
  3 '''
  4 example='''Beautiful is better than ugly.
  5 Explicit is better than implicit.
  6 Simple is better tha complex.
  7 Complext is better than nested.
  8 Sparse is better than dense.
  9 Readability counts.
 10 '''
 11 pattern = re.compile(r'bw*',re.I) #正则表达式对象，匹配以b或B开头的单词
 12 print(pattern.sub('*',example)) #将符合条件的单词替换为*
 13 # * is * than ugly.
 14 # Explicit is * than implicit.
 15 # Simple is * tha complex.
 16 # Complext is * than nested.
 17 # Sparse is * than dense.
 18 # Readability counts.
 19 print(pattern.sub('*',example,1))   #只替换1次
 20 # * is better than ugly.
 21 # Explicit is better than implicit.
 22 # Simple is better tha complex.
 23 # Complext is better than nested.
 24 # Sparse is better than dense.
 25 # Readability counts.
 26 print(re.compile(r'bw*'))  #匹配以字母b开头的单词
 27 print(pattern.sub('*',example,1))   #将符合条件的单词替换为*，只替换1次
 28 # * is better than ugly.
 29 # Explicit is better than implicit.
 30 # Simple is better tha complex.
 31 # Complext is better than nested.
 32 # Sparse is better than dense.
 33 # Readability counts.
 34 '''
 35 正则表达式对象呢的split(strign[,maxsplit = 0])方法用来实现字符串分隔.
 36 '''
 37 example = r'one,two,three.four/fivesix?seven[eight]nine|ten'
 38 pattern = re.compile(r'[,./\?[]|]')  #指定多个可能的分隔符
 39 print(pattern.split(example))
 40 # ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
 41 example = r'one1two2three3four4five5six6seven7enght8nine9ten'
 42 pattern=re.compile(r'd+')
 43 print(pattern.split(example))
 44 # ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'enght', 'nine', 'ten']
 45 example = r'one two     three   four,five.six.seven,enght,nine9ten'
 46 pattern=re.compile(r'[s,.d]+')    #允许分隔符重复
 47 print(pattern.split(example))
 48 ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'enght', 'nine', 'ten']
 49 
 50 '''
 51 match对象：
 52     正则表达式模块或正则表达式对象的match()方能发和search()方法匹配成功后都会返回math()对象。match对象的主要方法有grou()（返回匹配的
 53 一个或多个子模式内容）、groups()(返回一个包含匹配的所有子模式内容的元组)、groupdict()（返回包含匹配的所有命名子模式内容字典）、start()
 54 (返回指定子模式内容的起始位置)、end()(返回指定子模式内容的结束位置的前一个位置)、span()(返回一个包含指定子模式内容起始位置和结束前一个位置
 55 的元组)等。下面的代码使用几种不同的方法来删除字符串中指定的内容：
 56 '''
 57 email='tony@tiremove_thisger.net'
 58 m=re.search('remove_this',email)    #使用search()方法返回的match对象
 59 print(email[:m.start()]+email[m.end()])    #字符串切片
 60 print(re.sub('remove_this','',email))  #直接使用re模块的sub()方法
 61 # tony@tiger.net
 62 print(email.replace('remove_this','')) #也可以直接使用字符串替换方法
 63 # tony@tiger.net
 64 
 65 m=re.match(r"(w+)(w+)","Isaac Newton,physicist")
 66 print(m.group(0))    #返回整个模式内容
 67 # Isaac
 68 print(m.group(1))   #返回第一个子模式内容
 69 # Isaa
 70 print(m.group(2))
 71 # c
 72 print(m.group(1,2))
 73 # ('Isaa', 'c')
 74 
 75 '''
 76 下面的代码演示了子模式扩展语法的用法
 77 '''
 78 m=re.match(r"(?P<first_name>w+)(?P<last_name>w+)","Malcolm Reynolds")
 79 print(m.group('first_name'))   #使用命名的子模式
 80 # Malcolm
 81 print(m.group('last_name'))
 82 # m
 83 m=re.match(r'(d+).(d+)','24.1632')
 84 print(m.groups())   #返回所有匹配的子模式(不包括第0个)
 85 # ('24', '1632')
 86 m=re.match(r'(?P<first_name>w+)(?P<last_name>w+)','Malcolm Reynolds')
 87 print(m.groupdict())    #以字典形式返回匹配的结果
 88 # {'first_name': 'Malcol', 'last_name': 'm'}
 89 exampleString = '''There should be one-and preferably only one-obvious way to do it.
 90 Although that way may not be obvioud at first unless you're Dutch.
 91 Now is better than never.
 92 Athought never is often better than right now.
 93 '''
 94 pattern =re.compile(r'(?<=ws)never(?=sw)')  #查找不在橘子开头和结尾的never
 95 matchResult = pattern.search(exampleString)
 96 print(matchResult.span())
 97 # (168, 173)
 98 pattern =re.compile(r'(?<=ws)never')  #查找位于句子末尾的单词
 99 mathResult=pattern.search(exampleString)
100 print(mathResult.span())
101 # (152, 157)
102 
103 pattern=re.compile(r'(?:iss)better(sthan)')   #查找前面是is的better than组合
104 matchResult=pattern.search(exampleString)
105 print(matchResult.span())
106 # (137, 151)
107 print(matchResult.group(0))
108 # is better than
109 print(matchResult.group(1))
110 # than
111 pattern=re.compile(r'(?i)nw+') #查找以n或N字母开头的所有单词
112 index=0
113 while True:
114     matchResult=pattern.search(exampleString,index)
115     if not matchResult:
116         break
117     print(matchResult.group(0),':',matchResult.span(0))
118     index=matchResult.end(0)
119 # not : (88, 91)
120 # Now : (133, 136)
121 # never : (152, 157)
122 # never : (168, 173)
123 # now : (201, 204)
124 pattern=re.compile(r'(?<!nots)be')   #查找前面没有单词not的单词be
125 index=0
126 while True:
127     matchResult=pattern.search(exampleString,index)
128     if not matchResult:
129         break
130     print(matchResult.group(0),':',matchResult.span(0))
131     index=matchResult.end(0)
132 # be : (13, 15)
133 print(exampleString[13:20] )   #验证一下结果是否准确
134 # be one-
135 pattern=re.compile(r'(w*)(?P<f>w+)(?P=f)w*') #匹配有连续想念痛字母的单词
136 index = 0
137 while True:
138     matchResult=pattern.search(exampleString,index)
139     if not matchResult:
140         break
141     print(matchResult.group(0),':',matchResult.group(2))
142     index=matchResult.end(0)+1
143 # unless : s
144 # better : t
145 # better : t
# Sample text for findall(); reconstructed from the printed output recorded
# below, because the script never assigned `s` before printing it.
s = 'aaa    bb   c  d  e  fff'
print(s)
# aaa    bb   c  d  e  fff
# Whole words containing a doubled letter. The outer group captures the word
# and the named group "f" captures the repeated text, so findall() returns
# (word, repeated_text) tuples.
p = re.compile(r'(\b\w*(?P<f>\w+)(?P=f)\w*\b)')
print(p.findall(s))
# [('aaa', 'a'), ('bb', 'b'), ('fff', 'f')]