python正则学习

# 正则表达式是一门小型的语言:

import re
正则匹配分为(普通字符,元字符)
普通字符:大多数字符和字母都会和自身匹配
元字符: . ^ $ * + ? {} [] | ()

. 通配符,匹配除换行符以外的任意一个字符
# ret = re.findall("a..x","heallxyysjh")
# print(ret) #['allx']

^ 以什么开头匹配
# ret = re.findall("^h....x","heallxyysjh")
# print(ret) #['heallx']

$ 以什么结尾的匹配
# ret = re.findall("y..h$","heallxyysjh")
# print(ret) #['ysjh']

* 重复匹配,范围0,无穷
# ret = re.findall("d*","sfdsgggdddddddhshh")
# print(ret) #['', '', 'd', '', '', '', '', 'ddddddd', '', '', '', '', '']
# ret = re.findall("alex*","sssalexxxxhhhale")
# print(ret) #['alexxxx', 'ale']

+ 重复匹配,范围1,无穷
# ret = re.findall("d+","sfdsgggdddddddhshh")
# print(ret) #['d', 'ddddddd']
# ret = re.findall("alex+","sssalexxxxhhhale")
# print(ret) #['alexxxx']

? 重复匹配,范围0,1 在贪婪匹配后面加?变成惰性匹配
# ret = re.findall("alex?","sssalexxxxhhhale")
# print(ret) #['alex', 'ale']

{} 匹配重复,范围自定义 {0,}==* {1,}==+ {0,1}==? 贪婪匹配,取最大
# ret = re.findall("alex{4}","sssalexxxxhhhale")
# print(ret) #['alexxxx']
# ret = re.findall("alex{0,}","sssalexxxxhhhale")
# print(ret) #['alexxxx', 'ale']

[] 字符集 特殊意义:- ^(非)
# ret = re.findall("q[a-z]","qabhhdnqdhd")
# print(ret) #['qa', 'qd'] a-z 代表a到z的小写字母

# ret = re.findall(r"\([^()]*\)","12+(10*5+(2*3-1+(5-2)))")
# print(ret) #['(5-2)'] 匹配最里面的括号运算

| 或
# ret = re.findall("ka|b","shjahkabcs")
# print(ret) #['ka', 'b']

() 分组

转义符
# 反斜杠后面跟元字符去除特殊功能,比如: \.
# 反斜杠后面跟普通字符实现特殊功能,比如: \d
# \d 匹配任何十进制数:相当于[0-9]
# \D 匹配任何非数字字符;相当于[^0-9]
# \s 匹配任何空白字符;相当于[ \t\n\r\f\v]
# \S 匹配任何非空白字符;相当于[^ \t\n\r\f\v]
# \w 匹配任何字母数字字符;相当于[a-zA-Z0-9_]
# \W 匹配任何非字母数字字符;相当于[^a-zA-Z0-9_]
# \b 匹配一个特殊字符边界,比如空格 & # 等

split 分割匹配
# ret = re.split(" ","abc cd df")
# print(ret) #['abc', 'cd', 'df'] 按空格匹配

# ret = re.split("[ |]","abc cd|df")
# print(ret) #['abc', 'cd', 'df'] 按空格和| 匹配

sub 替换匹配
# ret = re.sub(r"\d+","A","hsjk22ssd212ss22")
# print(ret) #hsjkAssdAssA
# subn 替换匹配并统计替换次数,结果是一个元组
# ret = re.subn(r"\d+","A","hsjk22ssd212ss22")
# print(ret) #('hsjkAssdAssA', 3)

compile 编译
# obj = re.compile(r"\d+")
# ret = obj.findall("alex18jim19kim20") #调用编译后的变量,传入值只需要一个
# print(ret) #['18', '19', '20']

finditer 迭代器
# ret = re.finditer(r"\d+","alex33king18jim22")
# print(ret) #<callable_iterator object at 0x005912B0> 迭代器生成对象
# print(next(ret).group()) #33
# print(next(ret).group()) #18
# print(next(ret).group()) #22


原文地址:https://www.cnblogs.com/Tang-Yuan/p/12611990.html