python自动化之正则

import re

phoneNumRegex=re.compile(r'ddd-ddd-dddd')

mo=phoneNumRegex.search('My number is 415-555-4242.')

print('Phone number found: '+mo.group())

#######利用括号分组##############

phoneNumRegex=re.compile(r'(ddd)-(ddd-dddd)')

mo=phoneNumRegex.search('My number is 415-555-4242.')

mo.group(1)

mo.group(2)

mo.group(0)

mo.group()

mo.groups()

areaCode,mainNumber=mo.groups()

areaCode

mainNumber

################用管道匹配多个分组#######################

heroRegex=re.compile(r'Batman|Tina Fey')

mo1=heroRegex.search('Batman and Tina Fey')

mo1.group()

mo2=heroRegex.search('Tina Fey and Batman')

mo2.group()

batRegex=re.compile(r'Bat(man|mobile|copter|bat)')

mo=batRegex.search('Batmobile lost a wheel')

mo.group()

mo.group(1)

################用问号实现可选匹配#######################

batRegex=re.compile(r'Bat(wo)?man')

mo1=batRegex.search('The Adventures of Batman')

mo1.group()

mo2=batRegex.search('The Adventures of Batwoman')

mo2.group()

phoneRegex=re.compile(r'(ddd-)?ddd-ddd')

mo1=phoneRegex.search('My number is 415-555-4242')

mo1.group()

mo2=phoneRegex.search('My number is 555-4242')

mo2.group()

################用星号匹配零次或多次###################

batRegex=re.compile(r'Bat(wo)*man')

mo1=batRegex.search('The Adventures of Batman')

mo1.group()

mo2=batRegex.search('The Adventures of Batwoman')

mo2.group()

mo3=batRegex.search('The Adventures of Batwowowoman')

mo3.group()

################用加号匹配一次或多次###################

batRegex=re.compile(r'Bat(wo)+man')

mo1=batRegex.search('The Adventures of Batman')

mo1.group()

mo2=batRegex.search('The Adventures of Batwoman')

mo2.group()

mo3=batRegex.search('The Adventures of Batwowowoman')

mo3.group()

################用花括号匹配特定次数######################

haRegex=re.compile(r'(Ha){3}')

mo1=haRegex.search('HaHaHa')

mo1.group()

mo2=haRegex.search('Ha')

mo2==None

#################贪心和非贪心匹配####################################

##########正则表达式：贪心,在有二义的情况下,尽可能匹配最长的字符串###

greedyHaRegex=re.compile(r'(Ha){3,5}')

mo1=greedyHaRegex.search('HaHaHa')

mo1.group()

nongreedyHaRegex=re.compile(r'(Ha){3,5}?')

mo2=nongreedyHaRegex.search('HaHaHaHa')

mo2.group()

####################findall()方法###################################

###########包含被查找字符串中的所有匹配#############################

phoneNumRegex=re.compile(r'ddd-ddd-dddd')

phoneNumRegex.findall('Cell:415-555-9999 Work:215-555-0000')

###########################字符分类#################################

###d 表示0到9的任何数字

###D 表示除0到9的数字之外的任何字符

###w 表示任何字母、数字或下划线字符(可以认为是匹配“单词”字符)

###W 表示除字母、数字和下划线以外的任何字符

###s 表示空格、制表符或换行符(可以认为是匹配“单词”字符)

###S 表示除空格、制表符或换行符以外的任何字符

xmasRegex=re.compile(r'd+sw+')

xmasRegex.findall('12 drummers,11 pipers,10 lords,9 ladies,8 maids,7 swans,6 geese,5 rings,4 birds,3 hens,2 doves,1 partridge')

#######################建立自己的字符分类###########################

vowelRegex=re.compile(r'[aeiouAEIOU]')

vowelRegex.findall('RoboCop eats baby food.BABY FOOD')

########################插入字符和美元字符###########################

#########^表明匹配必须发生在被查找文本开始处#########################

#########$表明该字符串必须匹配该模式结束#############################

###########################通配字符##################################

#########.(句点)字符被称为"通配符"###################################

atRegex=re.compile(r'.at')

atRegex.findall('The cat in the hat sat on the flat mat')

###########################用点-星匹配所有字符######################

######点-星(.*)表示"任意文本"(贪心模式:总是匹配尽可能多的文本)######

####点-星-问号表示“任意文本”(非贪心模式：总是匹配尽可能少的文本)####

nongreedyRegex=re.compile(r'<.*?>')

mo=nongreedyRegex.search('<To serve man> for dinner.>')

mo.group()

greedyRegex=re.compile(r'<.*>')

mo=greedyRegex.search('<To serve man> for dinner.>')

mo.group()

#######################################################################

###############点-星将匹配除换行以外的所有字符#########################

###############通过传入re.DOTALL作为re.compile()的第二个参数###########

###############可以让句点字符匹配所有字符,包括换行字符#################

#######################################################################

##############向re.compile()传入re.IGNORECASE或re.I,不区分大小写#######

#############用sub()方法替换字符串#####################################

namesRegex=re.compile(r'Agent w+')

namesRegex.sub('CENSORED','Agent Alice gave the secret documents to Agent Bob.')

agentNamesRegex=re.compile(r'Agent (w)w*')

agentNamesRegex.sub(r'1****','Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.')

####################管理复杂的正则表达式###############################

phoneRegex=re.compile(r'''(

(d{3}|(d{3}))? #area code

(s|-|.)? #separator

d{3} #first 3 digits

(s|-|.) #separator

d{4} #last 4 digits

(s*(ext|x|ext.)s*d{2,5})? #extension

)''',re.VERBOSE)

#########################################################################

######################pyperclip模块复制和粘贴字符串######################

import pyperclip,re

####Create phone regex

phoneRegex=re.compile(r'''(

(d{3}|(d{3}))? #area code

(s|-|.)? #separator

d{3} #first 3 digits

(s|-|.) #separator

d{4} #last 4 digits

(s*(ext|x|ext.)s*d{2,5})? #extension

)''',re.VERBOSE)

####Create email regex

emailRegex=re.compile(r'''(

[a-zA-Z0-9._%+-]+ #username

@ #@ symbol

[s-zA-Z0-9.-]+ #domain name

(.[a-zA-Z]{2,4}) #dot-something

)''',re.VERBOSE)

####Find matches in clipboard text.

text=str(pyperclip.paste())

matches=[]

for groups in phoneRegex.findall(text):

phoneNum = '-'.join([groups[1],groups[3],groups[5]])

if groups[8] !='':

phoneNum += ' x'+groups[8]

matches.append(phoneNum)

####Copy results to the clipboard.

if len(matches)>0:

pyperclip.copy(' '.join(matches))

print('Copied to clipboard:')

print(' '.join(matches))

else:

print('No phone numbers or email addresses found.')

###示例1：

##############################强口令检测###################

###################长度不少于8个字符#######################

###################同时包含大写和小写字符##################

###################至少有一位数字##########################

import re

def checkLength(pwd):

IfOrNot=len(pwd)

if IfOrNot>=8:

return True

else:

return False

def checkUpperLetter(pwd):

UpperLetter=re.compile(r'[A-Z]+')

mo=UpperLetter.search(pwd)

if mo:

return True

else:

return False

def checkLowerLetter(pwd):

LowerLetter=re.compile(r'[a-z]+')

mo=LowerLetter.search(pwd)

if mo:

return True

else:

return False

def checkNumLetter(pwd):

LowerLetter=re.compile(r'[0-9]+')

mo=LowerLetter.search(pwd)

if mo:

return True

else:

return False

def checkPassword(pwd):

return (checkLength(pwd) and checkUpperLetter(pwd) and checkLowerLetter(pwd) and checkNumLetter(pwd))

###示例2：

####正则表达式,匹配每3位就有一个逗号的数字?必须匹配以下数字:

.'42'

.'1,234'

.'6,368,745'

####但不会匹配:

.'12,34,567'

.'1234'

numRegex=re.compile(r'^d{1,3}(,d{3})*$')

mo=numRegex.search('12,304,567')

mo.group()