Python 正则表达式的处理

１．基本用法

#!/usr/bin/env python
# coding=utf-8

import re

# example 1: split a string on runs of whitespace
text ="fjsk test	 fjskd bar	 	test"
# \s+ matches one or more whitespace characters (the sample mixes spaces and
# tabs). The raw-string prefix keeps the backslash intact for the regex
# engine; without the backslash the pattern would split on the letter "s".
regex = re.compile(r'\s+')
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(regex.split(text))

# example 2: find e-mail addresses inside a block of text
email ="""
    jfksdfasm@qq.com
    test@test.com.cn
    jfdskf@163.com
    jkmiao@yahoo.123com
    """

# Layout: local part, "@", domain, a literal dot, then a 2-6 character suffix.
# The dot is escaped so that it only matches "." (a bare "." matches any
# character), and every quantifier is the ASCII "+" -- a full-width plus sign
# would be treated as a literal character and nothing would ever match.
pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z.0-9]{2,6}'
# IGNORECASE lets the upper-case character classes match lower-case input too.
regex = re.compile(pattern, flags=re.IGNORECASE)

# get all
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(regex.findall(email))

# get the first one
# NOTE: this searches `text` (the whitespace sample from example 1), which
# contains no "@", so search() returns None and None is printed. To see the
# first address, search `email` instead; the matched text is then
# email[m.start():m.end()].
m = regex.search(text)
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(m)

# replace every matched address with the literal string 'RECORD'
print(regex.sub('RECORD', email))


显示：


jkmiao@sysucis:~/workplace/python/test$ python regex.py 
['fjsk', 'test', 'fjskd', 'bar', 'test']
['jfksdfasm@qq.com', 'test@test.com.cn', 'jfdskf@163.com', 'jkmiao@yahoo.123com']
None

    RECORD
    RECORD
    RECORD
    RECORD

２．分组，返回元组

# example 3: capture groups -- match.groups() returns them as a tuple

# Three groups: local part, domain and suffix. The separator dot is escaped
# so it only matches a literal "."; unescaped it matches any character and
# the domain/suffix split lands in the wrong place. The local-part class
# uses "%+-", the same set as the other examples.
pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z.]{2,5})'
regex = re.compile(pattern, flags=re.IGNORECASE)
# The suffix group is capped at 5 characters, so "suffix" is captured as
# "suffi"; match() does not require the pattern to reach the end of the string.
m = regex.match('name@domain.suffix')
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(m.groups())

# With capture groups in the pattern, findall() returns one tuple of groups
# per match instead of the whole matched string.
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(regex.findall(email))

# output

('name', 'domain', 'suffi')
[('jfksdfasm', 'qq', 'com'), ('test', 'gamil', 'com'), ('jfdskf', '163', 'com'), ('jkmiao', 'yahoo.com', 'cn')]

３．给分组加名称，返回字典

# example 4: named groups -- match.groupdict() returns them as a dict

# re.VERBOSE ignores the whitespace and line breaks inside the pattern, so it
# can be laid out one component per line. The separator dot is escaped so it
# only matches a literal "."; unescaped it matches any character and the
# domain/suffix split lands in the wrong place.
# NOTE(review): the group name "userame" looks like a typo for "username";
# it is kept because it is the key that appears in the printed dict.
regex = re.compile(r"""
                   (?P<userame>[A-Z0-9._%+-]+)
                   @(?P<domain>[A-Z0-9.-]+)
                   \.
                   (?P<suffix>[A-Z0-9.]{2,4})
                   """,flags=re.IGNORECASE|re.VERBOSE)

m = regex.match("jkmaio@sysu.com")
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(m.groupdict())

# findall() ignores the group names and still returns plain tuples, one per
# match, with the groups in pattern order.
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(regex.findall(email))

# output

jkmiao@sysucis:~/workplace/python/test$ python regex.py

{'domain': 'sysu', 'userame': 'jkmaio', 'suffix': 'com'}
[('jfksdfasm', 'qq', 'com'), ('test', 'gamil', 'com'), ('jfdskf', '163', 'com'), ('jkmiao', 'yahoo.com', 'cn')]

每天一小步，人生一大步！Good luck~