正则表达式-汉字的匹配方法

unicode :   ([\u4e00-\u9fa5]+)

unicode :  ([\u2E80-\u9FFF]+)

utf-8  :  ([\x80-\xff]+)

 1 #encoding:utf-8
 2 import re 
 3 
 4 
 5 
 6 def main():
 7     
 8     # ([u4e00-u9fa5]+)
 9     TEST_STR_1 = u'ab123kk123'
10     pattern_str = u'[0-9]+([u4e00-u9fa5]+)[0-9]+'
11     pattern = re.compile (pattern_str)
12     m = pattern.search(TEST_STR_1)
13     print m.group() if m is not None else None
14     print m.group(1) if m is not None else None
15     print '
'
16     
17     TEST_STR_2 = u'ab123汉字123'
18     m = pattern.search(TEST_STR_2)
19     print m.group() if m is not None else None
20     print m.group(1) if m is not None else None
21     print '
'
22     
23     # ([x80-xff]+)
24     TEST_STR_3 = 'ab123汉字123'
25     pattern_str = '[0-9]+([x80-xff]+)[0-9]+'
26     pattern = re.compile (pattern_str)
27     m = pattern.search(TEST_STR_3)
28     print m.group().decode('utf-8') if m is not None else None
29     print m.group(1).decode('utf-8') if m is not None else None
30     print '
'    
31     
32     # ([u2E80-u9FFF]+)
33     TEST_STR_2 = u'ab123汉字123'
34     pattern_str = u'[0-9]+([u2E80-u9FFF]+)[0-9]+'
35     pattern = re.compile (pattern_str)
36     m = pattern.search(TEST_STR_2)
37     print m.group() if m is not None else None
38     print m.group(1) if m is not None else None
39     print '
'
40     
41     
42     
43 if __name__ == '__main__':
44     main()
原文地址:https://www.cnblogs.com/mmix2009/p/3220456.html