python开发_re和counter

Python 中 re 模块和 collections.Counter 类结合使用，可以实现以下功能：

1.获取字符串或者文件中的单词组

2.对单词组进行统计

下面是我做的demo

运行效果：

=============================================

代码部分：

=============================================

 1 #python re and counter object
 2 '''
 3 读取一个文件，获取到该文件中的所有单词组，然后对该单词组进行个数统计，也可以根据
 4 条件统计，如：该单词组中出现最多的前number个单词
 5 '''
 6 import os
 7 import re
 8 from collections import Counter
 9 
def get_words(path):
    """Return every word found in the file at *path*, lower-cased.

    The whole file is read, converted to lower case, and split into
    maximal runs of regex word characters (letters, digits, underscore).

    Args:
        path: Location of the text file to read.

    Returns:
        A list of words in file order, or ``None`` (after printing a
        message) when *path* does not exist.
    """
    if os.path.exists(path):
        # The context manager closes the handle; a bare open().read()
        # leaves that to the garbage collector.
        with open(path) as source:
            text = source.read()
        # The pattern must be the escaped word class; a bare 'w+' would only
        # match runs of the literal letter "w".
        return re.findall(r'\w+', text.lower())
    print('the path [{}] is not exist!'.format(path))
    return None
16 
def get_most_common_words(words, number):
    """Count how often each word occurs in *words*.

    Args:
        words: Iterable of words to tally.
        number: How many of the most frequent words to report.

    Returns:
        If ``number > 0``, a list of ``(word, count)`` pairs for the
        ``number`` most frequent words, most frequent first. Otherwise the
        full ``Counter`` holding the tally for every word.
    """
    # Evaluate the comparison first so an invalid ``number`` fails before
    # any counting work is done.
    want_top = number > 0
    tally = Counter(words)
    return tally.most_common(number) if want_top else tally
26     
def main():
    """Run the demo against a sample file and print the results.

    Prints, in order: the word list returned by ``get_words``, the full
    tally of every word, and the ``number`` most frequent words. The three
    sections are separated by rows of '#'.
    """
    # Raw string: in a plain literal the backslash-t in this Windows path is
    # read as a TAB escape, silently yielding a path that does not exist.
    temp_path = r'c:\temp.txt'
    number = 5
    words = get_words(temp_path)
    print(words)
    print('#' * 50)
    # A non-positive number asks for the complete tally rather than a top-N.
    cnt = get_most_common_words(words, -1)
    print(cnt)
    print('#' * 50)
    cnt = get_most_common_words(words, number)
    print(cnt)
38 
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()