# coding=utf-8
"""Collect and print runs of Chinese characters found in ``.txt`` files."""
import os
import re

# Directory scanned when the file is run as a script: the current working
# directory at import time.
path = os.path.abspath('.')

# One or more consecutive characters in the range U+4E00..U+9FA5.
# The ``\u`` escapes are required: without them the character class is made
# of the literal characters "u4E0", the range "0"-"u", and "9FA5", which
# matches ASCII digits/letters instead of Chinese text.
_ZH_RUN = re.compile(r"[\u4E00-\u9FA5]+")


def all_path(dirname):
    """Return the paths of all files under *dirname*, recursively.

    Each path is ``os.path.join(maindir, filename)`` for every file that
    ``os.walk(dirname)`` yields, in walk order.
    """
    return [
        os.path.join(maindir, filename)
        for maindir, _subdirs, file_name_list in os.walk(dirname)
        for filename in file_name_list
    ]


def print_zh(document_list):
    """Print the Chinese character runs found in the given ``.txt`` files.

    Paths in *document_list* that do not end with ``.txt`` are skipped. Each
    remaining file is read as UTF-8 and every maximal run of characters in
    U+4E00..U+9FA5 is collected. Two lines are printed: the list of all runs
    in encounter order (duplicates kept), then the set of distinct runs.
    """
    all_text = []
    for d in document_list:
        # Check the suffix rather than a substring so that e.g. "a.txt.bak"
        # or files inside a directory named "x.txt" are not picked up.
        if not d.endswith('.txt'):
            continue
        with open(d, 'r', encoding="utf-8") as f:
            all_text.extend(_ZH_RUN.findall(f.read()))
    print(all_text)
    print(set(all_text))


if __name__ == '__main__':
    print_zh(all_path(path))