IF

import sys
sys.path.append('../')

import jieba
import jieba.analyse
from optparse import OptionParser

USAGE = "博客园是一个面向开发者的知识分享社区。自创建以来,博客园一直致力并专注于为开发者打造一个纯净的技术交流社区,推动并帮助开发者通过互联网分享知识.";

parser = OptionParser(USAGE)
parser.add_option("-k", dest="topK")
opt, args = parser.parse_args()


if len(args) < 1:
print USAGE
sys.exit(1)

file_name = args[0]

if opt.topK is None:
topK = 10
else:
topK = int(opt.topK)

content = open(file_name, 'rb').read()

jieba.analyse.set_idf_path("../extra_dict/idf.txt.big");

tags = jieba.analyse.extract_tags(content, topK=topK)

print ",".join(tags)
原文地址:https://www.cnblogs.com/hgonlywj/p/4842678.html