Python 在线生成文字云

在线生成文字云

在线生成文字云地址 http://a.leechg.com:8080/wordcloud

效果图

大体步骤

1 接收请求中的文本，通过结巴分词处理文本。

    seg_generator = jieba.cut(text)  # 使用结巴分词，也可以不使用
    stopwords = pd.read_csv( path +"/stop_words_zh_UTF-8.txt", index_col=False, quoting=3, sep="	", names=['stopword'],
                            encoding='utf-8')  # quoting=3全不引用
    seg_list = [i for i in seg_generator if i not in stopwords]
    seg_list = [i for i in seg_list if i != u' ']
    seg_list = r' '.join(seg_list)
    print seg_list
    return seg_list

2 生成图片并返回

    wc = WordCloud( font_path= path + '/simhei.ttf',#设置字体
                background_color="black", #背景颜色
                max_words=2000,# 词云显示的最大词数

                #max_font_size=100, #字体最大值
                random_state=42,
                )
    # 生成词云, 可以用generate输入全部文本(中文不好分词),也可以我们计算好词频后使用generate_from_frequencies函数
    wc.generate(seg_lisg)
    # wc.generate_from_frequencies(txt_freq)
    # txt_freq例子为[('词a', 100),('词b', 90),('词c', 80)]
    # 从背景图片生成颜色值
    #  wc.to_file("b.png")
    img = wc.to_image()
    return img

Web 框架使用的是 Django，URL 配置如下：

  # Route the path "wordcloud" (exact match) to the get() view in the word module.
  url(r'^wordcloud$', word.get),

入口代码：

def get(request):
    """Render the submitted text as a word cloud and return it as a PNG.

    The text is read from the ``text`` parameter of the request. When the
    parameter is present both in the POST body and in the query string, the
    query-string value takes precedence.

    Args:
        request: the Django request object handed over by the URL dispatcher.

    Returns:
        An ``HttpResponse`` with ``content_type="image/png"`` carrying the
        rendered image, or a 400 response when no text was supplied.
    """
    import io  # local import: only needed for the in-memory PNG buffer

    text = ""
    # Look the parameter up explicitly instead of catching every exception,
    # so unrelated errors are no longer silently swallowed.
    post_text = request.POST.get('text')
    if post_text is None:
        print("not POST")
    else:
        text = post_text
    query_text = request.GET.get('text')
    if query_text is None:
        print("not GET")
    else:
        text = query_text
    print(text)

    if not text:
        # NOTE(review): presumably the word-cloud generator cannot work with
        # empty input; answer with a client error instead of a server error.
        return HttpResponse("missing 'text' parameter", status=400)

    img = word.getWordCloud(text)
    # Encode the image in memory. The previous temp file was named by a
    # one-second-resolution timestamp, so concurrent requests could overwrite
    # each other's file; the files were never deleted and the handle used to
    # read them back was never closed.
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return HttpResponse(buf.getvalue(), content_type="image/png")

demo