51job词云

  爬取 51job 上 Python 岗位的任职要求,并据此生成词云:

# coding:utf-8
import jieba  #分词
import  matplotlib.pyplot as plt #数据可视化
import wordcloud
from  wordcloud import WordCloud,ImageColorGenerator,STOPWORDS #词云
import numpy  as np  #科学计算
from PIL import Image  #处理图片

# Build a word cloud from scraped job-requirement text.
#
# Pipeline: read "workinfo.txt" -> strip leftover HTML/CSS fragments and
# boilerplate words -> segment with jieba -> render a masked word cloud ->
# save it as "python.png" and display it.

# Read the raw bytes and decode explicitly as UTF-8 so the result does not
# depend on the platform's default encoding. The context manager guarantees
# the file handle is closed.
with open("workinfo.txt", "rb") as f:
    text = f.read()
textfile = text.decode("utf-8")

# Fragments to delete before segmentation. They are removed sequentially, in
# exactly this order; the order matters because removing one token can join
# the surrounding text into a new match for a later one.
_NOISE_TOKENS = (
    "span", "style", "font", "nbsp",
    "line", "height", "color", "family",
    "size", "宋体", "rgb", "white",
    # "background" was misspelled as "backgroud" here, so the CSS keyword
    # was never actually stripped from the text.
    "space", "normal", "background", "14px",
    "br", "岗位", "职责", "0px",
    "Microsoft", "YaHei", "margin", "top",
    "以上", "上学", "div", "li", "以及",
    # This removal used to be a no-op: str.replace returns a new string and
    # the result was discarded instead of being assigned back.
    "任职要求",
)
for _token in _NOISE_TOKENS:
    textfile = textfile.replace(_token, "")

# Segment the cleaned text (search-engine mode) and join the tokens with
# spaces, which is the input format WordCloud.generate() tokenizes.
wordlist = jieba.cut_for_search(textfile)
space_list = " ".join(wordlist)

# Load the mask image into an array; close the image file once it is copied.
with Image.open("2.jpg") as _mask_image:
    backgroud = np.array(_mask_image)

mywordcloud = WordCloud(
    # NOTE(review): per the wordcloud docs, width/height are ignored when a
    # mask is supplied (the mask's shape is used) -- confirm for the
    # installed version.
    width=1800,
    height=1400,
    background_color="black",  # canvas colour
    mask=backgroud,            # shape in which the words are drawn
    max_words=200,             # upper bound on the number of words rendered
    stopwords=STOPWORDS,       # the package's default stopword set
    font_path="simkai.ttf",    # font file used for rendering (needed for CJK)
    max_font_size=200,         # largest font size allowed
    random_state=50,           # fixed seed so the layout is reproducible
    scale=2,
).generate(space_list)

# Raise the DPI for both on-screen and saved figures to enlarge the output.
plt.rcParams['figure.dpi'] = 1800
plt.rcParams['savefig.dpi'] = 1400

# NOTE(review): this colour generator is built from the mask image but is
# never applied (e.g. via mywordcloud.recolor(color_func=image_color)), so
# the cloud keeps its default colours -- confirm whether recolouring was
# intended.
image_color = ImageColorGenerator(backgroud)

plt.imshow(mywordcloud)  # draw the word cloud
plt.axis("off")          # hide the axes
plt.savefig("python.png")
plt.show()
原文地址:https://www.cnblogs.com/my-global/p/12447315.html