一段代码

import requests
from bs4 import BeautifulSoup
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import re
import jieba
import numpy as np
from wordcloud import WordCloud, ImageColorGenerator

# Fetch the raw danmaku (bullet-comment) XML for one Bilibili video.
url = 'https://comment.bilibili.com/92542241.xml'
r = requests.get(url)
r.encoding = 'utf-8'  # the XML is UTF-8; 'utfs' is not a valid codec

# Each <d> element holds one danmaku comment's text.
soup = BeautifulSoup(r.text, 'lxml')
d = soup.find_all('d')

# Collect one record per comment: text, source URL, and the scrape date.
dlst = []
for i in d:
    danmuku = {}  # must be a dict, not a list, to take string keys
    danmuku['弹幕'] = i.text
    danmuku['网址'] = url
    danmuku['时间'] = datetime.date.today()
    dlst.append(danmuku)

df = pd.DataFrame(dlst)

# Keep only Chinese characters from each comment and dump them to a
# plain-text file for segmentation.
with open('sign.txt', 'w', encoding='utf-8') as f:
    pattern = re.compile(r'[\u4e00-\u9fa5]+')  # CJK unified ideographs
    for text in df['弹幕'].values:
        filter_data = pattern.findall(text)
        f.write(''.join(filter_data))

# Segment the corpus into words with jieba and count word frequencies.
with open('sign.txt', 'r', encoding='utf-8') as f:
    data = f.read()
segment = jieba.lcut(data)
words_df = pd.DataFrame({'segment': segment})

# Frequency table, most common words first.
words_stat = words_df.groupby('segment').size().reset_index(name='计数')
words_stat = words_stat.sort_values(by=['计数'], ascending=False)

# Mask image that shapes the word cloud.
color_mask = plt.imread('01.jpg')

wc = WordCloud(
    # A Chinese-capable font is required or all glyphs render as boxes;
    # adjust the path for your system.
    font_path='simhei.ttf',
    background_color='white',
    max_words=3000,
    mask=color_mask,
    max_font_size=200,
    random_state=100,
    width=1000,
    height=860,
    margin=2,
)

# Top 500 words -> {word: count}; WordCloud accepts the dict directly.
word_frequence = {x[0]: x[1] for x in words_stat.head(500).values}

wc.generate_from_frequencies(word_frequence)
wc.to_file('output.png')
plt.imshow(wc)
plt.axis('off')
plt.show()

今天也不知道写些什么，就敲了这段讲爬虫的代码，回头再看看，方便以后取用吧。
原文地址:https://www.cnblogs.com/medigrat/p/11755536.html