用wordcloud模块生成词云

绘制词云

在 PubMed 中检索关键词 "antibody drug conjugate",获取检索到的文献摘要(abstract)后,用这些摘要文本生成词云。

from Bio import Entrez
from wordcloud import WordCloud
# Search PubMed for "antibody drug conjugate", download the abstracts of the
# matching articles and render them as a word cloud image.
# NOTE(review): NCBI asks E-utilities clients to identify themselves; consider
# setting Entrez.email before running this script -- confirm against the
# Biopython / NCBI usage guidelines.

# usehistory='y' makes the search result available through the WebEnv /
# QueryKey pair read below, so efetch can refer to the whole result set
# without passing an explicit id list.
handle = Entrez.esearch(db="pubmed", usehistory='y', term="antibody and  drug and conjugate")
try:
    record = Entrez.read(handle)
finally:
    # Always release the network handle, even if parsing fails.
    handle.close()
web_env = record['WebEnv']
query_key = record['QueryKey']

handle = Entrez.efetch(db="pubmed", WebEnv=web_env, query_key=query_key, rettype="abstract", retmode='text')
try:
    text = handle.read()
finally:
    handle.close()

# Keep a copy of the raw abstracts on disk for later inspection. The text is
# already in memory, so there is no need to read the file back.
with open('data.txt', "w", encoding="utf-8") as f:
    f.write(text)

# Words that are very frequent in the PubMed text output but carry little
# meaning for this query. Entries are lower-case and free of punctuation
# because every token is normalised the same way before the lookup.
filter_words = {
    "author", "information", "indexed", "medline", "pmid",
    "antibody", "drug", "conjugate",
}
# Punctuation stripped from both ends of a token before comparing it, so that
# e.g. "Drug," / "[Indexed" / "PMID:" match their entries in filter_words.
strip_chars = ".,;:!?()[]{}\"'"

# Drop the filtered words; the surviving tokens are kept in their original
# form so the rendered words look the same as in the abstracts.
filtered_text = [
    w for w in text.split()
    if w.strip(strip_chars).casefold() not in filter_words
]

text = ' '.join(filtered_text)
wc = WordCloud(width=1200, height=800)
wc.generate(text)
wc.to_file("word_cloud.png")
# TODO: the cloud is still not very informative; filter further, e.g. plural
# forms ("conjugates", "antibodies") and hyphenated compounds such as
# "antibody-drug", which the whitespace split above keeps as a single token.

word_cloud

原文地址:https://www.cnblogs.com/YajunRan/p/11414980.html