淘宝商品评论爬取:生成评论词云,并绘制饼状图(附效果图)

对应json文件地址:https://blog.csdn.net/nicholas_K/article/details/86094530

1. 获取了淘宝手机商品的评论和追评

2. 根据淘宝评论生成了词云

3. 实现了淘宝手机商品版本的饼状图绘制

import json
import time
import pygal
import pymysql.cursors
from wordcloud import WordCloud

# Load the Taobao review JSON file and flatten it into plain records.
def get_comments(path='tb_comments_1.json'):
    """Read a Taobao review export and return one dict per review.

    Args:
        path: Location of the JSON file. Defaults to the file name the
            original script used, so existing callers are unaffected.

    Returns:
        A list of dicts with the keys ``id``, ``content`` (text of the
        follow-up review, or the original falsy value when there is none),
        ``rateContent``, ``auctionSku`` and ``rateDate``.

    Raises:
        KeyError: If ``rateDetail``/``rateList`` or one of the mandatory
            per-review fields is missing.
    """
    with open(path, encoding='utf-8') as tb:
        comments_dict = json.load(tb)

    result_list = []
    for comment in comments_dict['rateDetail']['rateList']:
        # A review only carries a follow-up when 'appendComment' is truthy;
        # .get() keeps reviews that lack the key entirely from crashing.
        append = comment.get('appendComment')
        content = append['content'] if append else append
        # Build a fresh record instead of overwriting fields of the parsed
        # JSON, so the loaded data is left untouched.
        result_list.append({
            'id': comment['id'],
            'content': content,
            'rateContent': comment['rateContent'],
            'auctionSku': comment['auctionSku'],
            'rateDate': comment['rateDate'],
        })

    return result_list


# Store the reviews in MySQL and read back the data used for the charts.
def save_db(comments):
    """Insert new reviews into ``tb.taobao`` and query the stored data.

    Each review whose ``id`` is not yet present in the ``taobao_id`` column
    is inserted exactly once; reviews already stored are skipped.

    Args:
        comments: Iterable of dicts with the keys ``id``, ``rateContent``,
            ``auctionSku``, ``rateDate`` and ``content`` (the shape produced
            by ``get_comments``).

    Returns:
        A tuple ``(rs_set, rs_sets)``: the rows of
        ``select rate, content from tb.taobao`` and the rows of the
        ``COUNT(*) ... group by auctionint`` query, both as returned by a
        ``DictCursor``.

    Raises:
        pymysql.err.MySQLError: If connecting or any statement fails.
    """
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration or environment variables.
    connection = pymysql.connect(host='127.0.0.1',
                                 port=3306,
                                 user='root',
                                 password='zhangkai',
                                 db='tb',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)

    select_sql = "select id from tb.taobao where taobao_id=%s"
    insert_sql = """INSERT INTO tb.taobao VALUES (%s, %s ,%s ,%s ,%s, %s)"""

    try:
        # Let a failed ping propagate: the previous handler called the
        # connection object itself, which only produced a TypeError.
        connection.ping(reconnect=True)

        with connection.cursor() as cursor:
            for comment in comments:
                # Skip reviews that were stored by an earlier run. The id is
                # passed as a bound parameter rather than formatted into SQL.
                cursor.execute(select_sql, (comment['id'],))
                if cursor.fetchone():
                    print('这条评论已存在在数据库中')
                    continue

                # Insert only the current review; re-inserting the whole
                # list here would duplicate rows on every iteration.
                cursor.execute(insert_sql, args=(None,
                                                 comment["id"],
                                                 comment["rateContent"],
                                                 comment["auctionSku"],
                                                 comment["rateDate"],
                                                 comment["content"]))
                connection.commit()
                # Pause kept from the original script; its purpose is not
                # evident from this file.
                time.sleep(1.2)
                print('添加成功')

            # The cursor is created outside the loop, so these queries also
            # work when `comments` is empty.
            sql2 = """select rate, content from tb.taobao"""
            cursor.execute(sql2)
            rs_set = cursor.fetchall()

            # Count stored reviews per `auctionint` value.
            sql3 = """SELECT COUNT(*)as num,auctionint FROM tb.taobao group by auctionint"""
            cursor.execute(sql3)
            rs_sets = cursor.fetchall()
    finally:
        # Always release the connection, even when a statement fails.
        connection.close()

    return rs_set, rs_sets

# Merge all review texts into one big string.
def jieba_db(comments):
    """Concatenate the ``rate`` field of every row, in order.

    Args:
        comments: Iterable of mappings that each provide a ``rate`` string.

    Returns:
        A single string made of all ``rate`` values joined without a
        separator; the empty string when there are no rows.
    """
    return ''.join(row['rate'] for row in comments)


# Render the word cloud image.
def word_cloud(string):
    """Build a word cloud from ``string`` and save it as '淘宝词云.png'.

    Args:
        string: Text handed to ``WordCloud.generate``.

    Returns:
        None. The image file is the only output.
    """
    options = {
        # Font file used for rendering.
        'font_path': 'msyhl.ttc',
        'background_color': "white",
        'width': 1000,
        'height': 860,
        'max_font_size': 30,
        'min_font_size': 10,
        'margin': 2,
    }
    cloud = WordCloud(**options)
    cloud.generate(string)
    cloud.to_file('淘宝词云.png')

# Draw the pie chart.
def pygals(comments):
    """Render each row's share of the total as a pie chart in '淘宝.svg'.

    Args:
        comments: Sequence of mappings with a numeric ``num`` and a label
            under ``auctionint``. It is iterated twice.

    Returns:
        None. The chart is written to disk and a confirmation is printed.
    """
    total = sum(row['num'] for row in comments)

    chart = pygal.Pie()
    chart.title = '购买手机颜色比例(in % )'
    for row in comments:
        # Each slice is the row's count as a percentage of all rows.
        share = row['num'] / total * 100
        chart.add(row['auctionint'], share)

    # The SVG output can be opened in a web browser.
    chart.render_to_file('淘宝.svg')

    print('绘图成功')


# Script entry point: load reviews, store them, then build both graphics.
if __name__ == '__main__':
    comment = get_comments()
    # `save` holds the stored review rows, `banben` the per-version counts.
    save, banben = save_db(comments=comment)
    taobao_jieba = jieba_db(comments=save)
    ciyun = word_cloud(string=taobao_jieba)
    print(ciyun)
    # The per-version counts were previously computed but never used, so
    # the pie chart was never drawn; render it here.
    pygals(comments=banben)

词云图片效果如下

在这里插入图片描述

饼状图效果如下

在这里插入图片描述
在这里插入图片描述

原文地址:https://www.cnblogs.com/nicholas7464/p/10257484.html