数据爬取

要求:

编程爬取每日最新的疫情统计数据。

并将爬取结果导入到数据库中。

将可视化结果与统计数据结合,实时显示当前最新数据。

思路:用 Python 爬取网易的实时疫情数据,并将数据存储到 MySQL 数据库中

源程序代码:

import requests

import time, json

import pymysql

import sys;

def insert(date: str, provinae_list: list) -> None:
    """Write one row per city of the crawled epidemic data into MySQL.

    Each element of ``provinae_list`` is a province dict whose ``'children'``
    entry lists city dicts. For every city, ``date``, the province name, the
    city name, the four ``total`` counters (``confirm``, ``suspect``,
    ``heal``, ``dead``) and the city ``id`` are inserted into table ``info``.

    Inserts are best-effort per row: a row that the database rejects is
    rolled back and reported, and processing continues with the next row.

    Args:
        date: Value stored in the ``Date`` column of every inserted row.
        provinae_list: Province dicts shaped as described above.

    Raises:
        KeyError: If a province or city dict lacks one of the keys read here.
    """
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
    # positional arguments to connect().
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration.
    db = pymysql.connect(
        host="localhost", user="root", password="123", database="epidemic"
    )
    # Placeholders are left unquoted so the driver escapes every value itself
    # (no SQL injection, names containing quotes work, None becomes NULL).
    sql = (
        "INSERT INTO info"
        "(Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num,Code) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    )
    try:
        with db.cursor() as cursor:
            for province in provinae_list:
                for city in province['children']:
                    total = city['total']
                    data = (
                        date,
                        province['name'],
                        city['name'],
                        total['confirm'],
                        total['suspect'],
                        total['heal'],
                        total['dead'],
                        city['id'],
                    )
                    print(*data)
                    try:
                        cursor.execute(sql, data)
                        db.commit()
                    except pymysql.MySQLError as exc:
                        # Only database errors are treated as a failed row;
                        # anything else (e.g. Ctrl-C) propagates.
                        db.rollback()
                        print("插入数据失败", exc)
                    else:
                        print('插入数据成功')
    finally:
        # Release the connection even when an unexpected error escapes.
        db.close()

    

def get_wangyi_request(timeout: float = 10):
    """Request the epidemic totals from the NetEase (163.com) endpoint.

    The request is sent with browser-like headers. The response is returned
    as-is; no status check or decoding is performed here.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The ``requests.Response`` object of the GET request.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    url = 'https://c.m.163.com/ug/api/wuhan/app/data/list-total'
    headers = {
        'accept': '*/*',
        'accept-encoding': 'gzip,deflate,br',
        # Quality values must not contain spaces around '='.
        'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'origin': 'https://wp.m.163.com',
        'referer': 'https://wp.m.163.com/',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        # Was 'same-ite', which is not a valid Sec-Fetch-Site value.
        'sec-fetch-site': 'same-site',
        # Restored the spaces and version digits lost in the original string.
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/82.0.4056.0 Safari/537.36 Edg/82.0.432.3'
        ),
    }
    return requests.get(url, headers=headers, timeout=timeout)

def print_mess(string: str, dict_total: dict) -> None:
    """Write the four epidemic counters of ``dict_total`` to stdout.

    The output is a single line without trailing newline of the form
    ``<string>确诊: N <string>疑似: N <string>治愈: N <string>死亡: N``.
    A counter whose value is ``None`` is shown as ``0``.

    Args:
        string: Prefix placed in front of every label.
        dict_total: Mapping with the keys ``confirm``, ``suspect``, ``heal``
            and ``dead``.

    Raises:
        KeyError: If one of the four keys is missing from ``dict_total``.
    """
    # (label shown to the user, key in dict_total), in output order.
    fields = (
        ('确诊: ', 'confirm'),
        ('疑似: ', 'suspect'),
        ('治愈: ', 'heal'),
        ('死亡: ', 'dead'),
    )
    parts = []
    for label, key in fields:
        value = dict_total[key]
        # Identity check is the correct way to test for None.
        parts.append(string + label + str(value if value is not None else 0))
    sys.stdout.write(' '.join(parts))

if __name__ == '__main__':
    # Script entry point: fetch the latest data, then store it in MySQL.
    result = get_wangyi_request()
    # Despite its name, json_str is the parsed 'data' object (a dict).
    # Per the original author's note its keys are:
    # dict_keys(['chinaTotal', 'chinaDayList', 'lastUpdateTime', 'areaTree'])
    json_str = json.loads(result.text)['data']
    print(json_str['lastUpdateTime'])
    # NOTE(review): index 2 of 'areaTree' is presumably the entry for China,
    # whose children are the provinces -- confirm against the live payload.
    # Each province dict contains the following keys:
    # dict_keys(['today', 'total', 'extData', 'name', 'id', 'lastUpdateTime', 'children'])
    provinae_list = json_str['areaTree'][2]['children']
    # Keep the insert inside the guard: at module level it would run on
    # import and fail with NameError because json_str is only defined here.
    insert(json_str['lastUpdateTime'], provinae_list)

原文地址:https://www.cnblogs.com/2506236179zhw/p/13237969.html