爬取最新疫情数据

题目:

编程爬取每日最新的疫情统计数据。

并将爬取结果导入到数据库中。

将可视化结果与统计数据结合,实时显示当前最新数据。

这次的作业可以与上周的可视化作业整合成一套完整的代码,只需要在上周代码的基础上加上用 Python 爬取数据的部分即可。

本次爬取的是丁香医生网站的数据,网址为:https://ncov.dxy.cn/ncovh5/view/pneumonia

爬取的代码如下:

 1 from os import path
 2 import requests
 3 from bs4 import BeautifulSoup
 4 import json
 5 import pymysql
 6 #import numpy as np
 7 import time
 8 from _ast import Try
 9 
# Scrape the latest per-province / per-city epidemic figures from the DXY
# page and append them to the `info_copy` table of a local MySQL database.
#
# Relies on the file-level imports: requests, BeautifulSoup, json, pymysql, time.

# Request URL of the DXY epidemic overview page.
url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'
# Browser-like header sent with the request.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}

# The timestamp is bound as the last parameter instead of being concatenated
# into the statement text.
sql_city = "insert into info_copy (Province,City,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s,%s)"
sql_province = "insert into info_copy (Province,Confirmed_num,Cured_num,Dead_num,Code,Date) values (%s,%s,%s,%s,%s,%s)"


def extract_json_array(script_text):
    """Parse the JSON array embedded in a ``<script>`` element.

    The page wraps its data as ``try { window.NAME = [...]}catch(e){}``.
    Instead of relying on fixed character offsets, take everything from the
    first ``[`` to the last ``]`` and decode that.

    Args:
        script_text: Markup or text of the script element.

    Returns:
        The decoded JSON value (a list for the scripts used here).

    Raises:
        ValueError: If no ``[...]`` span is present or it is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    start = script_text.find('[')
    end = script_text.rfind(']')
    if start == -1 or end < start:
        raise ValueError('no JSON array found in script text')
    return json.loads(script_text[start:end + 1])


def fetch_area_stats():
    """Download the page and return the decoded ``getAreaStat`` payload.

    World-wide figures are published the same way in the script with id
    ``getListByCountryTypeService2`` and can be decoded with
    ``extract_json_array`` as well; they are not stored by this script.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
        ValueError: If the expected script element or its JSON is missing.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content.decode('utf-8'), 'html.parser')
    script_tag = soup.find(name='script', attrs={"id": "getAreaStat"})
    if script_tag is None:
        raise ValueError('script element "getAreaStat" not found in page')
    return extract_json_array(str(script_tag))


def build_rows(provinces, timestamp):
    """Turn the decoded province list into insert-ready row tuples.

    Args:
        provinces: List of province dicts as decoded from ``getAreaStat``.
        timestamp: Value stored in the ``Date`` column of every row.

    Returns:
        ``(province_rows, city_rows)`` where each province row is
        ``(provinceName, confirmedCount, curedCount, deadCount, locationId,
        timestamp)`` and each city row is ``(provinceName, cityName,
        confirmedCount, curedCount, deadCount, locationId, timestamp)``.
    """
    province_rows = []
    city_rows = []
    for province in provinces:
        province_name = province.get('provinceName')
        province_rows.append((
            province_name,
            province.get('confirmedCount'),
            province.get('curedCount'),
            province.get('deadCount'),
            province.get('locationId'),
            timestamp,
        ))
        # 'cities' may be absent or null for an entry; treat that as no cities.
        for city in province.get('cities') or []:
            city_rows.append((
                province_name,
                city.get('cityName'),
                city.get('confirmedCount'),
                city.get('curedCount'),
                city.get('deadCount'),
                city.get('locationId'),
                timestamp,
            ))
    return province_rows, city_rows


def save_rows(province_rows, city_rows):
    """Insert all rows into ``info_copy`` in a single transaction.

    On any failure the transaction is rolled back and the error is printed;
    the exception is not re-raised (best-effort, as before). The connection
    is always closed.
    """
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
    # positional connection parameters.
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration or environment variables.
    db = pymysql.connect(host="localhost", user="root", password="123456",
                         database="payiqing", charset='utf8')
    try:
        cursor = db.cursor()
        cursor.executemany(sql_province, province_rows)
        cursor.executemany(sql_city, city_rows)
        db.commit()
    except Exception as exc:
        # Report the actual cause instead of hiding it, then undo partial work.
        print('执行失败,进入回调4', exc)
        db.rollback()
    finally:
        db.close()


def main():
    """Fetch the current figures, print the run timestamp and store the rows."""
    provinces = fetch_area_stats()
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print(timestamp)
    province_rows, city_rows = build_rows(provinces, timestamp)
    save_rows(province_rows, city_rows)


if __name__ == '__main__':
    main()

加上上次的代码,效果如下图所示:

 

 psp表格

缺陷记录日志

原文地址:https://www.cnblogs.com/xueqiuxiang/p/12485778.html