今天对疫情数据进行了爬取
#-*- coding=utf-8 -*-
import
pymysql
import
re
import
json
from
pip._vendor
import
requests
def create():
    """Drop and recreate the ``info`` table in the ``payiqing`` database.

    Connects to the MySQL server on ``localhost`` as ``root``, drops any
    existing ``info`` table and creates it again with an auto-increment
    ``Id`` primary key plus eight ``varchar(255)`` columns.  All rows
    previously stored in ``info`` are lost.

    Errors raised by pymysql while connecting or executing propagate to
    the caller; the connection is closed in either case.
    """
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
    # positional arguments to connect().
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123",
        database="payiqing",
        charset='utf8',
    )
    try:
        with db.cursor() as cursor:
            cursor.execute("DROP TABLE IF EXISTS info")
            sql = """CREATE TABLE info(
Id INT PRIMARY KEY AUTO_INCREMENT,
Date varCHAR(255),
Province varchar(255),
City varchar(255),
Confirmed_num varchar(255),
Yisi_num varchar(255),
Cured_num varchar(255),
Dead_num varchar(255),
Code varchar(255))"""
            # Create the table.
            cursor.execute(sql)
    finally:
        # Release the connection even when one of the statements fails.
        db.close()
def insert(value):
    """Insert one row into the ``info`` table.

    Opens its own connection to the ``payiqing`` database, runs a
    parameterized INSERT, and closes the connection again.  On success the
    transaction is committed and a success message is printed; on failure
    the transaction is rolled back and a failure message (with the error)
    is printed, and the function returns normally.

    Args:
        value: sequence of eight values bound, in order, to the columns
            Date, Province, City, Confirmed_num, Yisi_num, Cured_num,
            Dead_num and Code.
    """
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
    # positional arguments to connect().
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123",
        database="payiqing",
        charset='utf8',
    )
    # Adjacent literals concatenate to the same single-line statement.
    sql = (
        "INSERT INTO info(Date,Province,City,Confirmed_num,Yisi_num,Cured_num,Dead_num,Code) "
        "VALUES ( %s,%s,%s,%s,%s,%s,%s,%s)"
    )
    try:
        try:
            with db.cursor() as cursor:
                cursor.execute(sql, value)
            db.commit()
            print('插入数据成功')
        except Exception as exc:
            # Best-effort insert: report the failure and carry on.  Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through.
            db.rollback()
            print("插入数据失败", exc)
    finally:
        # Close the connection even if commit or rollback itself raised.
        db.close()
# Recreate the ``info`` table (any existing rows are dropped).
create()

# Download the DXY area snapshot.
url = 'https://raw.githubusercontent.com/BlankerL/DXY-2019-nCoV-Data/master/json/DXYArea.json'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()

# Raw response body as text.
versionInfo = response.text
# print(versionInfo)  # uncomment to dump the downloaded payload
# json.load() reads from a file object; json.loads() parses an in-memory string.
jsonData = json.loads(versionInfo)

# Collections reserved for extracted data (not populated in this section).
dataSource = []
provinceShortNameList = []
confirmedCountList = []
curedCount = []
deadCountList = []

# Date stored with every inserted row (hard-coded).
date = '2020-3-10'

# Walk every result entry and store one row per city of each Chinese province.
for province in jsonData['results']:
    if province['countryName'] != '中国':
        continue
    provinceShortName = province['provinceName']
    # Skip the placeholder entry for not-yet-attributed cases.
    if "待明确地区" == provinceShortName:
        continue
    # Treat a missing or null 'cities' entry as "no cities" rather than crashing.
    for city in province.get('cities') or []:
        insert((
            date,
            provinceShortName,
            city['cityName'],
            city['confirmedCount'],
            city['suspectedCount'],
            city['curedCount'],
            city['deadCount'],
            city['locationId'],
        ))