12月14

以下是数据爬取代码:

复制代码
复制代码
# @Author Hero_poi
import requests
import json
import pymysql

# Crawl the per-country rank list and store one row per country in MySQL.

# Endpoint that returns the rank list as JSON; the records live under "data".
RANKLIST_URL = 'https://api.inews.qq.com/newsqa/v1/automation/foreign/country/ranklist'

# Seconds to wait for the HTTP response before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# Removes the rows of one region before and after the bulk insert.
# NOTE(review): per the accompanying article this region's data is frozen at a
# 2020 date, so its primary key (name + date) repeats on every run and would
# make the whole batch insert roll back -- confirm before changing.
DELETE_STALE_SQL = "delete from datas where country = '马提尼克岛'"

INSERT_SQL = "insert into datas(id,country,continent,date,confirmAdd,confirm,suspect,dead,heal,nowConfirm) values(%s,%s," \
        "%s,%s,%s,%s,%s,%s,%s,%s) "

# The date is passed as a bound parameter rather than spliced into the SQL text.
UPDATE_TODAY_SQL = "update redodata set date = %s where name = 'today'"


def fetch_records(url=RANKLIST_URL, timeout=REQUEST_TIMEOUT):
    """Download the rank list and return the list stored under its "data" key.

    Args:
        url: Endpoint to query.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The decoded value of the "data" key of the JSON response.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error
            status.
        KeyError: If the response has no "data" key.
    """
    with requests.session() as session:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()
        return json.loads(response.text)["data"]


def build_rows(records):
    """Turn API records into parameter rows for the ``datas`` insert.

    Every record is converted, however many the API returns (the list is no
    longer assumed to hold exactly 185 entries).

    Args:
        records: Iterable of dicts with the keys "name", "continent", "y",
            "date", "confirmAdd", "confirm", "suspect", "dead", "heal" and
            "nowConfirm".

    Returns:
        A list of 10-element lists ordered like the columns of the insert
        statement. The first element is the primary key: name followed by
        the date string "<y>.<date>".
    """
    rows = []
    for record in records:
        name = record["name"]
        date = record["y"] + '.' + record["date"]
        rows.append([
            name + date,  # primary key
            name,
            record["continent"],
            date,
            record["confirmAdd"],
            record["confirm"],
            record["suspect"],
            record["dead"],
            record["heal"],
            record["nowConfirm"],
        ])
    return rows


def _run(conn, cursor, sql, args=None, many=False):
    """Execute one statement as its own transaction, best-effort.

    A database error rolls the transaction back and is reported, but does not
    stop the script, so the remaining statements still get their chance.

    Returns:
        True if the statement was committed, False if it was rolled back.
    """
    try:
        if many:
            cursor.executemany(sql, args)
        else:
            cursor.execute(sql, args)
        conn.commit()
    except pymysql.MySQLError as exc:
        conn.rollback()
        print("statement rolled back: %s (%s)" % (sql, exc))
        return False
    return True


def main():
    """Fetch the rank list, store it, and record the date of this crawl."""
    records = fetch_records()
    if not records:
        raise SystemExit("rank list response contained no records")

    # Debug output: show the last record that was received.
    print(records[-1])

    # The date of the first record is stored as the "last crawled" marker.
    today = records[0]["y"] + '.' + records[0]["date"]
    rows = build_rows(records)

    # NOTE(review): credentials are hard-coded; consider loading them from the
    # environment or a config file that is kept out of version control.
    conn = pymysql.connect(host="127.0.0.1", port=3306, user="root", password="260702266", database="hope", charset="utf8")
    try:
        with conn.cursor() as cursor:
            _run(conn, cursor, DELETE_STALE_SQL)
            _run(conn, cursor, INSERT_SQL, rows, many=True)
            _run(conn, cursor, UPDATE_TODAY_SQL, (today,))
            _run(conn, cursor, DELETE_STALE_SQL)
    finally:
        # Release the connection even if something unexpected is raised.
        conn.close()


if __name__ == "__main__":
    main()
复制代码
复制代码

这一段中删除马提尼克岛的原因是:该地区的疫情数据一直停留在2020年的某天,而我们的主键由地名和日期拼接而成,因此每次爬取该地区都会得到相同的主键,批量插入时就会因主键重复而触发回滚……

另外我们还建了一个标记最后爬取日期的表,以便咱们搭建网站时使用

 
好文要顶 关注我 收藏该文  
原文地址:https://www.cnblogs.com/ldy2396/p/15664301.html