selenium爬虫 | 爬取疫情实时动态

import csv
import selenium.webdriver
from selenium.webdriver.chrome.options import Options

class spider():
def get_msg(self,url):
global timeNum, provinceDic
# 无窗口弹出操作
options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver=selenium.webdriver.Chrome(options=options)
driver.get(url)
timeNum=driver.find_element_by_xpath('//*[@id="charts"]/div[2]/span[1]').text#实时
icbar_confirm=driver.find_element_by_xpath('//*[@id="charts"]/div[3]/div[1]/div[1]').text#全国确诊数
icbar_suspect=driver.find_element_by_xpath('//*[@id="charts"]/div[3]/div[2]/div[1]').text#疑似病例数
icbar_cure=driver.find_element_by_xpath('//*[@id="charts"]/div[3]/div[3]/div[1]').text#治愈人数
icbar_dead=driver.find_element_by_xpath('//*[@id="charts"]/div[3]/div[4]/div[1]').text#死亡人数
print("{} 全国确诊:{} 疑似病例:{} 治愈人数:{} 死亡人数:{} ".format(timeNum, icbar_confirm, icbar_cure, icbar_dead,icbar_suspect))
place_current=driver.find_elements_by_css_selector('div[class="place current"]')#湖北省的数据
place = driver.find_elements_by_css_selector('div[class="place"]')#其他省的数据
place_= driver.find_elements_by_css_selector('div[class="place "]')#其他省的数据
place_no_sharp = driver.find_elements_by_css_selector("div[class='place no-sharp ']")#自治区的数据
tplt = "{0:{4}<10} {1:{4}<15} {2:{4}<15} {3:{4}<15}"
print(tplt.format("地区","确诊人数","治愈人数","死亡人数",chr(12288)) + " ")
# 建立一个字典,键为省名,值为省的具体数据
provinceDic=dict()
provinceDic["全国"]=["全国",icbar_confirm, icbar_cure, icbar_dead, icbar_suspect]
places = place_current + place + place_ + place_no_sharp # 所有的行省的数据列表合集
for place in places:
# print(place.text)
name=place.find_element_by_css_selector("span[class='infoName']").text
confirm=place.find_element_by_css_selector("span[class='confirm'] span").text
try:
heal=place.find_element_by_css_selector("span[class='heal '] span").text
except:
heal = place.find_element_by_css_selector("span[class='heal hide'] span").text
try:
dead=place.find_element_by_css_selector("span[class='dead '] span").text
except:
dead=place.find_element_by_css_selector("span[class='dead hide'] span").text
print(tplt.format(name,confirm,heal,dead,chr(12288)))
provinceDic[name]=[name,confirm,heal,dead]
def save_data_as_csv(self,filename,dataList):
# filename="_".join(time.split(":"))
filename=filename.replace(":"," ")#调整时间
with open(filename+".csv","w",newline="") as f:
writer=csv.writer(f)
writer.writerow(["地区","确诊人数","治愈人数","死亡人数","疑似病例"])
for i in dataList:
writer.writerow(i)
f.close()
def main(self):
url = "https://news.qq.com/zt2020/page/feiyan.htm"
self.get_msg(url)
self.save_data_as_csv(timeNum,provinceDic.values())


billie=spider()
billie.main()


原文地址:https://www.cnblogs.com/billie52707/p/12238787.html