# 4. Using selenium to scrape dynamically loaded data: fetch company names and company numbers from the drug administration (药监局) site

from selenium import webdriver
from time import sleep
from lxml import etree

# Scrape company names and company numbers from the listing page.
# A real browser is driven so that page_source contains the list as it is
# shown after the page has loaded.

START_URL = 'http://125.35.6.84:81/xk/'
NEXT_PAGE_BUTTON_ID = 'pageIto_next'
EXTRA_PAGES = 3          # pages fetched in addition to the first one
PAGE_LOAD_DELAY = 1      # seconds to wait after each navigation/click


def _parse_companies(page_html):
    """Return a list of (company_name, company_number) pairs found in page_html.

    Each ``<li>`` under ``<ul id="gzlist">`` is expected to carry the company
    name in ``dl/@title`` and the company number in ``ol/@title``. Entries
    missing either attribute are skipped instead of raising ``IndexError``.
    """
    tree = etree.HTML(page_html)
    companies = []
    for li in tree.xpath('//ul[@id="gzlist"]/li'):
        titles = li.xpath('./dl/@title')   # company name
        nums = li.xpath('./ol/@title')     # company number
        if titles and nums:
            companies.append((titles[0], nums[0]))
    return companies


# NOTE(review): executable_path and find_element_by_id look like Selenium 3-era
# APIs -- confirm the installed Selenium version still supports them.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    # Open the listing page and give it time to load its data.
    bro.get(START_URL)
    sleep(PAGE_LOAD_DELAY)

    # Source of the first page.
    page_text = bro.page_source
    page_text_list = [page_text]

    # Click "next page" EXTRA_PAGES times, collecting the source after each click.
    for _ in range(EXTRA_PAGES):
        bro.find_element_by_id(NEXT_PAGE_BUTTON_ID).click()
        sleep(PAGE_LOAD_DELAY)
        page_text_list.append(bro.page_source)

    # Parse every collected page and print "name:number" for each company.
    for page_text in page_text_list:
        for title, num in _parse_companies(page_text):
            print(title + ':' + num)

    sleep(2)
finally:
    # Always shut the browser down, even if navigation or parsing failed,
    # so no Chrome/chromedriver process is left behind.
    bro.quit()
# Original article: https://www.cnblogs.com/zhang-da/p/12335889.html