selenium的基本使用

selenium模块实现浏览器自动化搜索京东商品并获取页面源码

from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start a Chrome session driven by the chromedriver binary at the given path.
# NOTE(review): newer Selenium 4 releases expect a Service object instead of
# `executable_path` - confirm against the installed Selenium version.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    # Open the JD home page and give it a moment to render.
    bro.get('https://www.jd.com/')
    sleep(2)

    # Locate the search box by its id and type the query into it.
    # find_element(By..., ...) is used because the find_element_by_* helpers
    # are deprecated and no longer exist in current Selenium releases.
    search_input = bro.find_element(By.ID, 'key')
    search_input.send_keys('ipad')

    # Locate the search button and click it to submit the query.
    search_btn = bro.find_element(
        By.XPATH, '//*[@id="search"]/div/div[2]/button'
    )
    search_btn.click()
    sleep(2)

    # Run JavaScript in the page to scroll to the bottom of the document.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)

    # Grab the source of the page as currently rendered and print it.
    page_text = bro.page_source
    print(page_text)

    sleep(2)
finally:
    # Always shut the browser down, even if one of the steps above raised,
    # so no browser/driver process is left running.
    bro.quit()

selenium模块实现浏览器自动化访问药监总局多个页面,并获取页面上的信息

from time import sleep

from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

# Start a Chrome session driven by the chromedriver binary at the given path.
# NOTE(review): newer Selenium 4 releases expect a Service object instead of
# `executable_path` - confirm against the installed Selenium version.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    bro.get('http://scxk.nmpa.gov.cn:81/xk/')
    sleep(2)

    # Keep the source of the first page, then collect the following pages.
    page_text_list = [bro.page_source]

    # Click the "next page" button twice, saving the source after each click.
    for _ in range(2):
        # find_element(By..., ...) replaces the deprecated find_element_by_*
        # helpers; click() returns nothing, so its result is not stored.
        bro.find_element(By.XPATH, '//*[@id="pageIto_next"]').click()
        sleep(1)
        page_text_list.append(bro.page_source)

    sleep(2)
finally:
    # Always shut the browser down, even if a step above raised.
    bro.quit()

# Parse every collected page and print "<dl title>:<ol title>" for each
# list item found under the element with id "gzlist".
for page_source in page_text_list:
    tree = etree.HTML(page_source)
    for li in tree.xpath('//*[@id="gzlist"]/li'):
        names = li.xpath('./dl/@title')
        nums = li.xpath('./ol/@title')
        if not names or not nums:
            # Skip items lacking either title attribute instead of failing
            # with IndexError on the [0] lookup.
            continue
        print(names[0] + ':' + nums[0])

selenium模块动作链的基本使用

from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Start a Chrome session driven by the chromedriver binary at the given path.
# NOTE(review): newer Selenium 4 releases expect a Service object instead of
# `executable_path` - confirm against the installed Selenium version.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # Elements nested inside an <iframe> cannot be located from the top-level
    # document, so switch into the frame before searching for the target.
    bro.switch_to.frame('iframeResult')
    # find_element(By..., ...) replaces the deprecated find_element_by_id.
    div_tag = bro.find_element(By.ID, 'draggable')

    # A drag is "press and hold" followed by a series of moves.
    # Build an action chain bound to this browser and queue the press.
    action = ActionChains(bro)
    action.click_and_hold(div_tag)

    for _ in range(5):
        # Offsets are (horizontal, vertical). Queued actions only run once
        # perform() is called.
        # (move_to_element() can be used instead to move onto a given element.)
        action.move_by_offset(17, 5).perform()
        sleep(0.5)

    # Release the held mouse button. This must also be performed; merely
    # queuing release() leaves the button pressed and the drag unfinished.
    action.release().perform()

    sleep(3)
finally:
    # Always shut the browser down, even if a step above raised.
    bro.quit()
原文地址:https://www.cnblogs.com/straightup/p/13693372.html