selenium使用

selenium安装

安装 pip install selenium

模拟人操作浏览器

需要装对应的浏览器驱动

将浏览器驱动的exe文件放到Python安装目录的Scripts文件夹下(或其他已加入PATH环境变量的目录)

需导入模块

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument('window-size=1920x3000') #指定浏览器分辨率
chrome_options.add_argument('--disable-gpu') #谷歌文档提到需要加上这个属性来规避bug
chrome_options.add_argument('--hide-scrollbars') #隐藏滚动条, 应对一些特殊页面
chrome_options.add_argument('blink-settings=imagesEnabled=false') #不加载图片, 可以提升速度
chrome_options.add_argument('--headless') #浏览器不提供可视化页面. linux下如果系统无界面, 不加这条会启动失败
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])#取消浏览器驱动提示

配置文件

from selenium.webdriver.chrome.options import Options
chrome_options = Options()
# chrome_options.add_argument('window-size=1920x3000') #指定浏览器分辨率
chrome_options.add_argument('--disable-gpu') #谷歌文档提到需要加上这个属性来规避bug
chrome_options.add_argument('--hide-scrollbars') #隐藏滚动条, 应对一些特殊页面
chrome_options.add_argument('blink-settings=imagesEnabled=false') #不加载图片, 可以提升速度
#使用方法
driver = webdriver.Chrome(options=chrome_options)

元素查找

 1、find_element_by_id 通过id
 2、find_element_by_link_text 通过链接内容
 3、find_element_by_partial_link_text 链接内容的模糊查询 
 4、find_element_by_tag_name 通过标签名
 5、find_element_by_class_name 通过class名 只能传入单个class名: 传入"oo"可以匹配到class="oo xx ss"的标签; 不能传入带空格的复合class名"oo xx", 需要同时匹配多个class时改用css选择器(如 .oo.xx)
 6、find_element_by_name 通过name名 name="xx"
 7、find_element_by_css_selector 通过css选择器
 8、find_element_by_xpath 通过xpath

Xpath

//在这个页面下查找
index//a 查找index下的子子孙孙中的a标签
index/a 查找index下的儿子a标签
/在根下找
#Chrome浏览器中点击检查,确认到标签位置,右击copy,copy中有copy xpath选项,可以快速获得xpath

等待页面加载

显式等待:明确需要等待的标签
wait = WebDriverWait(browser,3) #最多等待3秒
#等待哪个元素加载好
wait.until(EC.presence_of_element_located((By.ID,'q')))
#等待元素可点击(参数为元组)
wait.until(EC.element_to_be_clickable((By.ID,"nc_1_n1z")))



隐式等待:查找任何标签时, 如果标签还没有加载好, 最多等待5秒
driver.implicitly_wait(5)
#获取输入框
input_tag = driver.find_element_by_id("kw")
#输入
input_tag.send_keys("alex大宝贝")
#操作键盘
input_tag.send_keys(Keys.ENTER)#回车
#通过标签文本内容查找
login_tag = driver.find_element_by_link_text("登录")
#文本内容模糊查询
driver.find_element_by_partial_link_text("录")
#点击
login_tag.click()

常用操作

browser=webdriver.Chrome()
browser.get('https://www.amazon.cn/')
wait=WebDriverWait(browser,10)
#获取标签
input_tag=wait.until(EC.presence_of_element_located((By.ID,'twotabsearchtextbox')))
#在标签内输入内容
input_tag.send_keys('iphone 8')
#回车
input_tag.send_keys(Keys.ENTER)
#清空
input_tag.clear()
#点击
button.click()

动作链

#按住并保持,perform不能少
ActionChains(driver).click_and_hold(source).perform()
#移动
ActionChains(driver).move_by_offset(xoffset=2,yoffset=0).perform()
#松开
ActionChains(driver).release().perform()
#可以自己写js语句,执行动作
browser.execute_script('alert("hello world")')