环境安装
-
-
下载浏览器驱动程序:
-
查看驱动和浏览器版本的映射关系:
应用
# Minimal Selenium walkthrough: open Baidu, type a query into the search box
# and click the search button.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start Chrome through the chromedriver binary located next to this script.
# NOTE: Selenium 4 prefers passing a Service object instead of executable_path;
# the keyword is kept here so the snippet still runs on Selenium 3.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://www.baidu.com')
    sleep(2)

    # Locate the search input by its id and type the query.
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3; this form works on both Selenium 3 and 4.
    tag_input = bro.find_element(By.ID, 'kw')
    tag_input.send_keys('人民币')
    sleep(2)

    # Locate the search button and click it.
    btn = bro.find_element(By.ID, 'su')
    btn.click()
    sleep(2)
finally:
    # Close the browser even if one of the steps above raised, so no
    # chromedriver/Chrome process is left running.
    bro.quit()
雪球网应用
# Xueqiu example: scroll the page down several times with JavaScript, click
# the "load more" link, then print the rendered page source.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# NOTE: Selenium 4 prefers passing a Service object instead of executable_path;
# the keyword is kept here so the snippet still runs on Selenium 3.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://xueqiu.com/')
    sleep(5)

    # Run JavaScript in the page to scroll down to the bottom. This is done
    # four times with a pause after each scroll.
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    for _ in range(4):
        bro.execute_script(js)
        sleep(2)

    # Locate the "load more" link and click it.
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3; this form works on both Selenium 3 and 4.
    a_tag = bro.find_element(By.XPATH, '//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
    a_tag.click()
    sleep(5)

    # Page source as currently held by the browser (including dynamic content).
    print(bro.page_source)
finally:
    # Close the browser even if one of the steps above raised.
    bro.quit()
PhantomJS 是一款无可视化界面的浏览器(免安装)。该项目已停止更新,不建议使用。
# PhantomJS example: same Xueqiu scrolling flow as a headless session, with
# screenshots taken before and after scrolling.
# NOTE: webdriver.PhantomJS is deprecated and, as far as the Selenium changelog
# indicates, no longer available in Selenium 4 - this snippet needs Selenium 3.
from time import sleep

from selenium import webdriver

# Path to the PhantomJS executable. The backslashes had been lost from the
# original path; this follows the bin\phantomjs.exe layout of the PhantomJS
# 2.1.1 Windows archive - adjust it to wherever the archive was unpacked.
bro = webdriver.PhantomJS(executable_path=r'phantomjs-2.1.1-windows\bin\phantomjs.exe')
try:
    bro.get('https://xueqiu.com/')
    sleep(2)

    # Screenshot before scrolling.
    bro.save_screenshot('./1.png')

    # Run JavaScript in the page to scroll down to the bottom. This is done
    # four times with a pause after each scroll.
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    for _ in range(4):
        bro.execute_script(js)
        sleep(2)

    # Screenshot after scrolling.
    bro.save_screenshot('./2.png')

    # Optional step (disabled in the original): click the "load more" link.
    # from selenium.webdriver.common.by import By
    # a_tag = bro.find_element(By.XPATH, '//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
    # bro.save_screenshot('./2.png')
    # a_tag.click()
    sleep(2)

    # Page source as currently held by the browser (including dynamic content).
    print(bro.page_source)
finally:
    # Shut the PhantomJS process down even if one of the steps above raised.
    bro.quit()
谷歌无头浏览器
# Headless Chrome example: run the Baidu search flow without a visible
# browser window, saving a screenshot and printing the result page source.
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Options object used to make Chrome start without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# NOTE: Selenium 4 prefers passing a Service object instead of executable_path;
# the keyword is kept here so the snippet still runs on Selenium 3.
bro = webdriver.Chrome(executable_path='./chromedriver.exe', options=chrome_options)
try:
    bro.get('https://www.baidu.com')
    sleep(2)
    bro.save_screenshot('1.png')

    # Locate the search input by its id and type the query.
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3; this form works on both Selenium 3 and 4.
    tag_input = bro.find_element(By.ID, 'kw')
    tag_input.send_keys('人民币')
    sleep(2)

    # Locate the search button and click it.
    btn = bro.find_element(By.ID, 'su')
    btn.click()
    sleep(2)

    print(bro.page_source)
finally:
    # Close the browser even if one of the steps above raised; a leaked
    # headless Chrome has no window to close by hand.
    bro.quit()
动作链
# ActionChains example: press and hold the draggable element on the runoob
# jQuery UI droppable demo page and move it to the right in small steps.
from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# NOTE: Selenium 4 prefers passing a Service object instead of executable_path;
# the keyword is kept here so the snippet still runs on Selenium 3.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    bro.get(url=url)

    # When the target element lives inside an <iframe>, the driver must first
    # switch into that frame before the element can be located.
    bro.switch_to.frame('iframeResult')
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3; this form works on both Selenium 3 and 4.
    source_tag = bro.find_element(By.ID, 'draggable')

    # Create the action-chain object and queue a press-and-hold on the element.
    action = ActionChains(bro)
    action.click_and_hold(source_tag)

    for _ in range(4):
        # perform() is what actually starts executing the queued actions.
        action.move_by_offset(20, 0).perform()
        sleep(1)

    # Release the mouse button that click_and_hold pressed; the original
    # snippet never released it before quitting.
    action.release().perform()
finally:
    # Close the browser even if one of the steps above raised.
    bro.quit()
selenium规避被检测识别
现在不少大型网站都对 selenium 采取了检测机制。比如,正常情况下我们用浏览器访问淘宝等网站时,window.navigator.webdriver 的值为 undefined;而使用 selenium 访问时,该值为 true。
只需要设置 Chromedriver 的启动参数即可解决问题:在启动 Chromedriver 之前,为 Chrome 开启实验性功能参数 excludeSwitches,并将它的值设为 ['enable-automation']。
# Start Chrome with the experimental option excludeSwitches set to
# ['enable-automation'], as described in the text above.
from selenium.webdriver import Chrome, ChromeOptions

option = ChromeOptions()
option.add_experimental_option(
    'excludeSwitches',
    ['enable-automation'],
)

# The configured options must be handed to the driver at construction time.
driver = Chrome(options=option)