中文文档:
https://selenium-python-zh.readthedocs.io/en/latest/installation.html
下载 chromedriver
https://sites.google.com/a/chromium.org/chromedriver/downloads
安装 导入
from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait
声明浏览器对象
from selenium import webdriver browser = webdriver.Chrome() browser = webdriver.Firefox() browser = webdriver.Edge() browser = webdriver.PhantomJS() browser = webdriver.Safari()
访问页面
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.taobao.com') print(browser.page_source) browser.close()
查找元素
单个元素
input_first = browser.find_element_by_id('q') input_second = browser.find_element_by_css_selector('#q') input_third = browser.find_element_by_xpath('//*[@id="q"]') print(input_first, input_second, input_third)
其他
find_element_by_name
find_element_by_xpath
find_element_by_link_text
find_element_by_partial_link_text
find_element_by_tag_name
find_element_by_class_name
find_element_by_css_selector
例子:
from selenium import webdriver from selenium.webdriver.common.by import By browser = webdriver.Chrome() browser.get('https://www.taobao.com') input_first = browser.find_element(By.ID, 'q') print(input_first) browser.close()
多个元素
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.taobao.com') lis = browser.find_elements_by_css_selector('.service-bd li')
from selenium.webdriver.common.by import By #需要导入模块
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li') #两种结果一样
print(lis) browser.close()
返回列表格式,
[<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-1")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-2")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-3")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-4")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-5")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-6")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-7")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-8")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-9")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-10")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-11")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-12")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-13")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-14")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-15")>,
<selenium.webdriver.remote.webelement.WebElement (session="c26290835d4457ebf7d96bfab3740d19", element="0.09221044033125603-16")>]
- find_elements_by_name
- find_elements_by_xpath
- find_elements_by_link_text
- find_elements_by_partial_link_text
- find_elements_by_tag_name
- find_elements_by_class_name
- find_elements_by_css_selector
元素交互操作
对获取的元素调用交互方法
from selenium import webdriver import time browser = webdriver.Chrome() browser.get('https://www.taobao.com') input = browser.find_element_by_id('q') input.send_keys('iPhone') time.sleep(1) input.clear() input.send_keys('iPad') button = browser.find_element_by_class_name('btn-search') button.click()
更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement
交互动作
将动作附加到动作链中串行执行
from selenium import webdriver from selenium.webdriver import ActionChains browser = webdriver.Chrome() url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable' browser.get(url) browser.switch_to.frame('iframeResult') source = browser.find_element_by_css_selector('#draggable') target = browser.find_element_by_css_selector('#droppable') actions = ActionChains(browser) actions.drag_and_drop(source, target) actions.perform()
更多操作: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.action_chains
执行JavaScript
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.zhihu.com/explore') browser.execute_script('window.scrollTo(0, document.body.scrollHeight)') browser.execute_script('alert("To Bottom")')
获取元素信息
获取属性
from selenium import webdriver from selenium.webdriver import ActionChains browser = webdriver.Chrome() url = 'https://www.zhihu.com/explore' browser.get(url) logo = browser.find_element_by_id('zh-top-link-logo') print(logo) print(logo.get_attribute('class'))
<selenium.webdriver.remote.webelement.WebElement (session="e08c0f28d7f44d75ccd50df6bb676104", element="0.7236390660048155-1")> zu-top-link-logo
获取文本值
from selenium import webdriver browser = webdriver.Chrome() url = 'https://www.zhihu.com/explore' browser.get(url) input = browser.find_element_by_class_name('zu-top-add-question') print(input.text)
提问
获取ID、位置、标签名、大小
from selenium import webdriver browser = webdriver.Chrome() url = 'https://www.zhihu.com/explore' browser.get(url) input = browser.find_element_by_class_name('zu-top-add-question') print(input.id) print(input.location) print(input.tag_name) print(input.size)
0.6822924344980397-1 {'y': 7, 'x': 774} button {'height': 32, 'width': 66}
Frame
import time from selenium import webdriver from selenium.common.exceptions import NoSuchElementException browser = webdriver.Chrome() url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable' browser.get(url) browser.switch_to.frame('iframeResult') source = browser.find_element_by_css_selector('#draggable') print(source) try: logo = browser.find_element_by_class_name('logo') except NoSuchElementException: print('NO LOGO') browser.switch_to.parent_frame() logo = browser.find_element_by_class_name('logo') print(logo) print(logo.text)
<selenium.webdriver.remote.webelement.WebElement (session="4bb8ac03ced4ecbdefef03ffdc0e4ccd", element="0.44746093888932004-1")> NO LOGO <selenium.webdriver.remote.webelement.WebElement (session="4bb8ac03ced4ecbdefef03ffdc0e4ccd", element="0.13792611320464965-2")> RUNOOB.COM
等待
隐式等待
当使用了隐式等待执行测试的时候,如果 WebDriver没有在 DOM中找到元素,将继续等待,超出设定时间后则抛出找不到元素的异常, 换句话说,当查找元素或元素并没有立即出现的时候,隐式等待将等待一段时间再查找 DOM,默认的时间是0
from selenium import webdriver browser = webdriver.Chrome() browser.implicitly_wait(10) browser.get('https://www.zhihu.com/explore') input = browser.find_element_by_class_name('zu-top-add-question') print(input)
<selenium.webdriver.remote.webelement.WebElement (session="b29214772d59e912f1ac52e96ed29abe", element="0.12886805191194894-1")>
显式等待
from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC browser = webdriver.Chrome() browser.get('https://www.taobao.com/') wait = WebDriverWait(browser, 10) input = wait.until(EC.presence_of_element_located((By.ID, 'q'))) button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))) print(input, button)
<selenium.webdriver.remote.webelement.WebElement (session="07dd2fbc2d5b1ce40e82b9754aba8fa8", element="0.5642646294074107-1")>
<selenium.webdriver.remote.webelement.WebElement (session="07dd2fbc2d5b1ce40e82b9754aba8fa8", element="0.5642646294074107-2")>
- title_is 标题是某内容
- title_contains 标题包含某内容
- presence_of_element_located 元素加载出,传入定位元组,如(By.ID, 'p')
- visibility_of_element_located 元素可见,传入定位元组
- visibility_of 可见,传入元素对象
- presence_of_all_elements_located 所有元素加载出
- text_to_be_present_in_element 某个元素文本包含某文字
- text_to_be_present_in_element_value 某个元素值包含某文字
- frame_to_be_available_and_switch_to_it frame加载并切换
- invisibility_of_element_located 元素不可见
- element_to_be_clickable 元素可点击
- staleness_of 判断一个元素是否仍在DOM,可判断页面是否已经刷新
- element_to_be_selected 元素可选择,传元素对象
- element_located_to_be_selected 元素可选择,传入定位元组
- element_selection_state_to_be 传入元素对象以及状态,相等返回True,否则返回False
- element_located_selection_state_to_be 传入定位元组以及状态,相等返回True,否则返回False
- alert_is_present 是否出现Alert
详细内容:http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.support.expected_conditions
前进后退
import time from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.baidu.com/') browser.get('https://www.taobao.com/') browser.get('https://www.python.org/') browser.back() time.sleep(1) browser.forward() browser.close()
Cookies
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.zhihu.com/explore') print(browser.get_cookies()) browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}) print(browser.get_cookies()) browser.delete_all_cookies() print(browser.get_cookies())
选项卡管理
import time from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.baidu.com') browser.execute_script('window.open()') print(browser.window_handles) browser.switch_to_window(browser.window_handles[1]) browser.get('https://www.taobao.com') time.sleep(1) browser.switch_to_window(browser.window_handles[0]) browser.get('https://python.org')
['CDwindow-4f58e3a7-7167-4587-bedf-9cd8c867f435', 'CDwindow-6e05f076-6d77-453a-a36c-32baacc447df']
异常处理
from selenium import webdriver browser = webdriver.Chrome() browser.get('https://www.baidu.com') browser.find_element_by_id('hello')
---------------------------------------------------------------------------
NoSuchElementException Traceback (most recent call last)
<ipython-input-23-978945848a1b> in <module>()
3 browser = webdriver.Chrome()
4 browser.get('https://www.baidu.com')
----> 5 browser.find_element_by_id('hello')
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in find_element_by_id(self, id_)
267 driver.find_element_by_id('foo')
268 """
--> 269 return self.find_element(by=By.ID, value=id_)
270
271 def find_elements_by_id(self, id_):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in find_element(self, by, value)
750 return self.execute(Command.FIND_ELEMENT, {
751 'using': by,
--> 752 'value': value})['value']
753
754 def find_elements(self, by=By.ID, value=None):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
234 response = self.command_executor.execute(driver_command, params)
235 if response:
--> 236 self.error_handler.check_response(response)
237 response['value'] = self._unwrap_value(
238 response.get('value', None))
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
190 elif exception_class == UnexpectedAlertPresentException and 'alert' in value:
191 raise exception_class(message, screen, stacktrace, value['alert'].get('text'))
--> 192 raise exception_class(message, screen, stacktrace)
193
194 def _value_or_default(self, obj, key, default):
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"id","selector":"hello"}
(Session info: chrome=57.0.2987.133)
(Driver info: chromedriver=2.27.440174 (e97a722caafc2d3a8b807ee115bfb307f7d2cfd9),platform=Mac OS X 10.12.3 x86_64)
from selenium import webdriver from selenium.common.exceptions import TimeoutException, NoSuchElementException browser = webdriver.Chrome() try: browser.get('https://www.baidu.com') except TimeoutException: print('Time Out') try: browser.find_element_by_id('hello') except NoSuchElementException: print('No Element') finally: browser.close()
详细文档:http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions