Web自动化测试工具：Selenium

PhantomJs：无头浏览器，支持JavaScript。（即包含Js引擎、浏览器排版引擎等核心组件，但是没有和用户交互的界面的浏览器。）

Selenium：WEB自动化测试工具。可以直接运行在浏览器中。不同浏览器提供不同的操作接口，Selenium通过这些接口来操作浏览器。

webdriver：Selenium的核心对象。通过它可以操作浏览器、截图、http访问、http解析等。

开发实例1：处理异步请求。在查询Bing时，搜索结果通过异步请求返回，页面打开后需要等待结果加载完成才能获取到。

# 获取bing查询数据
# 通过异步请求返回结果，所以直接访问页面不能获取到搜索结果。
from selenium import webdriver  # 核心驱动对象
import datetime
import time
import random

# Create the core webdriver object.
# Path separators on macOS are forward slashes, the opposite of Windows.
driver = webdriver.PhantomJS(r'phantomjs-2.1.1-macosx/bin/phantomjs')  # path to the PhantomJS executable
# Set the window size (resolution).
driver.set_window_size(1280, 2400)

# URL to visit
url = 'https://cn.bing.com/search?q=%E7%8C%AB'
driver.get(url)  # open the page with get(), like typing the address into the browser's address bar

# 截图：保存图片
def save_picture():
    """Save a screenshot of the current page as a PNG under ``picture/``.

    The file name is the current local time formatted as ``YYYYmmddHHMMSS``
    followed by a random integer from 1 to 100 (presumably to make name
    clashes within the same second unlikely).
    """
    import os  # local import: this script's import list does not include os

    base_dir = 'picture/'
    # save_screenshot() signals a failed write by returning False rather than
    # raising, so a missing target directory would silently lose the image.
    os.makedirs(base_dir, exist_ok=True)
    file_name = '{}{:%Y%m%d%H%M%S}{}.png'.format(base_dir, datetime.datetime.now(), random.randint(1, 100))
    driver.save_screenshot(file_name)

# save_picture()  # 访问太快，会没来得及加载动态页面部分

# 方法1:等几秒至页面完全加载出来
# time.sleep(5)
# print('*'*30)
# save_picture()  # 间隔5秒之后再次访问，就可以获得完全加载的页面内容


# Approach 2: poll for the target element, giving up after a bounded number of attempts.
from selenium.common.exceptions import NoSuchElementException  # raised while the element is still missing

MAXRETRYIES = 5  # maximum number of attempts

try:
    # Count down so the printed value is the number of attempts left,
    # without mutating the constant itself.
    for remaining in range(MAXRETRYIES, 0, -1):
        try:
            # The presence of this element is used as the signal that the
            # asynchronously loaded results have been rendered.
            ele = driver.find_element_by_class_name('b_caption')
        except NoSuchElementException as e:
            print(type(e))  # <class 'selenium.common.exceptions.NoSuchElementException'>
            print(e)
            time.sleep(1)  # pause one second to let the page load, then retry
        else:
            print(ele, remaining)
            save_picture()
            break
finally:
    # quit() also shuts down the PhantomJS process; close() would only close
    # the current window and leave the process running.
    driver.quit()

开发实例2:下拉框处理

from selenium import webdriver
import datetime
import random
from selenium.webdriver.support.ui import Select #获取有关select的包

driver = webdriver.PhantomJS(r'phantomjs-2.1.1-macosx/bin/phantomjs')  # path to the PhantomJS executable
driver.set_window_size(1280, 2400)  # set the window size

# 保存图片
def save_picture():
    """Save a screenshot of the current page as a PNG under ``picture/``.

    The file name is the current local time formatted as ``YYYYmmddHHMMSS``
    followed by a random integer from 1 to 100 (presumably to make name
    clashes within the same second unlikely).
    """
    import os  # local import: this script's import list does not include os

    base_dir = 'picture/'
    # save_screenshot() signals a failed write by returning False rather than
    # raising, so a missing target directory would silently lose the image.
    os.makedirs(base_dir, exist_ok=True)
    file_name = '{}{:%Y%m%d%H%M%S}{}.png'.format(base_dir, datetime.datetime.now(), random.randint(1, 100))
    driver.save_screenshot(file_name)

import time  # needed by time.sleep() below; this example's import list omits it

url = "https://www.oschina.net/search?scope=project&q=python"
try:
    driver.get(url)  # open the page with get(), like typing the address into the browser's address bar

    # Handle the <select>: locate the drop-down that carries class "tag1".
    ele = driver.find_element_by_class_name('tag1')
    print(ele.tag_name)
    print('current_url1:', driver.current_url)
    save_picture()

    s = Select(ele)  # Select() only accepts a <select> element
    s.select_by_index(1)  # per the original note, this is the "web application development" option
    time.sleep(2)  # presumably gives the page time to react to the selection before the URL is read again

    print('current_url2:', driver.current_url)
    save_picture()
finally:
    # Always release the browser: quit() also shuts down the PhantomJS
    # process, whereas close() would only close the current window.
    driver.quit()

开发实例3:模拟键盘操作登录网页

from selenium import webdriver
import datetime
import time
import random
from selenium.webdriver.common.keys import Keys  # 有关模拟键盘输入的模块

driver = webdriver.PhantomJS(r'phantomjs-2.1.1-macosx/bin/phantomjs')  # path to the PhantomJS executable
driver.set_window_size(1280, 2400)  # set the window size

# 保存图片
def save_picture():
    """Save a screenshot of the current page as a PNG under ``picture/``.

    The file name is the current local time formatted as ``YYYYmmddHHMMSS``
    followed by a random integer from 1 to 100 (presumably to make name
    clashes within the same second unlikely).
    """
    import os  # local import: this script's import list does not include os

    base_dir = 'picture/'
    # save_screenshot() signals a failed write by returning False rather than
    # raising, so a missing target directory would silently lose the image.
    os.makedirs(base_dir, exist_ok=True)
    file_name = '{}{:%Y%m%d%H%M%S}{}.png'.format(base_dir, datetime.datetime.now(), random.randint(1, 100))
    driver.save_screenshot(file_name)

from selenium.common.exceptions import NoSuchElementException  # raised while the element is still missing

MAX_RETRIES = 10  # upper bound on polls for the post-login element

url = 'https://www.oschina.net/home/login'
try:
    driver.get(url)
    print(driver.current_url)  # e.g. https://www.oschina.net/home/login
    save_picture()

    # Locate the two input boxes: e-mail and password.
    email = driver.find_element_by_id('userMail')
    pwd = driver.find_element_by_id('userPassword')

    # Simulate keyboard input.
    # NOTE(review): credentials are hard-coded in source; consider reading
    # them from the environment or a prompt instead.
    username = '18390900259'
    password = '123456'
    email.send_keys(username)
    pwd.send_keys(password)

    print(driver.current_url)  # e.g. https://www.oschina.net/home/login
    save_picture()

    pwd.send_keys(Keys.ENTER)  # pressing Enter submits the login form

    time.sleep(1)  # the page redirects after login
    print(driver.current_url)  # e.g. https://www.oschina.net/?nocache=1553228140174
    save_picture()

    # Poll for the element that marks a logged-in page. The number of polls
    # is bounded so a failed login cannot spin forever and keep the browser
    # process alive.
    for _ in range(MAX_RETRIES):
        time.sleep(1)
        print(driver.current_url)
        try:
            userinfo = driver.find_element_by_class_name('user-info')
        except NoSuchElementException as e:
            print(e)
        else:
            print(userinfo.text)  # text content under that element
            save_picture()
            break
    else:
        raise RuntimeError(
            'user-info element not found after {} attempts'.format(MAX_RETRIES)
        )

    cookies = driver.get_cookies()  # cookies of the logged-in session
    print(cookies)
finally:
    # Always release the browser: quit() also shuts down the PhantomJS
    # process, whereas close() would only close the current window.
    driver.quit()

开发实例4:页面等待

越来越多的页面使用Ajax的异步加载技术，会导致页面中要被访问的内容还没加载就被访问了，代码抛出异常。

方法：

1.线程休眠：time.sleep(n)来等待数据加载。1.1：配合循环一直等到数据加载完成；1.2设置最大重试次数，避免一直循环下去。

2.Selenium等待：2.1:隐式等待：等待特定时间；2.2:显式等待：指定一个条件，直到条件成立继续往后执行。或者设置超时时间，超时抛出异常。

2.1隐式等待

from selenium import webdriver  # 页面隐式等待

driver = webdriver.PhantomJS(r'phantomjs-2.1.1-macosx/bin/phantomjs')
url = "https://movie.douban.com/"
try:
    # Global setting: every element lookup below waits up to 10 seconds.
    driver.implicitly_wait(10)
    # Kept inside the outer try so the browser is still shut down if the
    # page load itself fails.
    driver.get(url)
    try:
        print('begin-------')
        # The lookup may take up to 10 seconds; if the element is still
        # missing, control jumps to the except branch.
        ele = driver.find_element_by_id('abcde')
    except Exception as e:
        print(type(e))  # <class 'selenium.common.exceptions.NoSuchElementException'>
        print(e, '~~~~~~~~~~')
finally:
    driver.quit()

2.2显式等待

# 定位搜索框，搜索电影
from selenium import webdriver  # 核心对象
import datetime
import random

# 键盘操作
from selenium.webdriver.common.keys import Keys
# WebDriverWait负责循环等待
from selenium.webdriver.support.wait import WebDriverWait
# expected_conditions负责条件触发
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.common.by import By

driver = webdriver.PhantomJS(r'phantomjs-2.1.1-macosx/bin/phantomjs')
driver.set_window_size(1280, 2400)  # window size

# Open the page with get().
url = 'https://movie.douban.com/'
driver.get(url)

def save_picture():
    """Save a screenshot of the current page as a PNG under ``picture/``.

    The file name is the current local time formatted as ``YYYYmmddHHMMSS``
    followed by a random integer from 1 to 100, zero-padded to three digits.
    """
    base_dir = 'picture/'
    file_name = "{}{:%Y%m%d%H%M%S}{:03}.png".format(base_dir, datetime.datetime.now(), random.randint(1,100))
    driver.save_screenshot(file_name)


# The search flow runs at module level. It must not be nested inside
# save_picture(): there it would never be executed by the script and would
# call save_picture() recursively.
try:
    # Explicit wait: WebDriverWait polls (every 0.5 s by default) until the
    # expected condition holds, here "the element is present in the DOM".
    # 20 seconds is the timeout; an exception is raised once it is exceeded.
    ele = WebDriverWait(driver, 20).until(
        ec.presence_of_element_located((By.ID, 'inp-query'))
        # Equivalent lookup by XPath instead of id:
        # ec.presence_of_element_located((By.XPATH, '//input[@id="inp-query"]'))
    )

    ele.send_keys('TRON')  # once the search box is present, type 'TRON' into it
    save_picture()

    ele.send_keys(Keys.ENTER)  # press Enter to start the search
    print(driver.current_url)  # URL reached after the search
    save_picture()
finally:
    driver.quit()  # shut the browser down

WebDriver是Selenium的核心，实现与浏览器的交互：打开URL，可以跟踪跳转，可以返回当前页面的实际URL；获取页面的title；处理cookie；控制浏览器的操作，例如前进、后退、刷新、关闭、最大化等；执行JS脚本；在DOM中搜索页面元素Web Element，指定的或一批，find系方法；操作网页元素：模拟下拉框操作Select(element)，在元素上模拟鼠标操作click()，在元素上模拟键盘输入send_keys()，获取元素文字text，获取元素的属性get_attribute()等等。