Python_selenium案例:

selenium案例

#coding=utf-8
#select下拉框处理
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
import time
# Select wraps a <select> element and offers helpers for choosing options.
from selenium.webdriver.support.select import Select

# Demo: change Baidu's "results per page" <select>, then run a search.
# Element lookups use find_element(By.<strategy>, value); the older
# find_element_by_* helpers were removed in Selenium 4.
driver = webdriver.Chrome()
driver.get("https://www.baidu.com/")
# Implicit wait: element lookups poll for up to 10 seconds.
driver.implicitly_wait(10)
# Hover the mouse over the "设置" (Settings) link.
settings_link = driver.find_element(By.LINK_TEXT, "设置")
ActionChains(driver).move_to_element(settings_link).perform()
# Click "搜索设置" (Search settings).
driver.find_element(By.LINK_TEXT, "搜索设置").click()
# Hard 4-second sleep. Note from the original author: neither an implicit nor
# an explicit wait managed to obtain the element at this point.
time.sleep(4)
# Two steps: locate the drop-down first, then pick an option by index.
choice = driver.find_element(By.NAME, "NR")
Select(choice).select_by_index(2)
time.sleep(2)
driver.find_element(By.XPATH, "//div[@id='gxszButton']/a[1]").click()
time.sleep(2)
# Accept the alert that appears after clicking the button above.
driver.switch_to.alert.accept()
# Back on the Baidu home page: type a keyword and submit the search.
driver.find_element(By.ID, 'kw').send_keys("python")
driver.find_element(By.ID, 'su').click()
selenium打开网页+悬浮+点击+进入百度页面+输入关键词+搜索

cookie登陆(百度)

#coding=utf8
from selenium import webdriver
import time
import pprint

# Open the Baidu home page in Chrome, with element lookups polling for up to
# 10 seconds (implicit wait).
base_url = "https://www.baidu.com"
driver = webdriver.Chrome()
driver.implicitly_wait(10)
driver.get(base_url)

# Pretty-print every cookie the browser holds after loading the page.
current_cookies = driver.get_cookies()
pprint.pprint(current_cookies)
1.拿到未登录的cookie
#coding=utf-8
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pprint

# Demo: log in to Baidu through the UI (CAPTCHA typed by hand) and print the
# cookies before and after login.
# Element lookups use find_element(By.<strategy>, value); the older
# find_element_by_* helpers were removed in Selenium 4.
base_url = "https://www.baidu.com"
# NOTE(review): credentials are hard-coded in the script; prefer reading them
# from the environment or a local, untracked config file.
usr_name = "正牌冰峰汽水"
usr_pwd = "yanyan8174"

driver = webdriver.Chrome()
driver.implicitly_wait(10)

# Clear all cookies, then load the page and print the cookies it sets.
driver.delete_all_cookies()
driver.get(base_url)
pprint.pprint(driver.get_cookies())

# Click "登录" (Log in), then the footer button with the id below.
driver.find_element(By.LINK_TEXT, "登录").click()
time.sleep(2)
driver.find_element(By.ID, "TANGRAM__PSP_10__footerULoginBtn").click()
time.sleep(2)
driver.find_element(By.CSS_SELECTOR, "#TANGRAM__PSP_10__userName").send_keys(usr_name)
driver.find_element(By.CSS_SELECTOR, "#TANGRAM__PSP_10__password").send_keys(usr_pwd)
# During this 10-second pause, type the CAPTCHA into the browser by hand.
time.sleep(10)

driver.find_element(By.CSS_SELECTOR, "#TANGRAM__PSP_10__submit").click()
time.sleep(3)
# Print the cookies held after the login form was submitted.
pprint.pprint(driver.get_cookies())
2.正常登陆拿到登陆后的cookie
#coding=utf-8
from selenium import webdriver
import time

# Demo: simulate a logged-in Baidu session by injecting a previously captured
# cookie into a fresh browser and refreshing the page.
base_url = "https://www.baidu.com"

# Cookies captured from a logged-in session. cookie_1..cookie_3 are kept for
# reference only; they are not added to the browser below.
cookie_1 = {'httpOnly': True,
            'secure': False,
            'value': '1',
            'name': 'HOSUPPORT',
            'domain': '.passport.baidu.com',
            'path': '/',
            'expiry': 1768236049.395134
            }
cookie_2 = {'httpOnly': True,
            'secure': False,
            'value': 'fi_PncwhpxZ%7ETaKAcaFAwWer%7EzluYq4tLyhh8G8D-51Jh32rZKfPIAaPUksyRGhrJ-ndBYw3t-vNiNSFW6D',
            'name': 'UBI',
            'domain': '.passport.baidu.com',
            'path': '/',
            'expiry': 1768236050.02163}

cookie_3 = {'httpOnly': False,
            'secure': False,
            'value': 'f39184d315d7eacfb7b1f37fc37f5e72',
            'name': 'FP_UID',
            'domain': '.baidu.com',
            'path': '/',
            'expiry': 2556057600}
# The original author's testing found that adding just this one cookie is
# enough. It is an empty placeholder here: fill in the cookie taken from your
# own logged-in session (at least 'name' and 'value') before running.
cookie_4 = {}

# Fail early, before a browser is launched: add_cookie() needs at least the
# 'name' and 'value' keys, so an unfilled placeholder can never work.
if not {'name', 'value'} <= cookie_4.keys():
    raise ValueError(
        "cookie_4 is an empty placeholder: set the logged-in cookie's "
        "'name' and 'value' before running this script"
    )

driver = webdriver.Chrome()
driver.implicitly_wait(10)

# Clear all cookies, then load the site so cookies can be set for its domain.
driver.delete_all_cookies()
driver.get(base_url)

# Inject the login cookie, pause briefly, then reload so the page picks it up.
driver.add_cookie(cookie_4)
time.sleep(2)
driver.refresh()
3.模拟登陆

cookie登陆+手动输入验证码

#coding=utf-8
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pprint

# Demo: log in through the site's login form (CAPTCHA typed by hand) and print
# the cookies before and after login.
# Element lookups use find_element(By.<strategy>, value); the older
# find_element_by_* helpers were removed in Selenium 4.
base_url = "http://www.xx007.cn/login.asp"
# NOTE(review): credentials are hard-coded in the script; prefer reading them
# from the environment or a local, untracked config file.
usr_name = "benq81"
usr_pwd = "jenny8174"

driver = webdriver.Chrome()
driver.implicitly_wait(10)

# Clear all cookies, then load the login page and print the cookies it sets.
driver.delete_all_cookies()
driver.get(base_url)
pprint.pprint(driver.get_cookies())

driver.refresh()
driver.find_element(By.NAME, "username").send_keys(usr_name)
driver.find_element(By.NAME, "password").send_keys(usr_pwd)
# During this 10-second pause, type the CAPTCHA into the browser by hand.
time.sleep(10)

driver.find_element(
    By.CSS_SELECTOR,
    'body > table:nth-child(12) > tbody > tr:nth-child(12) > td > input[type="submit"]',
).click()
time.sleep(2)
# Print the cookies held after the login form was submitted.
pprint.pprint(driver.get_cookies())
1.拿到未登录的cookie+手动输入验证码
#coding=utf-8
from selenium import webdriver
import time
import pprint
# Demo: replace the browser's cookies with a previously captured set and
# reload the page.
base_url = "http://www.xx007.cn/"

driver = webdriver.Chrome()
driver.implicitly_wait(10)
driver.get(base_url)
pprint.pprint(driver.get_cookies())

# Read the 'DvForum' cookie and print whatever follows the last "=" in its value.
forum_cookie = driver.get_cookie('DvForum')
print(forum_cookie['value'].rsplit("=", 1)[-1])

# Cookies to inject, in the order they are added to the browser.
login_cookies = [
    {
        'domain': 'www.xx007.cn',
        'expiry': 1511971.2069744722,
        'httpOnly': False,
        'name': 'DvForum',
        'path': '/',
        'secure': False,
        'value': 'userid=555232&usercookies=2&userhidden=2&password=CgWJBnM9970wE057&userclass=%B4%BF%C2%F2%BC%D2%BB%E1%D4%B1&username=benq81&StatUserID=368139236',
    },
    {
        'domain': 'www.xx007.cn',
        'httpOnly': False,
        'name': 'upNum',
        'path': '/',
        'secure': False,
        'value': '0',
    },
    {
        'domain': 'www.xx007.cn',
        'httpOnly': False,
        'name': 'ASPSESSIONIDSATQDDSQ',
        'path': '/',
        'secure': False,
        'value': 'IHDFJPPALBLMOJLCHCHHBFKD',
    },
]

# Drop the current cookies, pause, then add the captured ones.
driver.delete_all_cookies()
time.sleep(2)
for login_cookie in login_cookies:
    driver.add_cookie(login_cookie)

# Pause, then reload the page with the injected cookies in place.
time.sleep(4)
driver.refresh()
2.模拟登陆

使用unittest框架编写测试用例

# coding=utf-8
'''
Project:基础类BasePage,封装所有页面公用的方法,
定义open函数,重定义find_element,switch_frame,send_keys等函数。
在初始化方法中定义驱动driver,基本url,title
WebDriverWait提供了显式等待方式。
'''
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class BasePage(object):
    """Base page object holding the behaviour shared by every page.

    Wraps a WebDriver instance together with the page URL and the title text
    expected once the page has loaded, and provides helpers for opening the
    page, locating elements, switching frames, running JavaScript and typing.
    """

    def __init__(self, selenium_driver, base_url, pagetitle):
        """Store the driver, the page URL and the expected title text."""
        self.driver = selenium_driver
        self.base_url = base_url
        self.pagetitle = pagetitle

    def on_page(self, pagetitle):
        """Return True if *pagetitle* is contained in the current window title."""
        return pagetitle in self.driver.title

    def _open(self, url, pagetitle):
        """Load *url*, maximize the window and verify the window title.

        Raises:
            AssertionError: if *pagetitle* is not contained in the title of
                the window after loading.
        """
        self.driver.get(url)
        self.driver.maximize_window()
        # Raised explicitly rather than via ``assert`` so that the check is
        # not stripped when Python runs with -O.
        if not self.on_page(pagetitle):
            raise AssertionError("打开开页面失败 %s" % url)

    def open(self):
        """Open the page at ``base_url`` and check it against ``pagetitle``."""
        self._open(self.base_url, self.pagetitle)

    def find_element(self, *loc):
        """Wait up to 10 seconds for the element to be visible and return it.

        Args:
            *loc: the locator, unpacked: ``(By.<strategy>, value)``.

        Returns:
            The located element, or None (after printing a message) when the
            lookup fails.
        """
        try:
            # visibility_of_element_located takes the locator as one tuple,
            # so ``loc`` is passed as-is here and unpacked for find_element.
            WebDriverWait(self.driver, 10).until(EC.visibility_of_element_located(loc))
            return self.driver.find_element(*loc)
        except Exception:
            # ``except Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            print("%s 页面中未能找到 %s 元素" % (self, loc))
            return None

    def switch_frame(self, loc):
        """Switch the driver into the frame identified by *loc*."""
        return self.driver.switch_to.frame(loc)

    def script(self, src):
        """Execute the JavaScript snippet *src* and return its result."""
        return self.driver.execute_script(src)

    def send_keys(self, loc, vaule, clear_first=True, click_first=True):
        """Type *vaule* into the element whose locator is stored on ``self``.

        Args:
            loc: locator name; the locator tuple is read from the attribute
                ``_<loc>`` of this object.
            vaule: the text to type.
            clear_first: clear the element before typing.
            click_first: click the element before typing.

        A missing locator attribute or a failed element lookup is reported
        with a printed message instead of an exception.
        """
        try:
            print("使用send_keys")
            loc = getattr(self, "_%s" % loc)  # equivalent to self._<loc>
            if click_first:
                self.find_element(*loc).click()
            if clear_first:
                self.find_element(*loc).clear()
            # Typing happens regardless of clear_first; it used to be nested
            # under that flag, so clear_first=False typed nothing at all.
            self.find_element(*loc).send_keys(vaule)
        except AttributeError:
            print("%s 页面中未能找到 %s 元素" % (self, loc))
test_basePage.py
# coding=utf-8
'''
Project:页面基本操作方法:如open,input_username,input_password,click_submit
'''
from selenium.webdriver.common.by import By
from test_basePage import BasePage

#继承BasePage类
class LoginPage(BasePage):
    """Login page object: element locators plus the actions performed on them."""

    # Locators: (strategy, value) pairs identifying the page elements.
    username_loc = (By.NAME, 'email')
    password_loc = (By.NAME, 'password')
    submit_loc = (By.ID, 'dologin')
    span_loc = (By.CSS_SELECTOR, "div.error-tt>p")
    dynpw_loc = (By.ID, "lbDynPw")
    userid_loc = (By.ID, "spnUid")

    # Actions. A method defined here with the same name as one in the parent
    # class overrides the parent's version.

    def open(self):
        """Open the page through the inherited ``_open`` helper."""
        url, title = self.base_url, self.pagetitle
        self._open(url, title)

    def input_username(self, username):
        """Type *username* into the user-name field."""
        field = self.find_element(*self.username_loc)
        field.send_keys(username)

    def input_password(self, password):
        """Type *password* into the password field."""
        field = self.find_element(*self.password_loc)
        field.send_keys(password)

    def click_submit(self):
        """Click the login button."""
        button = self.find_element(*self.submit_loc)
        button.click()

    def show_span(self):
        """Return the text of the tip shown for an invalid user name or password."""
        tip = self.find_element(*self.span_loc)
        return tip.text

    def swich_DynPw(self):
        """Switch to dynamic-password login (original note: effective under IE)."""
        toggle = self.find_element(*self.dynpw_loc)
        toggle.click()

    def show_userid(self):
        """Return the text of the user-id element on the post-login page."""
        label = self.find_element(*self.userid_loc)
        return label.text
test_loginPage.py
# coding=utf-8
'''
Project:页面基本操作方法:如open,input_username,input_password,click_submit
'''
from selenium.webdriver.common.by import By
from test_basePage import BasePage

#继承BasePage类
class LoginPage(BasePage):
    """Login page object: element locators plus the actions performed on them."""

    # Locators: (strategy, value) pairs identifying the page elements.
    username_loc = (By.NAME, 'email')
    password_loc = (By.NAME, 'password')
    submit_loc = (By.ID, 'dologin')
    error_loc = (By.XPATH, "//div[@class='ferrorhead']")
    userid_loc = (By.ID, "spnUid")
    # Read by swich_DynPw(), which previously failed with AttributeError
    # because no such locator was defined on this class.
    # NOTE(review): value taken from the other LoginPage variant in this
    # document - confirm it matches the live page.
    dynpw_loc = (By.ID, "lbDynPw")
    # Name or id of the iframe passed to switch_frame().
    frame_loc = "x-URS-iframe"

    # Actions. A method defined here with the same name as one in the parent
    # class overrides the parent's version.

    def input_username(self, username):
        """Type *username* into the user-name field."""
        self.find_element(*self.username_loc).send_keys(username)

    def input_password(self, password):
        """Type *password* into the password field."""
        self.find_element(*self.password_loc).send_keys(password)

    def click_submit(self):
        """Click the login button."""
        self.find_element(*self.submit_loc).click()

    def switch_to_frame(self):
        """Switch the driver into the iframe named by ``frame_loc``."""
        self.switch_frame(self.frame_loc)

    def show_error(self):
        """Return the error banner's text, or False if no banner shows up.

        Waits up to 10 seconds for the element at ``error_loc`` to become
        visible.
        """
        # Imported locally: this module only imports By and BasePage at the
        # top, so the wait helpers used here were undefined names and the
        # former bare ``except`` turned that NameError into "always False".
        from selenium.common.exceptions import WebDriverException
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.wait import WebDriverWait

        try:
            # until() returns the condition's result, i.e. the visible element.
            banner = WebDriverWait(self.driver, 10).until(
                EC.visibility_of_element_located(self.error_loc)
            )
            return banner.text
        except WebDriverException:
            # Covers the wait timing out as well as the element going away
            # before its text is read; both mean "no error to report".
            return False

    def swich_DynPw(self):
        """Switch to dynamic-password login (original note: effective under IE)."""
        self.find_element(*self.dynpw_loc).click()

    def check_current_title(self):
        """Return the title of the current window."""
        return self.driver.title

    def show_userid(self):
        """Return the text of the user-id element on the post-login page."""
        return self.find_element(*self.userid_loc).text
test_126_loginPage.py
# -*- coding:utf8 -*-
'''
Project:使用unittest框架编写测试用例。
'''
import unittest,time
from test_126_loginPage import LoginPage
from selenium import webdriver

class Caselogin126mail(unittest.TestCase):
    """Login test case for the 126 mailbox."""

    def setUp(self):
        """Start Chrome and set the URL and account used by the test."""
        self.driver = webdriver.Chrome()
        self.driver.implicitly_wait(30)
        self.url = "http://www.126.com"
        # NOTE(review): credentials are hard-coded in the test; prefer
        # reading them from the environment or a local, untracked config.
        self.username = "zhpmiss@126.com"
        self.password = "zhou0829miss@"

    def test_login_mail(self):
        """Log in and check either the error banner or the displayed user id."""
        login_page = LoginPage(self.driver, self.url, "网易")
        login_page.open()
        # Pause before switching into the login form's frame.
        time.sleep(4)
        login_page.switch_to_frame()
        login_page.input_username(self.username)
        login_page.input_password(self.password)
        login_page.click_submit()
        # Query the error banner once and reuse the result: a second call
        # would wait again and could observe a different page state.
        error_text = login_page.show_error()
        if error_text:
            print("测试帐号密码有误的情况下是否弹出提示框:")
            self.assertEqual(error_text, "帐号或密码错误")
        else:
            print("测试帐号密码正确的情况下是否进入确定页面:")
            self.assertEqual(login_page.show_userid(), self.username)

    def tearDown(self):
        """Report completion and close the browser started in setUp."""
        print("测试完毕")
        # Quit so that each test run does not leave a browser/driver
        # process behind.
        self.driver.quit()


if __name__ == "__main__":
    unittest.main()
test_126.py

完整案例:1、今日头条

from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import etree
from pyquery import PyQuery as pq
import time

# Demo: open Toutiao, switch to the "科技" (Technology) channel, scroll a few
# times and print the titles found in the feed.
driver = webdriver.Chrome()
driver.maximize_window()
driver.get('https://www.toutiao.com/')
# The implicit wait is a session-wide setting, so it is set only once.
driver.implicitly_wait(10)
# find_element(By.LINK_TEXT, ...) replaces find_element_by_link_text, which
# was removed in Selenium 4.
driver.find_element(By.LINK_TEXT, '科技').click()
# Scroll down in 500-pixel steps, pausing 2 seconds after each step.
for i in range(3):
    js = "var q = document.documentElement.scrollTop="+str(i*500)
    driver.execute_script(js)
    time.sleep(2)

time.sleep(5)
page = driver.page_source
doc = pq(page)  # parse the page with pyquery
doc = etree.HTML(str(doc))  # re-parse the serialized markup with lxml for XPath
contents = doc.xpath('//div[@class="wcommonFeed"]/ul/li')
print(contents)  # the raw XPath result for the feed items
print("--------------------------")
for x in contents:
    title = x.xpath('div/div[1]/div/div[1]/a/text()')
    if title:
        title = title[0]
        # To save the titles to a text file instead of only printing them:
        # with open('toutiao.txt', 'a+', encoding='utf8') as f:
        #     f.write(title + '\n')
        print(title)
抓取今日头条——科技 内容title
selenium抓取今日头条,滚动向下拿50页,保存到文本文件
原文地址:https://www.cnblogs.com/hellangels333/p/8778850.html