# Beginner test: scraping Baidu search results

import sys
import re
from typing import List
import json

from selenium import webdriver
#from gjypjd.utils import exetcute_sql,if_headless
#import pymysql
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as expected
from selenium.webdriver.support.wait import WebDriverWait

from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Headless-browser scraping script.
#
# Flow: open the landing page, print its source and cookies, inject the
# session cookies stored in ``cookies.txt``, open the search page, then print
# every heading link text matched by ``regex``. The browser is always shut
# down at the end, even when a step fails.

# Headless Firefox options. The PhantomJS driver created below does not use
# them; they are kept so the script can be switched to Firefox by passing
# them to ``webdriver.Firefox``.
options = Options()
options.add_argument('-headless')  # run without a visible browser window

# PhantomJS capabilities, overriding the user agent with a mobile Chrome one.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36"
)

# Captures the link text inside each `<h3 class="t"><a ...>text</a>`.
# The `>` after `[^>]*` closes the opening <a> tag so that it is not captured
# as the first character of the text.
regex = re.compile('<h3 class="t"><a[^>]*>(.*?)</a>')

# NOTE(review): recent Selenium releases no longer ship the PhantomJS driver;
# confirm the Selenium version this script is expected to run against.
driver = webdriver.PhantomJS(desired_capabilities=dcap)

try:
    # Set once for the whole session. The implicit wait applies to element
    # lookups, not to page loads or ``page_source``.
    driver.implicitly_wait(10)

    # First visit: establishes the domain so cookies can be added afterwards.
    driver.get('http://app1.nmpa.gov.cn/data_nmpa/face3/dir.html?type=yp')
    print(driver.page_source)

    # Dump every cookie the site set on the first visit.
    cookies1 = driver.get_cookies()
    print(cookies1)

    # Session cookies come from cookies.txt, which must hold a JSON list of
    # cookie dicts.
    with open("cookies.txt", "r", encoding="utf-8") as f:
        cookies: List[dict] = json.load(f)

    for cookie in cookies:
        # Drop "expiry" before adding: per the original author's note the
        # driver does not accept it.
        cookie.pop("expiry", None)
        driver.add_cookie(cookie)

    # Second visit, now carrying the injected session cookies.
    driver.get('http://app1.nmpa.gov.cn/data_nmpa/face3/search.jsp?6SQk6G2z=GBK-5RWnNqwnGCEoyB6.X6qnkNUjJ44QBnuyOPTxHTrYkEVvJ_zldCQbi6OTK9gkK9QsBjidwgOSqy8a.aQYrg5SizKfHWyPoUF_u4uGfeAMDaoMNmkHbMzfgDMwYcj3fFjXQoiewH_.zQW53CWqKVDHO27YoNfVLGVqanx73YBQK_MGhAGWgCM1PFK7Fz0LvBQe6QURlVuhdpVNmN7wR4MUcec6UwQW4eAq4K5dIQY9Hj76NcKe5yxyb9GJqCDZ70c.D5fLtmNvOyIKSW08REmwXuR_xWJpSqLa9.sZFs3DpZ8913WU1ccwv.a1aNtJDeMQ14S8R.JOOSj2P5zhjENRj43LqrbMZIzs53f4S_mZbLV7&c1SoYK0a=GBK-4fzZ4ejgwRW3SCbDGETEb9bW8e_EQpv8bHkTV0LSyoMbKIL7lpMe7MKFCg_vcVd1P5rVJQNaT8WNG7XYltPd0db7VSRRcUegLXEpKYnPt1t.oVEvxl5ICYo7rsOrufJj6isZrBY25E2UCx2UFW8UfieSqYjda9fAMWsC2oDK4FjTxvgDF8gw1MnNGSVybtCXd')

    html = driver.page_source
    print(html)

    print('result compile=')
    patterns = regex.findall(html)
    # Print a 1-based index followed by the captured link text.
    for tx, title in enumerate(patterns, start=1):
        print(tx)
        print(title)

    print('result count=')
    print('s1 end')
finally:
    # Always quit so the browser and its driver process exit together,
    # even when one of the steps above raised.
    driver.quit()


print('hello')
# Original article: https://www.cnblogs.com/CoreXin/p/14688865.html