亲测,完全有效,使用webdriver,自写,裁判文书网,批量全部下载

直接上代码(运行前请把代码中的"手机号"和"密码"两个占位符替换成自己的登录手机号和密码)

 1 """程序说明"""
 2 # -*-  coding: utf-8 -*-
 3 # Author: zhou bo
 4 # Datetime : 2020
 5 # software: PyCharm
 7 from selenium import webdriver
 8 from selenium.webdriver.common.by import By
 9 from selenium.webdriver.support import expected_conditions as EC
10 from selenium.webdriver.support.wait import WebDriverWait
11 import math
12 import time
13 import logging
14 from selenium.webdriver.firefox.options import Options
15 import os
16 from crawler_tools import user_agent as u
17 from datetime import datetime
18 from selenium.common.exceptions import *
19 import pyautogui
20 import random
21 from selenium.webdriver import ActionChains
22 from retrying import retry
23 
24 
def login(driver, phone_number="手机号", password="密码", keyword="合同纠纷", max_pages=None):
    """Log in, search for ``keyword`` and download the result list page by page.

    The browser is expected to already be on the wenshu.court.gov.cn page that
    embeds the login iframe (the caller navigates there before calling this).

    Args:
        driver: Selenium WebDriver instance controlling the browser.
        phone_number: Text typed into the ``phoneNumber`` login field. The
            default is the original placeholder and must be replaced with a
            real account to log in.
        password: Text typed into the password field (placeholder default).
        keyword: Search term typed into the search box and sent, URL-encoded,
            as the ``s21`` query parameter of the result page.
        max_pages: Total number of result pages to download, counting the
            first one. ``None`` (the default) keeps paging until
            ``next_page``/``down_load`` raises, as the original script did.

    Raises:
        ValueError: If ``max_pages`` is given and is smaller than 1.
    """
    # Local import keeps the new stdlib dependency inside this block.
    from urllib.parse import quote

    # Validate before any browser interaction so a bad argument fails fast.
    if max_pages is not None and max_pages < 1:
        raise ValueError("max_pages must be None or an integer >= 1")

    wait = WebDriverWait(driver, 20)

    # The login form is located inside the "contentIframe" frame.
    driver.refresh()
    frame = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="contentIframe"]')))
    driver.switch_to.frame(frame)

    phone_input = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//*[@id="phoneNumber"]')))
    phone_input.send_keys(phone_number)
    time.sleep(1)
    password_input = wait.until(EC.presence_of_element_located(
        (By.XPATH, '/html/body/app-root/div/app-login/div/div/form/div/div[2]/input')))
    password_input.send_keys(password)
    # Original author's note: one second works best; shorter pauses led to
    # "network error" responses.
    time.sleep(1)
    submit_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.custom-button')))
    submit_button.click()
    # Must leave the iframe again, otherwise elements of the main page
    # cannot be located.
    driver.switch_to.default_content()

    # Type the keyword into the search box, then open the result list for it.
    search_box = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//*[@id="_view_1540966814000"]/div/div[1]/div[2]/input')))
    search_box.send_keys(keyword)
    time.sleep(2)
    # quote() percent-encodes the UTF-8 bytes; for the default keyword this
    # yields exactly the URL the original script hard-coded.
    driver.get(
        "https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html"
        "?pageId=b67ff15b548ff825d1e09dc899ecf778&s21=" + quote(keyword))

    five_to_15(driver)
    down_load(driver)
    pages_done = 1
    # With max_pages=None this loop only ends when a page action raises
    # (e.g. a wait timing out), matching the original behaviour.
    while max_pages is None or pages_done < max_pages:
        next_page(driver)
        time.sleep(2)
        down_load(driver)
        pages_done += 1
59 
def five_to_15(driver):
    """Open the drop-down in the result view and pick its third option.

    Judging by the function name this presumably switches the page size
    from 5 to 15 entries -- confirm against the live page.

    Args:
        driver: Selenium WebDriver instance showing the result list.
    """
    dropdown_xpath = '//*[@id="_view_1545184311000"]/div[8]/div/select'
    waiter = WebDriverWait(driver, 20)
    # First click opens the <select>, second click chooses option[3].
    for xpath in (dropdown_xpath, dropdown_xpath + '/option[3]'):
        waiter.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()
    time.sleep(1)
68 
def down_load(driver):
    """Click the first and then the third link of the result view's toolbar.

    Presumably these are the "select all" and "batch download" links --
    verify against the live page.

    Args:
        driver: Selenium WebDriver instance showing the result list.
    """
    first_link = (By.XPATH, '//*[@id="_view_1545184311000"]/div[2]/div[4]/a[1]')
    third_link = (By.XPATH, '//*[@id="_view_1545184311000"]/div[2]/div[4]/a[3]')
    waiter = WebDriverWait(driver, 20)

    waiter.until(EC.element_to_be_clickable(first_link)).click()
    # Original author's note: pausing here works best; shorter pauses led to
    # "network error" responses.
    time.sleep(2)
    waiter.until(EC.element_to_be_clickable(third_link)).click()
77 
def next_page(driver):
    """Click the last link inside the ``left_7_3`` div.

    Presumably that link is the pager's "next page" control -- verify
    against the live page.

    Args:
        driver: Selenium WebDriver instance showing the result list.
    """
    last_pager_link = (By.XPATH, '//div[@class="left_7_3"]/a[last()]')
    link = WebDriverWait(driver, 20).until(EC.element_to_be_clickable(last_pager_link))
    # Pause two seconds between finding the link and clicking it.
    time.sleep(2)
    link.click()
83 
84 
85 
if __name__ == "__main__":
    # NOTE(review): this chromedriver path looks like it lost its path
    # separators (probably E:\GoogleDriver\chromedriver.exe) -- confirm and
    # adjust for the local machine before running.
    driver = webdriver.Chrome('E:GoogleDriverchromedriver.exe')
    try:
        # Open the result page first; login() refreshes it and fills in the
        # login iframe.
        driver.get("https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html?pageId=b67ff15b548ff825d1e09dc899ecf778&s21=%E5%90%88%E5%90%8C%E7%BA%A0%E7%BA%B7")
        time.sleep(5)
        login(driver)
    finally:
        # Always shut the browser and the chromedriver process down, even
        # when login() stops with an exception or the run is interrupted.
        driver.quit()
原文地址:https://www.cnblogs.com/smartisn/p/13865729.html