模拟登录古诗文网

注意事项:
- 1.模拟登录的请求参数有些是动态的,需要从页面源码中动态解析获取
- 2.下载图片最好是用requests来下载,而不是urllib,因为requests的请求会带上我们设置的UA信息
- 3.使用Session去自动处理cookie请求

import requests
from lxml import etree
from urllib import request
import http.client, mimetypes, urllib, json, time, requests
from get_img_code import YDMHttp


def get_code(types, filename):
    """Recognise a captcha image through the YunDaMa service (``YDMHttp``).

    Args:
        types: Captcha type ID forwarded to ``YDMHttp.decode`` (for example
            1004 means a 4-character alphanumeric code; different types are
            billed differently, see http://www.yundama.com/price.html).
        filename: Path of the captcha image file to recognise.

    Returns:
        The recognition result returned by ``YDMHttp.decode``, or ``None``
        when the account placeholders below have not been replaced yet.
    """
    # Username of the (regular) YunDaMa user account.
    username = 'xxxxxxxx'

    # Password of that account.
    password = 'xxxxxxxx'

    # Software ID, required for the developer revenue share. Obtain it from
    # the developer console under "My Software". 0 is a placeholder; the
    # original listing had a redacted bare token here that raised NameError.
    # NOTE(review): presumably an integer ID - confirm against YDMHttp.
    appid = 0

    # Software key, required for the developer revenue share. Obtain it from
    # the developer console under "My Software".
    appkey = 'xxxxxxxxxxxxxxxxxxxxxxxxxx'

    # Recognition timeout in seconds.
    timeout = 60

    # Bail out before touching the service while the placeholders are still
    # in place. The guard compares against the placeholder actually used in
    # this file, so it really triggers on an unconfigured copy.
    if username == 'xxxxxxxx' or not appid:
        print('请设置好相关参数再测试')
        return None

    # Initialise the client.
    yundama = YDMHttp(username, password, appid, appkey)

    # Log in to YunDaMa.
    uid = yundama.login()
    print('uid: %s' % uid)

    # Query the remaining balance.
    balance = yundama.balance()
    print('balance: %s' % balance)

    # Run the recognition: image path, captcha type ID, timeout (seconds).
    cid, result = yundama.decode(filename, types, timeout)
    print('cid: %s, result: %s' % (cid, result))
    return result


#########################################
# Send every request through one Session so that cookies set by the login
# page are returned automatically on later requests. The Session gets its own
# name instead of rebinding `requests`, which would hide the module for the
# rest of the file.
session = requests.Session()
home_url = "https://so.gushiwen.org/user/login.aspx"
login_url = "https://so.gushiwen.org/user/login.aspx"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
# Seconds to wait for each HTTP request; without it a stalled server would
# block the script forever.
request_timeout = 30


def _first_match(doc, expression):
    """Return the first XPath match in *doc*, or raise a descriptive IndexError."""
    matches = doc.xpath(expression)
    if not matches:
        raise IndexError("login page has no match for %s" % expression)
    return matches[0]


# 1. Request the login page, read the dynamic form fields from its source and
#    download the captcha image to a local file.
home_response = session.get(url=home_url, headers=headers, timeout=request_timeout)
home_response.raise_for_status()
page_text = home_response.text
tree = etree.HTML(page_text)
view_state = _first_match(tree, '//input[@id="__VIEWSTATE"]/@value')
view_state_generator = _first_match(tree, '//input[@id="__VIEWSTATEGENERATOR"]/@value')
img_code_url = "https://so.gushiwen.org" + _first_match(tree, '//img[@id="imgCode"]/@src')
img_response = session.get(url=img_code_url, headers=headers, timeout=request_timeout)
img_response.raise_for_status()
page_content = img_response.content
with open("./code.png", "wb") as f:
    f.write(page_content)

# 2. Recognise the captcha image.
code = get_code(1004, "./code.png")
if not code:
    # Posting the form without a captcha value cannot succeed, so stop here.
    raise SystemExit("captcha recognition returned no result; login aborted")

data = {
    "__VIEWSTATE": view_state,
    "__VIEWSTATEGENERATOR": view_state_generator,
    "from": "",
    "email": "xxxxxxxxxx@qq.com",
    "pwd": "xxxxxxxx",
    "code": code,
    "denglu": "登录",
}
# Debug output of the submitted form; note that it includes the password.
print(data)

# 3. Perform the simulated login and save the returned page for inspection.
page_text = session.post(url=login_url, headers=headers, data=data, timeout=request_timeout).text
with open("./gushiwen1.html", "w", encoding="utf-8") as f:
    f.write(page_text)
古诗文网携带验证码的模拟登录
原文地址:https://www.cnblogs.com/groundcontrol/p/12622907.html