14-python登入教务网(python+bs4)

先用requests得到一个Session对象,再用它去发送请求,它会自动保存cookie。

模拟有验证码的登入步骤:

1.发送请求登入页面;

2.分析验证码的地址,以及登入请求要发往的地址(可以先输入错误的密码登入一次,抓包获取发送地址);

3.将返回的验证码存入文件,读取验证码,手动输入;

4.整合所有数据以及验证码,发往登入验证界面;

5.登入成功后即可访问只有登入后才能访问的页面了,比如个人主页。

#_*_ coding: utf-8 _*_

'''
Created on 2018年7月16日
@author: sss
function: 登入ctgujwc

'''
from bs4 import BeautifulSoup
import requests
import random
from pip._vendor.distlib.compat import raw_input

def CheckCode(checkcode):
    """Save the captcha image to disk and ask the user to type what it shows.

    Args:
        checkcode: Raw bytes of the captcha image. They are written
            unchanged to ``checkcode.jpg`` in the current working directory
            so the user can open the file and read the code.

    Returns:
        The string the user typed at the prompt.
    """
    # Write-only binary mode is sufficient: the file is never read back here.
    with open('checkcode.jpg', 'wb') as f:
        f.write(checkcode)
    # Use the builtin input() instead of raw_input from pip's private
    # vendored distlib module, which is an internal, unsupported API.
    return input('请输入验证码:')

def WriteFile(test, encoding='utf-8'):
    """Write a page of text to ``ys_person.html`` and print a confirmation.

    Args:
        test: The text (e.g. the HTML of the profile page) to store.
        encoding: Text encoding used for the output file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding,
            which may be unable to represent every character in the page.

    Returns:
        None. The file in the current working directory is overwritten.
    """
    # Write-only text mode is sufficient: the file is never read back here.
    with open('ys_person.html', 'w', encoding=encoding) as f:
        f.write(test)
    print('已写入!')
    
def jwcLogin(username="name", password="password", timeout=10):
    """Log in to the academic-affairs site, save the profile page, log out.

    Steps:
      1. Fetch the login page (the session stores the cookies it sets).
      2. Locate the captcha image, download it and ask the user to read it.
      3. POST the login form, including the hidden form fields.
      4. Fetch the student-info page and write it to disk.
      5. Request the logout URL.

    Args:
        username: Value submitted as ``txtUserName``.
        password: Value submitted as ``txtPassword``.
        timeout: Seconds to wait on each HTTP request before giving up, so
            an unresponsive server cannot hang the script forever.

    Raises:
        RuntimeError: If the login page contains no captcha image with
            id ``ImageCheck``.
        requests.RequestException: On network failures or timeouts.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import urljoin

    login_url = 'http://210.42.38.26:84/jwc_glxt/Login.aspx'
    profile_url = 'http://210.42.38.26:84/jwc_glxt/Stu_Info/Stu_info.aspx'
    logout_url = 'http://210.42.38.26:84/jwc_glxt/Login.aspx?xttc=1'

    ua_list = [
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0)like Gecko",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X ",
        "Mozilla/5.0 (Macintosh; Intel Mac OS ",
    ]
    headers = {
        "Connection": "keep-alive",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "User-Agent": random.choice(ua_list),
    }

    # Hidden form values captured from one recorded login. They are used only
    # as a fallback when the freshly fetched login page does not expose them.
    fallback_hidden_fields = {
        '__VIEWSTATE': '/wEPDwUKLTQ4NjU1OTA5NGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFCGJ0bkxvZ2luMRg1SjrafPmtoydz1mPeR4vBlIE=',
        '__EVENTVALIDATION': '/wEWBQK8vuPMAgKl1bKzCQKC3IeGDAK1qbSRCwLO44u1DdFTNDJgcOwlCVJHcDBqwrj3IMXf',
    }

    # The Session keeps cookies between requests; the with-block closes its
    # connections even if one of the steps below raises.
    with requests.Session() as sess:
        # Set the headers once so every request (including the profile and
        # logout requests) presents the same User-Agent.
        sess.headers.update(headers)

        # Step 1: fetch the login page and parse it with the lxml parser.
        html = sess.get(login_url, timeout=timeout).text
        bs = BeautifulSoup(html, 'lxml')

        # Step 2: find the captcha image address on the page.
        captcha_img = bs.find('img', attrs={"id": 'ImageCheck'})
        captcha_src = captcha_img.get('src') if captcha_img is not None else None
        if not captcha_src:
            raise RuntimeError('captcha image (id="ImageCheck") not found on the login page')
        # Resolve the src against the page URL, which handles both relative
        # and absolute src values.
        checkcode_url = urljoin(login_url, captcha_src)

        # Download the captcha, store it in a file and let the user type it.
        checkcode_data = sess.get(checkcode_url, timeout=timeout).content
        text = CheckCode(checkcode_data)

        # Step 3: build the form. Sending only user name + password + captcha
        # is not enough; the hidden fields must be included as well. Prefer
        # the values on the page just fetched, since recorded ones may go
        # stale if the server-side page changes.
        data = {}
        for field_name, recorded_value in fallback_hidden_fields.items():
            field = bs.find('input', attrs={'name': field_name})
            fresh_value = field.get('value') if field is not None else None
            data[field_name] = fresh_value if fresh_value else recorded_value
        data.update({
            "txtUserName": username,
            "txtPassword": password,
            'btnLogin.x': '41',
            'btnLogin.y': '31',
            "CheckCode": text,
        })

        # Send the login request.
        # NOTE(review): the response is not inspected, so a failed login is
        # not detected here; the saved page would then not be the profile.
        sess.post(login_url, data=data, timeout=timeout)

        # Step 4: fetch the personal page available after login and save it.
        profile_html = sess.get(profile_url, timeout=timeout).text
        WriteFile(profile_html)

        # Step 5: log out.
        sess.get(logout_url, timeout=timeout)
        print('退出!')

# Run the login flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    jwcLogin()
    
    
    
    
    
    
    
    
    
    
    

  

原文地址:https://www.cnblogs.com/zhumengdexiaobai/p/9320566.html