用 requests 先创建 Session 对象,再用它去发送请求,cookie 会被自动保存。
模拟有验证码的登入步骤:
1.发送请求登入页面;
2.分析验证码的地址,以及要将登入请求发往的地址(可以先输入错的密码登入一次,抓包获取发送地址)
3.将返回的验证码图片存入文件,打开图片查看验证码后手动输入;
4.整合所有数据以及验证码,发往登入验证界面;
5.登入成功后,即可访问登入后才能查看的界面,比如个人主页。
# -*- coding: utf-8 -*-
"""
Created on 2018-07-16

@author: sss
function: log in to ctgujwc
"""
import random
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import requests

# The built-in ``input`` replaces the previous import of ``raw_input`` from
# pip's private vendored ``distlib`` package. The alias keeps the old name
# available at module level.
raw_input = input

_LOGIN_URL = 'http://210.42.38.26:84/jwc_glxt/Login.aspx'
_PROFILE_URL = 'http://210.42.38.26:84/jwc_glxt/Stu_Info/Stu_info.aspx'
_LOGOUT_URL = 'http://210.42.38.26:84/jwc_glxt/Login.aspx?xttc=1'

# Seconds to wait for the server before giving up on a request.
_TIMEOUT = 10

# Hidden form values captured from the login page. They are only used as a
# fallback when the freshly fetched page does not expose the fields.
_DEFAULT_VIEWSTATE = '/wEPDwUKLTQ4NjU1OTA5NGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFCGJ0bkxvZ2luMRg1SjrafPmtoydz1mPeR4vBlIE='
_DEFAULT_EVENTVALIDATION = '/wEWBQK8vuPMAgKl1bKzCQKC3IeGDAK1qbSRCwLO44u1DdFTNDJgcOwlCVJHcDBqwrj3IMXf'

# NOTE(review): the last two entries look truncated; they are kept as-is.
_USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0)like Gecko",
    "Mozilla/5.0 (Macintosh; U; PPC Mac OS X ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS ",
]


def CheckCode(checkcode):
    """Save the captcha image and ask the user to type what it shows.

    Args:
        checkcode: Raw bytes of the captcha image.

    Returns:
        The text typed by the user, with surrounding whitespace removed.
    """
    with open('checkcode.jpg', 'wb') as f:
        f.write(checkcode)
    # The image has to be opened and read by a human; nothing is decoded here.
    text = input('请输入验证码:')
    return text.strip()


def WriteFile(test):
    """Write the given page text to ``ys_person.html``.

    Args:
        test: Text (HTML) to store.
    """
    # An explicit encoding avoids UnicodeEncodeError when the platform default
    # codec cannot represent every character of the page.
    with open('ys_person.html', 'w', encoding='utf-8') as f:
        f.write(test)
    print('已写入!')


def _hidden_field(soup, name, default):
    """Return the ``value`` of the ``<input name=...>`` tag, or ``default``."""
    tag = soup.find('input', attrs={'name': name})
    if tag is None:
        return default
    return tag.get('value', default)


def jwcLogin(username='name', password='password'):
    """Log in with a manually entered captcha, save the profile page, log out.

    Steps: fetch the login page, download the captcha it references, ask the
    user to read it, post the login form, store the personal page through
    ``WriteFile`` and finally request the logout URL.

    Args:
        username: Value sent as ``txtUserName`` (defaults to the placeholder
            the script always used).
        password: Value sent as ``txtPassword`` (same remark).

    Raises:
        RuntimeError: If the login page has no captcha image.
        requests.RequestException: On network failures, timeouts, or an HTTP
            error status for the login page or the captcha.
    """
    headers = {
        "Connection": "keep-alive",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "User-Agent": random.choice(_USER_AGENTS),
    }

    # A Session keeps the cookies between requests; the ``with`` block closes
    # its connections when we are done.
    with requests.Session() as sess:
        # Set the headers once so every request, including the profile and
        # logout ones, presents the same User-Agent as the login itself.
        sess.headers.update(headers)

        # Fetch the login page first.
        login_page = sess.get(_LOGIN_URL, timeout=_TIMEOUT)
        login_page.raise_for_status()
        bs = BeautifulSoup(login_page.text, 'lxml')

        # Locate the captcha image referenced by the page.
        img = bs.find('img', attrs={"id": 'ImageCheck'})
        src = img.get('src') if img is not None else None
        if not src:
            raise RuntimeError(
                'captcha image (id="ImageCheck") not found on the login page')
        checkcodeUrl = urljoin(_LOGIN_URL, src)

        # Download the captcha with the same session so it matches our cookie.
        captcha = sess.get(checkcodeUrl, timeout=_TIMEOUT)
        captcha.raise_for_status()
        text = CheckCode(captcha.content)

        # User name + password + captcha alone are not enough: the hidden
        # form fields must be posted back as well.
        data = {
            '__VIEWSTATE': _hidden_field(
                bs, '__VIEWSTATE', _DEFAULT_VIEWSTATE),
            '__EVENTVALIDATION': _hidden_field(
                bs, '__EVENTVALIDATION', _DEFAULT_EVENTVALIDATION),
            "txtUserName": username,
            "txtPassword": password,
            'btnLogin.x': '41',
            'btnLogin.y': '31',
            "CheckCode": text,
        }

        # Submit the login form.
        sess.post(_LOGIN_URL, data=data, timeout=_TIMEOUT)

        # Fetch the personal page that is only reachable once logged in,
        # then store it on disk.
        profile_html = sess.get(_PROFILE_URL, timeout=_TIMEOUT).text
        WriteFile(profile_html)

        # Log out.
        sess.get(_LOGOUT_URL, timeout=_TIMEOUT)
        print('退出!')


if __name__ == '__main__':
    jwcLogin()