爬虫之12306手工打码篇

 1 # -*- coding: utf-8 -*-
 2 # @Author  : Jackzz
 3 
 4 import requests,re,base64
 5 import urllib3
 6 
 7 urllib3.disable_warnings()
 8 
 9 def position_count(args):
10     """
11     1 2 3 4
12     5 6 7 8
13     :param args:
14     :return:
15     """
16     position_dict = {
17         '1': '49,50',
18         '2': '106,50',
19         '3': '174,50',
20         '4': '240,50',
21         '5': '50,121',
22         '6': '120,120',
23         '7': '174,123',
24         '8': '240,125',
25     }
26     position_data = []
27     for i in args:
28         position_data.append(position_dict.get(i))
29     return ','.join(position_data)
30 
def req_html(url_img):
    """Download a login captcha, ask the user to solve it, and submit the answer.

    Steps, all performed on one ``requests.Session`` so that cookies set by
    the image request are sent again with the check request:

    1. GET ``url_img`` and extract the base64 payload of the ``"image"``
       field from the response text.
    2. Decode it and write the bytes to ``ca.jpg`` in the current directory.
    3. Prompt on stdin for whitespace-separated tile numbers and convert them
       to a coordinate string with ``position_count``.
    4. GET the captcha-check endpoint with that string as ``answer`` and
       print the response text.

    :param url_img: URL of the captcha image endpoint.
    :return: None; the check response is printed, not returned.
    :raises ValueError: if the image response contains no ``"image"`` field.
    :raises requests.RequestException: on network failure, timeout, or an
        HTTP error status for the image request.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
    }
    image_params = {
        "login_site": "E",
        "module": "login",
        "rand": "sjrand",
        "1555069254145": "",
        "callback": "jQuery19105854264088499135_1555069122298",
        "_": "1555069122300",
    }
    # Seconds to wait on each request; without a timeout requests can block
    # forever on an unresponsive server.
    request_timeout = 10

    # 1. Create the session; the context manager closes its connections.
    with requests.Session() as session:
        # Assignment (not update) replaces the session's default header set
        # with this dict, exactly as the original script did.
        session.headers = headers

        # 2. Fetch the captcha.
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept to preserve existing behaviour, but it exposes the
        # request to interception -- confirm it is still required.
        html_page = session.get(
            url=url_img,
            params=image_params,
            verify=False,
            timeout=request_timeout,
        )
        html_page.raise_for_status()

        # 3. Extract and save the image.
        match = re.search(r'"image":"(.*?)",', html_page.text, re.S | re.M)
        if match is None:
            raise ValueError('captcha response does not contain an "image" field')
        img_data = base64.b64decode(match.group(1))
        with open(r'ca.jpg', 'wb') as f:
            f.write(img_data)

        # 4. Let the user solve the captcha by typing tile numbers.
        input_num = input('输入坐标字典的key:')
        pix_num = position_count(input_num.split())
        print(pix_num)

        # 5. Send the check request on the same session.
        check_params = {
            "callback": "jQuery19107793119804911866_1554992766399",
            "answer": pix_num,
            "rand": "sjrand",
            "login_site": "E",
            "_": "1554992766402",
        }
        html_page = session.get(
            url="https://kyfw.12306.cn/passport/captcha/captcha-check",
            params=check_params,
            verify=False,
            timeout=request_timeout,
        )
        print(html_page.text)
78 
if __name__ == '__main__':
    # Script entry point: run one captcha round against the base64
    # captcha-image endpoint.
    req_html('https://kyfw.12306.cn/passport/captcha/captcha-image64')

验证码图片下载后会保存为 ca.jpg。打开图片,输入正确图片对应的编号(即坐标字典的 key,取值 1–8),多个编号之间用空格分隔,程序会把编号转换成坐标后提交校验。运行结果如下:

 

输入坐标字典的key:1 7
49,50,174,123
/**/jQuery19107793119804911866_1554992766399({"result_message":"验证码校验成功","result_code":"4"});

Process finished with exit code 0
原文地址:https://www.cnblogs.com/jackzz/p/10698630.html