使用python的requests模块采集请求中的数据

 1 # coding=gbk
 2 
 3 # Create your tests here.
 4 
 5 import requests, re, json, math
 6 
 7 # 设置全局session变量
 8 r_session = requests.session()
 9 
10 #登录账户
11 username = 'admin'
12 
13 #登录密码
14 password = '111111'
15 
16 #登录ip
17 ip = 'localhost'
18 
19 #登录port
20 port = '8088'
21 
22 #模型总数
23 num = 1679
24 
def login():
    """Log in through the web login endpoint using the shared session.

    Posts the module-level ``username`` and ``password`` as form data to
    ``http://{ip}:{port}/login`` via ``r_session``, so that later requests
    made through the same session carry the login state.

    Returns:
        The response object of the login request. Earlier versions discarded
        it; returning it lets callers inspect the outcome if they want to.
    """
    base_url = 'http://{ip}:{port}'.format(ip=ip, port=port)

    # Browser-like headers for the login form submission.
    # Content-Length is intentionally not set by hand: requests derives it
    # from the encoded form body, and a fixed value such as '40' is only
    # correct for one specific username/password length.
    login_headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': '{ip}:{port}'.format(ip=ip, port=port),
        'Origin': base_url,
        'Referer': base_url + '/login',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Form body of the login request.
    login_data = {"username": username, "password": password}

    return r_session.post(base_url + '/login', headers=login_headers, data=login_data)
42 
def get_model_id(num, page_size=100):
    """Collect the ids of all models by paging through ``/model/list``.

    Args:
        num: Total number of models to fetch; determines how many pages
            are requested.
        page_size: Number of models requested per page (the ``limit`` query
            parameter). Defaults to 100, the value previously hard-coded.

    Returns:
        A list with the ``id`` field of every row returned by the server,
        in the order the pages were fetched.

    Raises:
        ValueError: If ``page_size`` is not positive.
    """
    if page_size <= 0:
        raise ValueError('page_size must be a positive integer')

    page_count = math.ceil(num / page_size)
    model_id = []  # accumulates the ids from every page
    model_headers = {'Content-Type': 'application/json'}
    url_template = (
        "http://{}:{}/model/list?limit={}&offset={}"
        "&sortName=updatetime&sortOrder=desc&name=&id=&status="
    )

    for page in range(page_count):
        body = r_session.get(
            url_template.format(ip, port, page_size, page * page_size),
            headers=model_headers,
        ).text
        # A response without a "rows" key yields None; treat it as an empty
        # page instead of failing on iteration.
        rows = json.loads(body).get("rows") or []
        model_id.extend(row.get("id") for row in rows)

    return model_id
58 
def get_error(model_id):
    """Find models whose indicator id disagrees with the id in the indicator name.

    For every id in ``model_id`` the model's ``/model/modify/<id>`` page is
    fetched, the value of the hidden ``edbModel`` input is extracted, and each
    ``indicatorId`` is compared with the bracketed number found in the
    corresponding ``indicatorName``. The model id is recorded once for every
    mismatching pair, so an id can appear more than once.

    The collected ids and their count are printed.

    Args:
        model_id: Iterable of model ids to check.

    Returns:
        The list of offending model ids (as strings), same as what is printed.
    """
    base_url = 'http://{ip}:{port}/model/modify/'.format(ip=ip, port=port)

    # Compiled once, outside the loop.
    hidden_input_re = re.compile(
        r'^<input id="edbModel" type="hidden" value="(.*?)" />$', re.M)
    # The backslashes are essential here: \d matches digits and \[ \] match
    # the literal brackets around the id embedded in the indicator name.
    indicator_re = re.compile(
        r'indicatorId:(?P<indicatorId>\d*?),indicatorName:.*?\[(?P<indicatorName>\d*?)\]',
        re.M)

    repeat_id = []  # ids of models with at least one mismatching indicator
    for model_id_detail in model_id:
        model_detail = r_session.get(base_url + str(model_id_detail)).text

        found = hidden_input_re.findall(model_detail)
        if not found:
            # Page has no edbModel field; report it and keep going rather
            # than aborting the whole scan with an IndexError.
            print('skip model {}: edbModel field not found'.format(model_id_detail))
            continue

        # Strip HTML-escaped quotes, doubled commas and backslashes so the
        # value can be scanned with a simple pattern.
        edbmodel = found[0].replace('&quot;', '').replace(',,', ',').replace('\\', '')

        for indicator_id, name_id in indicator_re.findall(edbmodel):
            if indicator_id != name_id:
                repeat_id.append(str(model_id_detail))

    print(repeat_id)  # offending model ids
    print(len(repeat_id))  # number of mismatches found
    return repeat_id
74 
if __name__ == '__main__':
    # Log in first so the shared session carries the login state.
    login()
    # Fetch every model id as a list, then report the errors found for them.
    get_error(get_model_id(num))
原文地址:https://www.cnblogs.com/Goongo/p/9214597.html