Python题目5:爬取CFDA数据

import requests

class Cfda:
    """Scrape enterprise names from the CFDA licence listing endpoint.

    Each call to ``getData`` POSTs one form to ``self.url`` and keeps the
    response; ``extractData`` then appends the ``EPS_NAME`` of every record in
    that response to the output file. Call ``fclose`` (or use the instance as
    a context manager) when done.
    """

    def __init__(self, path='F:BaiduYunDownloadpythoncfda.txt',
                 encoding='utf-8', timeout=30):
        """Set up the endpoint, request headers and the output file.

        Args:
            path: File the extracted names are appended to.
            encoding: Text encoding used for the output file.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error.
        """
        # Endpoint that the search form is submitted to.
        self.url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList'

        # Anti-scraping workaround: request headers copied from the browser
        # (F12 -> Network -> Request headers).
        self.header = {
            'Accept': '*/*',
            'Content-Type': 'application/x-www-form-urlencoded;utf-8',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'http://125.35.6.84:81/xk/',
            'Accept-Language': 'zh-CN',
            'Accept-Encoding': 'gzip,deflate',
            'User-Agent': 'Mozilla/5.0(Windows NT 6.3;WOW64;Trident/7.0;rv:11.0) like Gecko',
            'Host': '125.35.6.84:81',
        }

        self.timeout = timeout

        # NOTE(review): the default path looks like a Windows path whose
        # backslashes were lost (e.g. F:\BaiduYunDownload\python\cfda.txt);
        # it is kept unchanged here -- confirm the intended location.
        # An explicit encoding keeps non-ASCII names from depending on the
        # platform's locale default.
        self.f = open(path, 'a', encoding=encoding)

    def __enter__(self):
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the output file when leaving a ``with`` block."""
        self.fclose()

    def getData(self, data):
        """POST ``data`` as form fields to ``self.url`` and store the response.

        The response object is kept in ``self.html`` for ``extractData``.
        """
        # Without a timeout, requests waits indefinitely on a stalled server.
        self.html = requests.post(self.url, data=data, headers=self.header,
                                  timeout=self.timeout)
        # The response body is JSON (browser: F12 -> Network -> Response).

    def extractData(self):
        """Append every record's ``EPS_NAME`` from the last response to the file.

        Each name is followed by two newlines. ``self.cfda_data`` is left
        holding the last name written (unchanged if the list is empty).

        Raises:
            KeyError: If the JSON has no ``'list'`` key or a record has no
                ``'EPS_NAME'`` key.
        """
        # Parse the JSON body once instead of once per record.
        records = self.html.json()['list']
        for record in records:
            self.cfda_data = record['EPS_NAME']
            self.f.write(self.cfda_data + '\n' * 2)

    def fclose(self):
        """Close the output file."""
        self.f.close()

if __name__ == '__main__':
    # Script entry point: fetch the listing page by page and append the
    # enterprise names from each page to the scraper's output file.
    cfda = Cfda()

    try:
        # Pages 1 through 19 (the upper bound of range is exclusive).
        for m in range(1, 20):
            # Form fields submitted with each request; only 'page' varies.
            data = {'on': 'true',
                    'page': m,
                    'pageSize': '15',
                    'productName': '',
                    'conditionType': '1',
                    'applyname': '',
                    'applysn': '',
                    }

            cfda.getData(data)
            cfda.extractData()
    finally:
        # Close the output file even if a request or parsing step raised,
        # so names written so far are flushed and the handle is not leaked.
        cfda.fclose()

  

原文地址:https://www.cnblogs.com/rouge2017/p/8361645.html