手动修改key 伪修改内存变量

# -*- coding: UTF-8 -*-
import math
import random
import sys
import threading
import time
from time import ctime, sleep
import requests
import xlrd

# City codes whose districts are crawled; rows of the workbook with any other
# city code are ignored.
target_citycode_list = ['010', '021', '020', '0755']

# target_citycode_list = ['0755']
# adcode_dic maps citycode -> adcode -> {'name': ..., 'adcode': ...}.
adcode_dic = {}
# Workbook holding the city-code table (file name: "Amap API city code table").
FEXCEL = '高德地图API_城市编码对照表.xlsx'
data = xlrd.open_workbook(FEXCEL)
# The table is read from the second sheet of the workbook.
table = data.sheets()[1]
nrows = table.nrows
ncols = table.ncols
# Each row is read as: column 0 = name, column 1 = adcode, column 2 = citycode.
for i in range(0, nrows):
    l = table.row_values(i)
    name_ = l[0]
    adcode = l[1]
    citycode = l[2]
    if citycode in target_citycode_list:
        if citycode not in adcode_dic:
            adcode_dic[citycode] = {}
        adcode_dic[citycode][adcode] = {}
        adcode_dic[citycode][adcode]['name'] = name_
        adcode_dic[citycode][adcode]['adcode'] = adcode

# Flat list of every adcode collected above; one crawl request per adcode.
REQUEST_LIST = []
for i in adcode_dic:
    for ii in adcode_dic[i]:
        REQUEST_LIST.append(adcode_dic[i][ii]['adcode'])
# Length is captured once here, before any later in-place edit of the list.
REQUEST_LEN = len(REQUEST_LIST)
# Divisor used by main() to derive the number of worker threads.
EACH_THREAD_REQUEST_NUM = 1

# Not referenced anywhere else in this file.
MAX_PAGINATION = 100
# Throttle: the request functions sleep QPS_TIME_UNIT seconds whenever their
# request counter is a multiple of QPS.
QPS = 50
QPS_TIME_UNIT = 1
# http://lbs.amap.com/api/webservice/guide/tools/info
# Value of the response 'infocode' field that is treated as success.
INFOCODE_OK = '10000'
# API keys loaded from the key pool file, and the key currently in use.
KEY_POOL_LIST = []
touse_key = ''


def dynamic_write_pool_file():
    """Reload the global ``KEY_POOL_LIST`` from the ``key_pool.pool`` file.

    The file is re-read on every call so that keys edited by hand while the
    script is running are picked up. Each usable line has at least two
    tab-separated columns; the key is the first whitespace-delimited token of
    the second column. Blank lines are skipped silently, other lines without a
    key are reported and skipped.

    The new pool replaces ``KEY_POOL_LIST`` only after the whole file has been
    read, keeping the keys in file order. A status line is printed telling
    whether the pool content differs from the previous one.

    Raises:
        OSError: if the pool file cannot be opened or read.
    """
    global KEY_POOL_LIST
    file_name_key_pool = 'key_pool.pool'
    previous_pool = list(KEY_POOL_LIST)
    fresh_pool = []
    # The context manager guarantees the file handle is closed.
    with open(file_name_key_pool, 'r', encoding='utf-8') as pool_file:
        for line in pool_file:
            if not line.strip():
                continue
            columns = line.split('\t')
            tokens = columns[1].split() if len(columns) > 1 else []
            if not tokens:
                print('skipping malformed key pool line:', repr(line))
                continue
            fresh_pool.append(tokens[0])
    KEY_POOL_LIST = fresh_pool
    # Compare the lists directly: list.reverse() works in place and returns
    # None, so comparing its results can never detect a change.
    if fresh_pool == previous_pool:
        print(time.time(), '-old')
    else:
        print(time.time(), '66POOL-new')


# Load the key pool once at import time.
dynamic_write_pool_file()

# Endpoint used for the POI text search requests.
URL_TYPE = 'http://restapi.amap.com/v3/place/text'
touse_key = ''
# keywords = '&keywords='
# Page size: sent as the offset query parameter and used to derive the number
# of result pages from the reported count.
OFFSET_NUM = 24
OFFSET = '&offset=%s' % (OFFSET_NUM)
CITYLIMIT = '&citylimit=true'
# Not referenced anywhere else in this file.
# NOTE(review): the API parameter is presumably spelled "extensions" - confirm.
EXTENTION = '&extention=all'

# POI type codes (code / top category / middle category / sub category):
# 120000    commercial & residential    related    related
# 120100    commercial & residential    industrial park    industrial park
# 120200    commercial & residential    building    building related
# 120201    commercial & residential    building    office building
# 120202    commercial & residential    building    industrial building
# 120203    commercial & residential    building    mixed commercial/residential building
# 120300    commercial & residential    residential area    residential area
# 120301    commercial & residential    residential area    villa
# 120302    commercial & residential    residential area    residential community
# 120303    commercial & residential    residential area    dormitory
# 120304    commercial & residential    residential area    community center

# Amap returns at most 1000 records per query, so the type codes are requested
# at the finest granularity, one code per request.
POI_TYPES_LIST = ['120000', '120100', '120200', '120201', '120202', '120203', '120300', '120301', '120302', '120303',
                  '120304']
# POI_TYPES = '&types=120000|120100|120200|120201|120202120203|120300|120301|120302|120303|120304'

# Probe URL used by change_key() to check whether a key is accepted.
URL_FOR_CHANGE_KEY = 'http://restapi.amap.com/v3/place/text?key=%s&types=060100&city=010&OFFSET=1'
# Number of probe requests sent so far; drives the throttle in change_key().
change_key_qps = 0


def change_key():
    """Switch the global ``touse_key`` to a key that passes a probe request.

    The key pool is reloaded from disk first. Every pooled key other than the
    current one is tried in random order; the current key is tried last so
    that a pool holding only that key keeps working. A candidate is accepted
    when the probe request returns ``INFOCODE_OK``, and ``touse_key`` is only
    updated once a candidate has been accepted.

    Amap does not reliably follow its own QPS / daily-quota policy, so the
    return code of ordinary requests cannot be used to manage keys; each
    candidate is therefore checked with a dedicated probe request.

    Raises:
        SystemExit: with ``'NOInvalidKEY'`` when no pooled key passes the
            probe (including an empty pool).
    """
    global touse_key, change_key_qps

    dynamic_write_pool_file()
    current_key = touse_key
    # dict.fromkeys removes duplicate keys while preserving order.
    candidates = [key for key in dict.fromkeys(KEY_POOL_LIST) if key != current_key]
    random.shuffle(candidates)
    if current_key in KEY_POOL_LIST:
        candidates.append(current_key)

    # One bounded pass over the pool instead of unbounded recursion.
    for key in candidates:
        change_key_qps += 1
        if change_key_qps % QPS == 0:
            sleep(QPS_TIME_UNIT)
        try:
            # The timeout keeps one hung connection from blocking rotation.
            payload = requests.get(URL_FOR_CHANGE_KEY % key, timeout=10).json()
        except (requests.RequestException, ValueError) as exc:
            print('requests.get(url)', exc)
            continue
        if payload.get('infocode') == INFOCODE_OK:
            touse_key = key
            return
    sys.exit('NOInvalidKEY')


# Number of search requests issued so far; drives the QPS throttle.
requests_counter = 0
# Requests that still have to be fetched. This is the same list object as
# REQUEST_LIST (no copy is made), so in-place edits show through both names.
todo_list = REQUEST_LIST

# {adcode:[[],[]]}
# Collected results: adcode -> list of POI lists.
tosupply_dic = {}


def _fetch_json(url):
    """Send one throttled GET request and return the decoded JSON, or None on failure."""
    global requests_counter
    requests_counter += 1
    if requests_counter % QPS == 0:
        sleep(QPS_TIME_UNIT)
    try:
        # The timeout keeps one hung connection from blocking a worker forever.
        return requests.get(url, timeout=10).json()
    except (requests.RequestException, ValueError) as exc:
        print('requests.get(url)', exc)
        return None


def _queue_retry(request):
    """Make sure *request* is listed in ``todo_list`` so it is fetched again."""
    if request not in todo_list:
        todo_list.append(request)


def supply_dic(request):
    """Fetch every POI page for one adcode and store them in ``tosupply_dic``.

    For each code in ``POI_TYPES_LIST`` the first response supplies the total
    ``count`` and the first page of ``pois``; the remaining pages (2 up to
    ``ceil(count / OFFSET_NUM)``) are then requested one by one.

    The request is all-or-nothing: when any call fails, *request* is put on
    ``todo_list`` and nothing is stored, so a later retry cannot duplicate
    pages. A non-OK ``infocode`` additionally triggers ``change_key()``.
    On success ``tosupply_dic[request]`` is replaced with the fetched pages
    and *request* is removed from ``todo_list``.

    Args:
        request: the adcode sent as the ``city`` query parameter.
    """
    if requests_counter == 0:
        change_key()

    pages = []
    for poi_type in POI_TYPES_LIST:
        # The query parameter is "types", as in URL_FOR_CHANGE_KEY.
        base_url = '%s?key=%s&city=%s&types=%s%s%s' % (
            URL_TYPE, touse_key, request, poi_type, OFFSET, CITYLIMIT)
        first_page = _fetch_json(base_url)
        if first_page is None:
            _queue_retry(request)
            return
        if first_page.get('infocode') != INFOCODE_OK:
            _queue_retry(request)
            change_key()
            return

        page_count = math.ceil(int(first_page.get('count', 0)) / OFFSET_NUM)
        if page_count == 0:
            # No POI of this type in this district: nothing to store or retry.
            continue
        # A request without a page parameter already returned page 1.
        pages.append(first_page.get('pois', []))
        for page in range(2, page_count + 1):
            page_url = '%s&page=%s' % (base_url, page)
            print(page_url)
            payload = _fetch_json(page_url)
            if payload is None:
                _queue_retry(request)
                return
            if payload.get('infocode') != INFOCODE_OK:
                _queue_retry(request)
                change_key()
                return
            pages.append(payload.get('pois', []))

    if pages:
        tosupply_dic[request] = pages
    try:
        todo_list.remove(request)
    except ValueError:
        # Already removed, e.g. by an earlier successful run.
        pass


# deal_exception_list() keeps retrying while todo_list holds more entries
# than this threshold.
MAX_EXCEPTION_URL_NUM = 0


def deal_exception_list():
    """Retry the pending requests until ``todo_list`` is short enough.

    While ``todo_list`` holds more than ``MAX_EXCEPTION_URL_NUM`` entries,
    ``supply_dic`` is called for every pending adcode. The pending list is
    printed at the start of each round.

    The loop runs over a snapshot of ``todo_list`` because ``supply_dic``
    adds to and removes from that list while it works. Rounds are driven by a
    loop rather than by recursion, so many rounds cannot exhaust the call
    stack.
    """
    global todo_list
    while True:
        print(todo_list)
        if len(todo_list) <= MAX_EXCEPTION_URL_NUM:
            return
        for adcode in list(todo_list):
            supply_dic(adcode)


class MyThread(threading.Thread):
    """Thread that calls a function with one stored argument when run."""

    def __init__(self, func, args, name=''):
        """Store the callable, its single argument and the thread name."""
        super().__init__()
        self.func, self.args = func, args
        self.name = name

    def run(self):
        """Call the stored function, passing the stored argument as-is."""
        callback, argument = self.func, self.args
        callback(argument)


def main():
    """Crawl every adcode in worker threads and export the POIs to CSV.

    Steps:
      1. Start one ``MyThread`` running ``supply_dic`` for each of the first
         ``ceil(REQUEST_LEN / EACH_THREAD_REQUEST_NUM)`` adcodes and wait for
         all of them.
      2. Call ``deal_exception_list()`` to retry what is still pending.
      3. Write every collected POI to ``GEN_GD_business_building.csv``
         (UTF-8 with BOM), one row per POI under a header row.

    Rows are written with the ``csv`` module so that values containing commas
    or quotes are quoted instead of shifting columns. A field missing from a
    POI is written as an empty string.
    """
    import csv

    print('starting at:', ctime())
    thread_sum = math.ceil(REQUEST_LEN / EACH_THREAD_REQUEST_NUM)
    # Build all threads from a slice (a copy) before starting any of them:
    # supply_dic edits the shared request list while the workers run.
    threads_list = [
        MyThread(supply_dic, adcode, supply_dic.__name__)
        for adcode in REQUEST_LIST[:thread_sum]
    ]
    # The main process exits only after all non-daemon threads have exited.
    for worker in threads_list:
        worker.daemon = False
        worker.start()
    # Wait for all threads to finish.
    for worker in threads_list:
        worker.join()
    deal_exception_list()

    FGEN = 'GEN_GD_business_building.csv'
    columns = [
        'id', 'name', 'type', 'typecode', 'biz_type', 'address', 'location',
        'tel', 'distance', 'biz_ext', 'pname', 'cityname', 'adname', 'shopid',
        'shopinfo', 'poiweight',
    ]
    location_index = columns.index('location')
    # newline='' lets the csv writer control line endings itself.
    with open(FGEN, 'w', encoding='utf-8-sig', newline='') as fo:
        writer = csv.writer(fo, lineterminator='\n')
        writer.writerow(columns)
        for pages in tosupply_dic.values():
            for pois in pages:
                for poi in pois:
                    row = [poi.get(column, '') for column in columns]
                    # Keep the established "lng lat" form of the location.
                    row[location_index] = str(row[location_index]).replace(',', ' ')
                    writer.writerow(row)


# Run the crawl only when the file is executed as a script.
if __name__ == '__main__':
    main()
原文地址:https://www.cnblogs.com/rsapaper/p/7265042.html