ORDER BY today_used ASC' % (MAX_USED_TIMES)

python D:pyminecleanspider_mapget_bd_uid_rest_b.py

python D:pyminecleanspider_mapget_bd_uid_rest.py

python D:pyminecleanspider_mapget_bd_uid_28_other20_b.py
 
#MAX_USED_TIMES = 1900
python D:pyminecleanspider_mapget_bd_uid_28_other20.py

python D:pyminecleanspider_mapget_bd_uid.py

python D:pyminecleanspider_mapget_bd_uid.py

python D:pyminecleanspider_mapget_bd_uid.py

  

import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Locate this script's directory and make its parent importable.
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: per-key usage ceiling compared against the today_used column.
# overrun_str: quota-exceeded message text (not referenced by the visible code).
# DB_KEY_EXHAUST: sentinel returned when no key is left under the ceiling.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file holding the key-usage table; it sits next to this script.
db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the city file carries a city name; spaces and the line
# break are stripped and the '市' suffix is appended.
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, 1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# Copy instead of alias, so growing the skip list does not also grow
# target_city_list_big.
target_city_list_pass = list(target_city_list_big)

# Cities to skip: the big cities plus every city read from the file.
for i in pcity_list:
    if i not in target_city_list_pass:
        target_city_list_pass.append(i)


# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE  FROM  baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#     conn.commit()
#     conn.close()


# db_init_key_table()
# target_city_list = target_city_list[0:11]
# target_city_list = target_city_list[0:11]



def db_get_one_effective():
    """Return the least-used API key that is still within the daily ceiling.

    Queries ``baidu_map_key_used`` for rows with ``today_used <= MAX_USED_TIMES``
    ordered by ``today_used`` ascending and returns the first key.

    :return: the key string, or ``DB_KEY_EXHAUST`` when no row qualifies.
    """
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used <= ? '
            'ORDER BY today_used ASC',
            (MAX_USED_TIMES,),
        ).fetchone()
    finally:
        # Release the connection on every path, including a failing query.
        conn.close()
    return DB_KEY_EXHAUST if res is None else res[0]


def db_update_one_today_used(key):
    """Count one more use of ``key`` and stamp the row with the current time.

    :param key: API key whose ``today_used`` counter is incremented.
    """
    # Timestamp format: yymmddHHMMSS in local time.
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    conn = sqlite3.connect(db)
    try:
        # Bound parameters avoid quoting problems in the key or timestamp.
        conn.execute(
            'UPDATE baidu_map_key_used '
            'SET today_used = today_used + 1, update_time = ? WHERE key = ?',
            (localtime_, key),
        )
        conn.commit()
    finally:
        # Close even when the UPDATE or the commit raises.
        conn.close()


# Sub-directories (next to this script) for results and for failed requests.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Base names (without '.txt') of result files known to exist.
requested_file_list = []
# The trailing '' makes join() end each path with a separator, because the
# writer functions build file names by plain concatenation.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
# Directory listing taken once at import time.
requested_file_dir = os.listdir(requested_file_dir_str)


def chk_if_requested_file():
    """Add the base name of every result file on disk to ``requested_file_list``.

    The directory is listed on each call, so files written after import (by
    this process or another one) are picked up. Names already in the list are
    not added again.
    """
    # Set copy gives O(1) membership tests; the shared list is only appended to.
    known = set(requested_file_list)
    for f in os.listdir(requested_file_dir_str):
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


# Seed requested_file_list with the names of result files that already exist.
chk_if_requested_file()


def write_requested_res(request_name, str_, type_='.txt'):
    """Store a response as ``<result dir><request_name><type_>`` (UTF-8).

    :param request_name: base name of the output file.
    :param str_: text written to the file.
    :param type_: file extension, including the dot.
    """
    # Names such as '上海市虹口区岳阳医院?' contain '?', which is dropped from the path.
    out_path = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write(str_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Store a failure marker as ``<exception dir><request_name><type_>`` (UTF-8).

    :param request_name: base name of the output file.
    :param str_: text written to the file.
    :param type_: file extension, including the dot.
    """
    # Names such as '上海市虹口区岳阳医院?' contain '?', which is dropped from the path.
    out_path = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write(str_)


# Work queue: {city: {district: [request_name, ...]}}.
request_dic = {}

# Cities found in the spreadsheet that are not on the skip list.
target_city_list = []


def gen_request_dic_list():
    """Load the task spreadsheet and queue every name still to be requested.

    Reads the first sheet of the task workbook next to this script, skipping
    the first row. Rows whose city is in ``target_city_list_pass`` are ignored.
    Every other city is recorded in ``target_city_list``. Names whose
    ``city + district + request_name`` is not in ``requested_file_list`` are
    added to ``request_dic`` without duplicates.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
    table = xlrd.open_workbook(FEXCEL).sheets()[0]
    # Set snapshot: one O(1) lookup per row instead of a list scan.
    done = set(requested_file_list)
    for i in range(1, table.nrows):
        # Each row must have exactly these nine columns, or unpacking raises.
        dbid, area_code, name_, request_name, type_, city, district, addr, street = table.row_values(i)
        if city in target_city_list_pass:
            continue
        if city not in target_city_list:
            target_city_list.append(city)
        request_name_chk = '%s%s%s' % (city, district, request_name)
        if request_name_chk in done:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)


# Build request_dic and target_city_list at import time.
gen_request_dic_list()

# NOTE(review): same value as the local inside gen_request_dic_list; not
# referenced again in the visible code.
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'

# Example request (access key redacted):
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=YOUR_AK
# R-QUERY, R-CITY and R-AK are placeholders substituted by fun_.
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'


def fun_(city):
    """Request place suggestions for every queued name of one city.

    For each name in ``request_dic[city]`` that is not yet in
    ``requested_file_list``, fetches a key with ``db_get_one_effective``, calls
    the URL built from ``base_url``, counts the call against the key and stores
    the response text. Processing of this city stops as soon as no key is
    available. A failing request writes a marker file to the exception
    directory and the loop continues.

    :param city: city name; must be a key of ``request_dic``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    for district in request_dic[city]:
        for request_name in request_dic[city][district]:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if request_name_chk in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # The remaining districts would find the same empty key pool,
                # so leave the function instead of only the inner loop.
                return
            # Percent-encode the values so characters such as '&' or '#' in a
            # name cannot alter the query string.
            url_ = (base_url
                    .replace('R-QUERY', quote(request_name, safe=''))
                    .replace('R-CITY', quote(city, safe=''))
                    .replace('R-AK', ak))
            try:
                # Without a timeout a stalled connection blocks the thread forever.
                bd_res_json_str = requests.get(url_, timeout=30).text
                db_update_one_today_used(ak)
                # NOTE(review): responses carrying the quota message (overrun_str)
                # are stored like any other result -- confirm this is intended.
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, exc)


class MyThread(threading.Thread):
    """Thread that calls ``func`` with ``args`` as its single argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func = func
        self.args = args

    def run(self):
        # args is handed over as one positional argument, not unpacked.
        worker, payload = self.func, self.args
        worker(payload)


# Number of target cities (one worker thread is created per queued city).
thread_sum = len(target_city_list)


def main():
    """Run ``fun_`` for every target city in its own thread and wait for all."""
    threads_list = []
    for city in target_city_list:
        # A city with nothing left to request has no request_dic entry;
        # starting a worker for it would raise KeyError inside fun_.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        # Set the flag itself; assigning to t.setDaemon only replaced the method.
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()

  

import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Locate this script's directory and make its parent importable.
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: per-key usage ceiling compared against the today_used column.
# overrun_str: quota-exceeded message text (not referenced by the visible code).
# DB_KEY_EXHAUST: sentinel returned when no key is left under the ceiling.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file holding the key-usage table; it sits next to this script.
db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the city file carries a city name; spaces and the line
# break are stripped.
# NOTE(review): no '市' suffix is appended here -- confirm the spreadsheet's
# city column uses the same form.
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, 1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', ''))
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# Target cities: everything read from the file except the big cities.
target_city_list = [c for c in pcity_list if c not in target_city_list_big]

# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE  FROM  baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#     conn.commit()
#     conn.close()


# db_init_key_table()
# Keep only the entries from index 11 onward.
# NOTE(review): presumably the first 11 cities are handled by another run -- confirm.
target_city_list = target_city_list[11:]


def db_get_one_effective():
    """Return the least-used API key that is still within the daily ceiling.

    Queries ``baidu_map_key_used`` for rows with ``today_used <= MAX_USED_TIMES``
    ordered by ``today_used`` ascending and returns the first key. The ordering
    spreads calls across keys instead of draining whichever row comes first.

    :return: the key string, or ``DB_KEY_EXHAUST`` when no row qualifies.
    """
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used <= ? '
            'ORDER BY today_used ASC',
            (MAX_USED_TIMES,),
        ).fetchone()
    finally:
        # Release the connection on every path, including a failing query.
        conn.close()
    return DB_KEY_EXHAUST if res is None else res[0]


def db_update_one_today_used(key):
    """Count one more use of ``key`` and stamp the row with the current time.

    :param key: API key whose ``today_used`` counter is incremented.
    """
    # Timestamp format: yymmddHHMMSS in local time.
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    conn = sqlite3.connect(db)
    try:
        # Bound parameters avoid quoting problems in the key or timestamp.
        conn.execute(
            'UPDATE baidu_map_key_used '
            'SET today_used = today_used + 1, update_time = ? WHERE key = ?',
            (localtime_, key),
        )
        conn.commit()
    finally:
        # Close even when the UPDATE or the commit raises.
        conn.close()


# Sub-directories (next to this script) for results and for failed requests.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Base names (without '.txt') of result files known to exist.
requested_file_list = []
# The trailing '' makes join() end each path with a separator, because the
# writer functions build file names by plain concatenation.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
# Directory listing taken once at import time.
requested_file_dir = os.listdir(requested_file_dir_str)


def chk_if_requested_file():
    """Add the base name of every result file on disk to ``requested_file_list``.

    The directory is listed on each call, so files written after import (by
    this process or another one) are picked up. Names already in the list are
    not added again.
    """
    # Set copy gives O(1) membership tests; the shared list is only appended to.
    known = set(requested_file_list)
    for f in os.listdir(requested_file_dir_str):
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


# Seed requested_file_list with the names of result files that already exist.
chk_if_requested_file()


def write_requested_res(request_name, str_, type_='.txt'):
    """Store a response as ``<result dir><request_name><type_>`` (UTF-8).

    :param request_name: base name of the output file.
    :param str_: text written to the file.
    :param type_: file extension, including the dot.
    """
    # Names such as '上海市虹口区岳阳医院?' contain '?', which is dropped from the path.
    out_path = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write(str_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Store a failure marker as ``<exception dir><request_name><type_>`` (UTF-8).

    :param request_name: base name of the output file.
    :param str_: text written to the file.
    :param type_: file extension, including the dot.
    """
    # Names such as '上海市虹口区岳阳医院?' contain '?', which is dropped from the path.
    out_path = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write(str_)


# Work queue: {city: {district: [request_name, ...]}}.
request_dic = {}


def gen_request_dic_list():
    """Load the task spreadsheet and queue every name still to be requested.

    Reads the first sheet of the task workbook next to this script, skipping
    the first row. Only rows whose city is in ``target_city_list`` are used.
    Names whose ``city + district + request_name`` is not in
    ``requested_file_list`` are added to ``request_dic`` without duplicates.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
    table = xlrd.open_workbook(FEXCEL).sheets()[0]
    # Set snapshot: one O(1) lookup per row instead of a list scan.
    done = set(requested_file_list)
    for i in range(1, table.nrows):
        # Each row must have exactly these nine columns, or unpacking raises.
        dbid, area_code, name_, request_name, type_, city, district, addr, street = table.row_values(i)
        if city not in target_city_list:
            continue
        request_name_chk = '%s%s%s' % (city, district, request_name)
        if request_name_chk in done:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)


# Build request_dic at import time.
gen_request_dic_list()

# NOTE(review): same value as the local inside gen_request_dic_list; not
# referenced again in the visible code.
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'

# Example request (access key redacted):
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=YOUR_AK
# R-QUERY, R-CITY and R-AK are placeholders substituted by fun_.
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'


def fun_(city):
    """Request place suggestions for every queued name of one city.

    For each name in ``request_dic[city]`` that is not yet in
    ``requested_file_list``, fetches a key with ``db_get_one_effective``, calls
    the URL built from ``base_url``, counts the call against the key and stores
    the response text. Processing of this city stops as soon as no key is
    available. A failing request writes a marker file to the exception
    directory and the loop continues.

    :param city: city name; must be a key of ``request_dic``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    for district in request_dic[city]:
        for request_name in request_dic[city][district]:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if request_name_chk in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # The remaining districts would find the same empty key pool,
                # so leave the function instead of only the inner loop.
                return
            # Percent-encode the values so characters such as '&' or '#' in a
            # name cannot alter the query string.
            url_ = (base_url
                    .replace('R-QUERY', quote(request_name, safe=''))
                    .replace('R-CITY', quote(city, safe=''))
                    .replace('R-AK', ak))
            try:
                # Without a timeout a stalled connection blocks the thread forever.
                bd_res_json_str = requests.get(url_, timeout=30).text
                db_update_one_today_used(ak)
                # NOTE(review): responses carrying the quota message (overrun_str)
                # are stored like any other result -- confirm this is intended.
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, exc)


class MyThread(threading.Thread):
    """Thread that calls ``func`` with ``args`` as its single argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func = func
        self.args = args

    def run(self):
        # args is handed over as one positional argument, not unpacked.
        worker, payload = self.func, self.args
        worker(payload)


# Number of target cities (one worker thread is created per queued city).
thread_sum = len(target_city_list)


def main():
    """Run ``fun_`` for every queued target city in its own thread and wait for all."""
    threads_list = []
    for city in target_city_list:
        # Cities with nothing queued have no request_dic entry; skip them.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        # Set the flag itself; assigning to t.setDaemon only replaced the method.
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading

# Locate this script's directory and make its parent importable.
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: per-key usage ceiling compared against the today_used column.
# overrun_str: quota-exceeded message text (not referenced by the visible code).
# DB_KEY_EXHAUST: sentinel returned when no key is left under the ceiling.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# SQLite file holding the key-usage table; it sits next to this script.
db = os.path.join(curPath, 'py_bdspider_status.db')

# Every third line of the city file carries a city name; spaces and the line
# break are stripped and the '市' suffix is appended.
pcity_list = []
pcity_file = os.path.join(curPath, '省会城市.txt')
with open(pcity_file, 'r', encoding='utf-8') as pf:
    for line_no, line in enumerate(pf, 1):
        if line_no % 3 == 0:
            pcity_list.append(line.replace(' ', '').replace('\n', '') + '市')
pcity_sorted_list = sorted(pcity_list)

target_city_list_big = ['广州市', '厦门市', '深圳市', '北京市', '杭州市', '成都市', '上海市', '西安市']
# Target cities: everything read from the file except the big cities.
target_city_list = [c for c in pcity_list if c not in target_city_list_big]

# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     sql = 'DELETE  FROM  baidu_map_key_used'
#     c.execute(sql)
#     conn.commit()
#     pcity_file = '%s\%s' % (curPath, 'bdmap_key.txt')
#     with open(pcity_file, 'r', encoding='utf-8') as pf:
#         c_ = 0
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace('\n', '').split('\t')
#             localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#             sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                 author, key, localtime_, 0)
#             c.execute(sql)
#     conn.commit()
#     conn.close()


# db_init_key_table()
# target_city_list = target_city_list[0:11]
# target_city_list = target_city_list[0:11]
# Keep only the entries from index 11 onward.
# NOTE(review): presumably the first 11 cities are handled by another run -- confirm.
target_city_list =target_city_list[11:]

def db_get_one_effective():
    """Return the least-used API key that is still within the daily ceiling.

    Queries ``baidu_map_key_used`` for rows with ``today_used <= MAX_USED_TIMES``
    ordered by ``today_used`` ascending and returns the first key.

    :return: the key string, or ``DB_KEY_EXHAUST`` when no row qualifies.
    """
    conn = sqlite3.connect(db)
    try:
        res = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used <= ? '
            'ORDER BY today_used ASC',
            (MAX_USED_TIMES,),
        ).fetchone()
    finally:
        # Release the connection on every path, including a failing query.
        conn.close()
    return DB_KEY_EXHAUST if res is None else res[0]


def db_update_one_today_used(key):
    """Count one more use of ``key`` and stamp the row with the current time.

    :param key: API key whose ``today_used`` counter is incremented.
    """
    # Timestamp format: yymmddHHMMSS in local time.
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    conn = sqlite3.connect(db)
    try:
        # Bound parameters avoid quoting problems in the key or timestamp.
        conn.execute(
            'UPDATE baidu_map_key_used '
            'SET today_used = today_used + 1, update_time = ? WHERE key = ?',
            (localtime_, key),
        )
        conn.commit()
    finally:
        # Close even when the UPDATE or the commit raises.
        conn.close()


# Sub-directories (next to this script) for results and for failed requests.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Base names (without '.txt') of result files known to exist.
requested_file_list = []
# The trailing '' makes join() end each path with a separator, because the
# writer functions build file names by plain concatenation.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
# Directory listing taken once at import time.
requested_file_dir = os.listdir(requested_file_dir_str)


def chk_if_requested_file():
    """Add the base name of every result file on disk to ``requested_file_list``.

    The directory is listed on each call, so files written after import (by
    this process or another one) are picked up. Names already in the list are
    not added again.
    """
    # Set copy gives O(1) membership tests; the shared list is only appended to.
    known = set(requested_file_list)
    for f in os.listdir(requested_file_dir_str):
        to_in = f.split('.txt')[0]
        if to_in not in known:
            known.add(to_in)
            requested_file_list.append(to_in)


# Seed requested_file_list with the names of result files that already exist.
chk_if_requested_file()


def write_requested_res(request_name, str_, type_='.txt'):
    """Store a response as ``<result dir><request_name><type_>`` (UTF-8).

    :param request_name: base name of the output file.
    :param str_: text written to the file.
    :param type_: file extension, including the dot.
    """
    # Names such as '上海市虹口区岳阳医院?' contain '?', which is dropped from the path.
    out_path = '{}{}{}'.format(requested_file_dir_str, request_name, type_).replace('?', '')
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write(str_)
    print('ok', threading.get_ident(), request_name)


def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Store a failure marker as ``<exception dir><request_name><type_>`` (UTF-8).

    :param request_name: base name of the output file.
    :param str_: text written to the file.
    :param type_: file extension, including the dot.
    """
    # Names such as '上海市虹口区岳阳医院?' contain '?', which is dropped from the path.
    out_path = '{}{}{}'.format(requested_file_dir_exception_str, request_name, type_).replace('?', '')
    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write(str_)


# Work queue: {city: {district: [request_name, ...]}}.
request_dic = {}


def gen_request_dic_list():
    """Load the task spreadsheet and queue every name still to be requested.

    Reads the first sheet of the task workbook next to this script, skipping
    the first row. Only rows whose city is in ``target_city_list`` are used.
    Names whose ``city + district + request_name`` is not in
    ``requested_file_list`` are added to ``request_dic`` without duplicates.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    FEXCEL = os.path.join(curPath, fname_source + '.xlsx')
    table = xlrd.open_workbook(FEXCEL).sheets()[0]
    # Set snapshot: one O(1) lookup per row instead of a list scan.
    done = set(requested_file_list)
    for i in range(1, table.nrows):
        # Each row must have exactly these nine columns, or unpacking raises.
        dbid, area_code, name_, request_name, type_, city, district, addr, street = table.row_values(i)
        if city not in target_city_list:
            continue
        request_name_chk = '%s%s%s' % (city, district, request_name)
        if request_name_chk in done:
            continue
        names = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in names:
            names.append(request_name)


# Build request_dic at import time.
gen_request_dic_list()

# NOTE(review): same value as the local inside gen_request_dic_list; not
# referenced again in the visible code.
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'

# Example request (access key redacted):
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=YOUR_AK
# R-QUERY, R-CITY and R-AK are placeholders substituted by fun_.
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'


def fun_(city):
    """Request place suggestions for every queued name of one city.

    For each name in ``request_dic[city]`` that is not yet in
    ``requested_file_list``, fetches a key with ``db_get_one_effective``, calls
    the URL built from ``base_url``, counts the call against the key and stores
    the response text. Processing of this city stops as soon as no key is
    available. A failing request writes a marker file to the exception
    directory and the loop continues.

    :param city: city name; must be a key of ``request_dic``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    for district in request_dic[city]:
        for request_name in request_dic[city][district]:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            chk_if_requested_file()
            if request_name_chk in requested_file_list:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                # The remaining districts would find the same empty key pool,
                # so leave the function instead of only the inner loop.
                return
            # Percent-encode the values so characters such as '&' or '#' in a
            # name cannot alter the query string.
            url_ = (base_url
                    .replace('R-QUERY', quote(request_name, safe=''))
                    .replace('R-CITY', quote(city, safe=''))
                    .replace('R-AK', ak))
            try:
                # Without a timeout a stalled connection blocks the thread forever.
                bd_res_json_str = requests.get(url_, timeout=30).text
                db_update_one_today_used(ak)
                # NOTE(review): responses carrying the quota message (overrun_str)
                # are stored like any other result -- confirm this is intended.
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception as exc:
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str, exc)


class MyThread(threading.Thread):
    """Thread that calls ``func`` with ``args`` as its single argument."""

    def __init__(self, func, args):
        super().__init__()
        self.func = func
        self.args = args

    def run(self):
        # args is handed over as one positional argument, not unpacked.
        worker, payload = self.func, self.args
        worker(payload)


# Number of target cities (one worker thread is created per queued city).
thread_sum = len(target_city_list)


def main():
    """Run ``fun_`` for every queued target city in its own thread and wait for all."""
    threads_list = []
    for city in target_city_list:
        # A city with nothing left to request has no request_dic entry;
        # starting a worker for it would raise KeyError inside fun_.
        if city not in request_dic:
            continue
        threads_list.append(MyThread(fun_, city))
    for t in threads_list:
        # Set the flag itself; assigning to t.setDaemon only replaced the method.
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()

  

原文地址:https://www.cnblogs.com/rsapaper/p/7450045.html