多进程下载 断点下载

import glob
import os
import requests, time, threading
# Platform path separator, kept under a short alias for the path-building code below.
os_sep = os.sep
# Directory the logos are saved into. The backslashes are escaped explicitly:
# an unescaped trailing backslash would escape the closing quote and leave the
# string literal unterminated.
img_dir = 'D:\\mylogo\\'
def spider_webimg_dl_return_local_img_path(img_dir, img_url, uid, uid_n, local_default='default.DONOT_REMOVE.png'):
    """Download an image and return the local path it was saved to.

    The image is stored as ``<img_dir><uid><uid_n>.jpg``. When the URL contains
    no ``.``, the server does not answer 200, the body is empty, or any error
    occurs, nothing is reported to the caller as saved and the path of the
    default placeholder image (``<img_dir><local_default>``) is returned.

    Args:
        img_dir: Target directory prefix; concatenated as-is, so it must
            already end with a path separator.
        img_url: URL of the image; any query string is dropped before fetching.
        uid: First part of the saved file name.
        uid_n: Second part of the saved file name (appended right after uid).
        local_default: File name of the fallback image inside ``img_dir``.

    Returns:
        The path of the file that was written, or the default image path.
    """
    r = '%s%s' % (img_dir, local_default)
    if '.' not in img_url:
        return r
    img_url = img_url.split('?')[0]
    try:
        # Fetch once and reuse the response for both the status and the body;
        # the timeout keeps a stalled server from blocking the run forever.
        response = requests.get(img_url, timeout=30)
        content = response.content
        # An empty body is treated like a failed download.
        if response.status_code == 200 and content:
            target = '%s%s%s%s' % (img_dir, uid, uid_n, '.jpg')
            with open(target, 'wb') as f:
                f.write(content)
            # Only report the new path once the file has really been written.
            r = target
    except Exception as e:
        # Best-effort download: log the problem, back off, and fall back to
        # the default image instead of aborting the caller.
        print(e)
        time.sleep(10)
    return r


# Resumable batch download: for every id listed in the input file, fetch its
# logo unless a .jpg whose file name contains that id is already present in
# img_dir.
f = 'dbuid.hadlogo.txt'
# The glob pattern does not depend on the id, so it is built once.
f_img_d = os.path.join(img_dir, '*.jpg')
with open(f, 'r', encoding='utf-8') as fr:
    for i in fr:
        # Drop the tab and newline characters around the id.
        uid = i.replace('\t', '').replace('\n', '')
        if not uid:
            # A blank line carries no id, and an empty string would match
            # every file name below.
            continue
        # Re-list on every iteration so files saved earlier in this run count.
        imgs = glob.glob(f_img_d)
        # Compare against the file name only, not the directory part of the path.
        had = any(uid in os.path.basename(ii) for ii in imgs)
        if not had:
            # Insert the id directly into the URL; the previous placeholder
            # replacement looked for a value that was not in the URL, so every
            # id requested the same logo.
            logo_url = 'http://img.a.r.com/site/{}/logo.jpg'.format(uid)
            spider_webimg_dl_return_local_img_path(img_dir, logo_url, uid, 'logo')
            # Short pause between downloads.
            time.sleep(0.5)
        else:
            print(uid)
原文地址:https://www.cnblogs.com/rsapaper/p/8888904.html