Python使用requests模块下载图片

MySQL中事先保存好爬取到的图片链接地址。

然后使用多线程把图片下载到本地。

# coding: utf-8
import MySQLdb
import requests
import os
import re
from threading import Thread
import datetime

# Browser-like User-Agent sent with every image request.
header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                        'Chrome/63.0.3239.132 Safari/537.36'}
# Directory the images are written to. A raw string keeps the backslash
# literal; the unprefixed form only worked because "\m" is not a recognised
# escape sequence, which newer Python versions warn about.
file_path = r'F:\mlu2'
# Create the target directory on first run.
if not os.path.exists(file_path):
    os.mkdir(file_path)


class Spider(object):
    """Download images whose URLs are stored in a MySQL table.

    ``get_link`` reads the URLs, ``download`` fetches one URL into
    ``file_path`` and ``run_main`` runs one download thread per URL and
    reports the total elapsed time.
    """

    # Seconds to wait on the server for one image. Without a timeout a
    # stalled connection would block its thread, and therefore the final
    # ``join`` in ``run_main``, forever.
    timeout = 30

    def __init__(self, file_path, header):
        """Store the download directory and the HTTP request headers.

        :param file_path: directory the image files are written to.
        :param header: dict of HTTP headers sent with every request.
        """
        self.file_path = file_path
        self.header = header

    def timer(func):
        """Decorator that prints how long the wrapped call took.

        Defined as a plain function so it can be applied with ``@timer``
        inside the class body (a ``staticmethod`` object is not callable
        there before Python 3.10). It is re-bound as a static method at the
        end of the class, so ``Spider.timer`` keeps working for callers.

        :param func: callable to wrap.
        :return: wrapper that forwards all arguments, prints the elapsed
            time after a successful call and returns ``func``'s result.
        """
        from functools import wraps

        @wraps(func)
        def time_count(*args, **kwargs):
            start_time = datetime.datetime.now()
            result = func(*args, **kwargs)
            elapsed = datetime.datetime.now() - start_time
            # ``elapsed.seconds`` excludes whole days, which are reported
            # separately through ``elapsed.days``.
            hour, remainder = divmod(elapsed.seconds, 3600)
            minute, second = divmod(remainder, 60)
            print("爬取完成")
            print("一共用时%s天%s时%s分%s秒" % (elapsed.days, hour, minute, second))
            return result
        return time_count

    def get_link(self):
        """Return the stored image URLs as a sequence of one-column rows.

        The cursor and the connection are always closed, even when the
        query fails.
        """
        # NOTE(review): the database credentials are hard-coded here; they
        # should come from configuration or the environment instead.
        conn = MySQLdb.connect(host='localhost',
                               port=3306,
                               user='root',
                               passwd='729814',
                               db='mlu',
                               charset='utf8')
        try:
            cur = conn.cursor()
            try:
                # ``image`` holds the image URLs that were crawled and
                # stored in MySQL beforehand.
                cur.execute('select image from msg limit 100')
                return cur.fetchall()
            finally:
                cur.close()
        finally:
            conn.close()

    def download(self, link):
        """Fetch ``link`` and save it in ``file_path`` under its URL file name.

        Failures are reported on stdout and never raised, so one bad link
        cannot take down its worker thread with a traceback.

        :param link: URL of the image to download.
        """
        # The file name is everything after the last "/" in the URL.
        match = re.search(r'.*/(.+)', link)
        if match is None:
            print("下载失败, 无法解析文件名: %s" % link)
            return
        filename = match.group(1)
        try:
            pic = requests.get(link, headers=self.header, timeout=self.timeout)
            if pic.status_code != 200:
                print("下载失败, 状态码%s: %s" % (pic.status_code, link))
                return
            with open(os.path.join(self.file_path, filename), 'wb') as fp:
                fp.write(pic.content)
            print("下载完成")
        except Exception as e:
            # Thread boundary: report the error and let the other downloads
            # carry on.
            print(e)

    @timer
    def run_main(self):
        """Download every stored link, one thread per link, and wait for all."""
        threads = []
        for row in self.get_link():
            # Each row is a one-column tuple holding the URL.
            worker = Thread(target=self.download, args=(str(row[0]),))
            worker.start()
            threads.append(worker)
        for worker in threads:
            worker.join()

    # Expose the decorator as a static method now that it has been applied
    # above; this keeps ``Spider.timer(func)`` usable from outside the class.
    timer = staticmethod(timer)


# Script entry: build the downloader from the module-level settings and
# start the threaded download run.
spider = Spider(file_path=file_path, header=header)
spider.run_main()
原文地址:https://www.cnblogs.com/delav/p/9398825.html