# Downloader middleware (下载中间件)

class DownMiddleware1(object):
    """Example Scrapy downloader middleware that passes everything through.

    Each hook's docstring lists the return values the downloader-middleware
    chain understands. The implementations here leave the flow untouched:
    ``process_request`` and ``process_exception`` return ``None`` and
    ``process_response`` returns the response it was given.
    """

    def process_request(self, request, spider):
        """Handle a request that is about to be downloaded.

        Called for every request that needs downloading, as it travels
        through the ``process_request`` hook of each downloader middleware.

        :param request: the request being processed
        :param spider: the spider the request is for
        :return:
            None: continue with the following middlewares and download;
            Response object: stop running ``process_request`` and start
                running ``process_response``;
            Request object: stop the middleware chain and hand the Request
                back to the scheduler;
            raise IgnoreRequest: stop running ``process_request`` and start
                running ``process_exception``.
        """
        # Example 1 (disabled): adjust the outgoing request, then return None
        # so the download continues.
        #
        #     from scrapy.http import Request
        #     # print(request)
        #     # request.method = "POST"
        #     request.headers['proxy'] = "{'ip_port': '111.11.228.75:80', 'user_pass': ''},"
        #     return None
        #
        # NOTE(review): Scrapy's built-in HttpProxyMiddleware reads the proxy
        # from request.meta['proxy'], not from a header -- confirm which one
        # is intended before enabling this snippet.
        #
        # Example 2 (disabled): skip the real download by returning a
        # hand-built Response (fetched here with the `requests` library).
        #
        #     from scrapy.http import Response
        #     import requests
        #     v = requests.get('http://www.baidu.com')
        #     data = Response(url='xxxxxx', body=v.content, request=request)
        #     return data
        return None

    def process_response(self, request, response, spider):
        """Handle a response on its way back through the middleware chain.

        :param request: the request that produced the response
        :param response: the response being processed
        :param spider: the spider the response is for
        :return:
            Response object: passed on to the other middlewares'
                ``process_response``;
            Request object: stop the middleware chain; the request is
                rescheduled for download;
            raise IgnoreRequest: ``Request.errback`` is called.
        """
        print('response1')
        return response

    def process_exception(self, request, exception, spider):
        """Handle an exception raised while downloading.

        Called when a download handler, or the ``process_request()`` of a
        downloader middleware, raises an exception.

        :param request: the request that triggered the exception
        :param exception: the exception that was raised
        :param spider: the spider the request is for
        :return:
            None: keep passing the exception to the following middlewares;
            Response object: stop calling further ``process_exception``
                methods;
            Request object: stop the middleware chain; the request is
                rescheduled for download.
        """
        return None

## Settings configuration (settings 设置)

# Downloader middlewares to enable, keyed by dotted class path; the integer
# is the value Scrapy uses to order the middleware chain.
DOWNLOADER_MIDDLEWARES = {
    # Project-template middleware, currently disabled:
    # 'sp3.middlewares.Sp3DownloaderMiddleware': 543,
    'sp3.middlewares.DownMiddleware1': 541,
}

  

# Original article (原文地址): https://www.cnblogs.com/catherine007/p/8644091.html