scrapy-redis

1.scrapy-redis(queue源码)

  •  在scrapy-redis中有三种方法来存放Request对象
  1. # 用这个配置项来选择使用哪种队列存放Request,队列源码路径:scrapy_redis/queue.py
          在settings.py中写上 SCHEDULER_QUEUE_CLASS = 'scrapy_redis.queue.PriorityQueue' # 默认使用优先级队列PriorityQueue(有序集合);其他可选:FifoQueue(列表)、LifoQueue(列表)

    class FifoQueue(Base):
        """Per-spider FIFO queue.

        Requests are pushed with ``lpush`` and taken with ``rpop``/``brpop``
        on the same key, so the oldest request comes out first.
        """

        def __len__(self):
            """Return the number of entries stored under ``self.key``."""
            size = self.server.llen(self.key)
            return size

        def push(self, request):
            """Encode ``request`` and push it onto the list at ``self.key``."""
            encoded = self._encode_request(request)
            self.server.lpush(self.key, encoded)

        def pop(self, timeout=0):
            """Pop one request from the opposite end to ``push``.

            When ``timeout`` is greater than zero the blocking ``brpop`` is
            used and ``timeout`` is passed through to it; otherwise a plain
            ``rpop`` is issued.

            Returns the decoded request, or ``None`` when nothing was popped.
            """
            if timeout > 0:
                reply = self.server.brpop(self.key, timeout)
                # A tuple reply carries the payload in its second slot.
                payload = reply[1] if isinstance(reply, tuple) else reply
            else:
                payload = self.server.rpop(self.key)

            if not payload:
                return None
            return self._decode_request(payload)
    先进先出(FifoQueue)
    class PriorityQueue(Base):
        """Per-spider priority queue abstraction using redis' sorted set."""

        def __len__(self):
            """Return the number of members stored under ``self.key``."""
            size = self.server.zcard(self.key)
            return size

        def push(self, request):
            """Encode ``request`` and add it with its negated priority as score."""
            payload = self._encode_request(request)
            # The score is the negated priority; pop() takes the member at
            # rank 0, so a larger request.priority is served earlier.
            rank_score = -request.priority
            # ZADD is sent as a raw command rather than through zadd(): the
            # argument order of zadd() differs between Redis and StrictRedis,
            # and its kwargs form only accepts str members, not bytes.
            self.server.execute_command('ZADD', self.key, rank_score, payload)

        def pop(self, timeout=0):
            """Pop the member at rank 0 and return it decoded.

            ``timeout`` is accepted but not used by this queue class.

            Returns the decoded request, or ``None`` when the range query
            came back empty.
            """
            # Read and remove rank 0 inside one MULTI/EXEC transaction so the
            # two steps are applied atomically.
            txn = self.server.pipeline()
            txn.multi()
            queued = txn.zrange(self.key, 0, 0)
            queued.zremrangebyrank(self.key, 0, 0)
            members, _removed = txn.execute()

            if not members:
                return None
            return self._decode_request(members[0])
    有序集合(PriorityQueue)默认使用这个
    class LifoQueue(Base):
        """Per-spider LIFO queue (a stack).

        Requests are pushed with ``lpush`` and taken with ``lpop``/``blpop``
        on the same key, so the newest request comes out first.
        """

        def __len__(self):
            """Return the number of entries stored under ``self.key``."""
            depth = self.server.llen(self.key)
            return depth

        def push(self, request):
            """Encode ``request`` and push it onto the list at ``self.key``."""
            encoded = self._encode_request(request)
            self.server.lpush(self.key, encoded)

        def pop(self, timeout=0):
            """Pop one request from the same end that ``push`` writes to.

            When ``timeout`` is greater than zero the blocking ``blpop`` is
            used and ``timeout`` is passed through to it; otherwise a plain
            ``lpop`` is issued.

            Returns the decoded request, or ``None`` when nothing was popped.
            """
            if timeout > 0:
                reply = self.server.blpop(self.key, timeout)
                # A tuple reply carries the payload in its second slot.
                payload = reply[1] if isinstance(reply, tuple) else reply
            else:
                payload = self.server.lpop(self.key)

            if not payload:
                return None
            return self._decode_request(payload)
    后进先出(LifoQueue)
原文地址:https://www.cnblogs.com/wtil/p/10837944.html