scrapy-items

items定义字段名字

import scrapy


class HrItem(scrapy.Item):
    """Scrapy item declaring the three fields collected for one record."""

    # Each attribute below declares one key that may be set on the item
    # via item['<name>'].
    title = scrapy.Field()
    position = scrapy.Field()
    pub_date = scrapy.Field()

当爬取到数据时

            # Build one HrItem from `data` using XPath queries relative to it.
            # NOTE(review): `data` is presumably the selector for a single
            # table row (<tr>) -- confirm against the enclosing loop.
            item = HrItem()
            # 1st cell: text of the link -> title
            item['title'] = data.xpath("./td[1]/a/text()").extract_first()
            # 2nd cell: text -> position
            item['position'] = data.xpath("./td[2]/text()").extract_first()
            # 5th cell: text -> pub_date
            item['pub_date'] = data.xpath("./td[5]/text()").extract_first()

在 pipelines 中将数据存入 MongoDB，存入前需先将 item 转换成 dict

from pymongo import MongoClient
# Module-level MongoDB handle shared by the pipeline below. MongoClient() is
# called with no arguments, so PyMongo's default connection settings apply.
client = MongoClient()
collection = client['SpiderAnything']['hr'] # database name, then collection name


class SpideranythingPipeline(object):
    """Item pipeline that stores ``HrItem`` objects in MongoDB.

    Items of any other type are not written here; they are returned
    unchanged so that later pipeline stages still receive them.
    """

    def process_item(self, item, spider):
        """Persist *item* when it is an ``HrItem`` and hand it on.

        Args:
            item: The scraped item emitted by the spider.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item``, in every case. Returning it outside the
            type check matters: a pipeline that returns ``None`` would
            pass ``None`` to the next pipeline stage.
        """
        if isinstance(item, HrItem):  # only HrItem is handled by this pipeline
            print(item)
            # insert_one() replaces Collection.insert(), which was deprecated
            # in PyMongo 3.0 and removed in 4.0. dict(item) makes a plain-dict
            # copy, so the driver adding `_id` does not touch the item itself.
            collection.insert_one(dict(item))
        return item
原文地址:https://www.cnblogs.com/tangpg/p/10685266.html