爬豆瓣阅读遇到的问题

1. 需要发送 GET 和 POST 两种请求才能获取 response 信息,并且要把两个 response 分开处理,只返回 POST 请求的 response 信息。

class DoubanSpider(scrapy.Spider):
    """Spider that fetches a Douban Read category listing as JSON.

    The listing is requested from the ``/j/kind/`` endpoint with a JSON
    payload carrying a GraphQL query; see ``start_requests``.
    """

    name = 'douban'
    allowed_domains = ['read.douban.com']
    page_num = 0
    # NOTE(review): the Referer header in start_requests uses `?kind=100`,
    # so `?kind/100` here may be a typo -- confirm before relying on it.
    # start_requests below does not read start_urls.
    url1 = 'https://read.douban.com/category/?kind/100&page='
    start_urls = (
        url1 + str(page_num),
    )

    def start_requests(self):
        """Yield the single POST request that asks for the book listing.

        The payload is serialized with ``json.dumps`` to match the
        ``Content-Type: application/json`` header. No callback is passed,
        so Scrapy hands the response to the spider's ``parse`` method.
        """
        url = 'https://read.douban.com/j/kind/'
        headers = {
            "Content-Type": "application/json",
            "Referer": "https://read.douban.com/category/?kind=100&page=0&sort=hot",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
        }
        # The query is a triple-quoted literal: a plain "..." string cannot
        # span several lines and made the original a syntax error.
        payload = {"sort": "hot", "page": 1, "kind": 100,
                   "query": """
    query getFilterWorksList($works_ids: [ID!]) {
      worksList(worksIds: $works_ids) {
        
    
    title
    cover
    url
    isBundle
  
    
    url
    title
  
    
    author {
      name
      url
    }
    origAuthor {
      name
      url
    }
    translator {
      name
      url
    }
  
    
    abstract
    editorHighlight
  
    
    isOrigin
    kinds {
      
    name @skip(if: true)
    shortName @include(if: true)
    id
  
    }
    ... on WorksBase @include(if: true) {
      wordCount
      wordCountUnit
    }
    ... on WorksBase @include(if: true) {
      
    isEssay
    
    ... on EssayWorks {
      favorCount
    }
  
    
    isNew
    
    averageRating
    ratingCount
    url
  
  
  
    }
    ... on WorksBase @include(if: false) {
      isColumn
      isEssay
      onSaleTime
      ... on ColumnWorks {
        updateTime
      }
    }
    ... on WorksBase @include(if: true) {
      isColumn
      ... on ColumnWorks {
        isFinished
      }
    }
    ... on EssayWorks {
      essayActivityData {
        
    title
    uri
    tag {
      name
      color
      background
      icon2x
      icon3x
      iconSize {
        height
      }
      iconPosition {
        x y
      }
    }
  
      }
    }
    highlightTags {
      name
    }
  
    ... on WorksBase @include(if: false) {
      
    fixedPrice
    salesPrice
    isRebate
  
    }
    ... on EbookWorks {
      
    fixedPrice
    salesPrice
    isRebate
  
    }
    ... on WorksBase @include(if: true) {
      ... on EbookWorks {
        id
        isPurchased
        isInWishlist
      }
    }
  
        id
        isOrigin
      }
    }
  """,
                   "variables": {}}

        # scrapy.Request defaults to GET; the JSON body is only meaningful
        # to the endpoint when sent as POST.
        yield scrapy.Request(
            url,
            method="POST",
            headers=headers,
            body=json.dumps(payload),
        )

2. 分开之后,处理 POST 请求返回的 response 信息。

    def parse(self, response):
        """Yield one ``DoubanspiderItem`` per entry in the JSON response.

        The response body is decoded as JSON and its ``"list"`` entry is
        iterated. A fresh item is built and yielded for every element, so
        each book is emitted instead of only the last one, and an empty
        list yields nothing.

        Raises:
            KeyError: if ``"list"`` or one of the per-book keys read below
                is missing from the decoded JSON.
        """
        self.logger.debug(response.text)

        for work in json.loads(response.text)["list"]:
            self.logger.debug(work["title"])

            # Build a new item per book; reusing one instance would make
            # every yielded item share (and overwrite) the same fields.
            item = DoubanspiderItem()
            item["book"] = work["title"]
            item["author"] = work["origAuthor"]["name"]
            # NOTE(review): "price" and "grade" are filled with the title,
            # which looks like a placeholder -- confirm the intended
            # response fields before changing them.
            item["price"] = work["title"]
            item["number"] = work["wordCount"]
            item["grade"] = work["title"]
            item["info"] = work["abstract"]
            yield item
原文地址:https://www.cnblogs.com/xuezhihao/p/11658776.html