# Log in to Gitee (码云) with a POST request

# -*- coding: utf-8 -*-
import scrapy
import re

# Gitee (码云) login URL: https://gitee.com/login
# The login form contains, e.g., {userLogin: hanjian200}
# The session is carried by cookies

class MayunSpider(scrapy.Spider):
    """Log in to Gitee by POSTing the login form, then save the response.

    The spider first fetches the login page to read the value of the
    ``authenticity_token`` input, submits it together with the credentials
    to ``https://gitee.com/login``, and writes the body of the resulting
    response to ``mayun.html``.

    Credentials default to empty strings (as in the original code) and can
    be supplied as spider arguments, e.g.::

        scrapy crawl mayun -a user_login=NAME -a user_password=SECRET
    """

    name = 'mayun'
    allowed_domains = ['gitee.com']
    # Fetch the login page over HTTPS, the same scheme the form is posted to.
    start_urls = ['https://gitee.com/login']

    # Overridable via ``-a user_login=...`` / ``-a user_password=...``;
    # scrapy.Spider stores keyword arguments as instance attributes.
    user_login = ""
    user_password = ""

    def parse(self, response):
        """Read the form token from the login page and submit the login form.

        Yields a single ``scrapy.FormRequest`` whose response is handled by
        ``home_parse``. Yields nothing if the token input is not found.
        """
        authenticity_token = response.xpath(
            "//input[@name='authenticity_token']/@value"
        ).extract_first()

        # Without the token there is nothing valid to submit; a None value
        # must not be passed on as form data.
        if authenticity_token is None:
            self.logger.error(
                "authenticity_token not found on %s; login form not submitted",
                response.url,
            )
            return

        # Build the POST request and hand it to the engine.
        yield scrapy.FormRequest(
            "https://gitee.com/login",
            formdata={
                "authenticity_token": authenticity_token,
                "utf8": "✓",
                "commit": "登录",
                "user[login]": self.user_login,
                "user[password]": self.user_password,
                "user[remember_me]": "0",
            },
            callback=self.home_parse,
        )

    def home_parse(self, response):
        """Save the body of the response to the login POST as ``mayun.html``."""
        # Write the raw bytes: this keeps the page's own encoding intact and
        # avoids depending on the platform's default text encoding.
        with open("mayun.html", 'wb') as f:
            f.write(response.body)

# Original article: https://www.cnblogs.com/hanjian200ok/p/9519236.html