纯golang爬虫实战-(六)-关于cookiejar的理解 (2020-02-14 13:50)

针对上一篇遗留的 cookie 问题,我在 https://studygolang.com/articles/5228 找到了一段几年前的代码。原作者 golang_yh 发表的原文已经不见了,我对代码中的一处小错误进行了修复。

我觉得这段代码有助于理解 cookiejar:

package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "net/http/cookiejar"
)

// Package-level cookie state shared by the helpers in this file.
var (
    // gCurCookies holds the cookies most recently read back from the jar
    // by getUrlRespHtml.
    gCurCookies []*http.Cookie

    // gCurCookieJar is the jar handed to the HTTP client; initAll creates it.
    gCurCookieJar *cookiejar.Jar
)

// initAll resets the package-level cookie state: it clears gCurCookies and
// installs a freshly created, empty cookie jar in gCurCookieJar.
//
// It panics if the jar cannot be created, because every later request in
// this program relies on a usable jar.
func initAll() {
    gCurCookies = nil

    // nil options: no public suffix list is configured for the jar.
    jar, err := cookiejar.New(nil)
    if err != nil {
        panic("initAll: creating cookie jar: " + err.Error())
    }
    gCurCookieJar = jar
}

//get url response html
func getUrlRespHtml(url string) string {
    fmt.Printf("getUrlRespHtml, url=%s", url)

    var respHtml string = ""

    httpClient := &http.Client{
        CheckRedirect: nil,
        Jar:           gCurCookieJar,
    }

    httpReq, err := http.NewRequest("GET", url, nil)
    httpResp, err := httpClient.Do(httpReq)
    if err != nil {
        fmt.Printf("http get url=%s response error=%s
", url, err.Error())
    }
    fmt.Printf("httpResp.Header=%s", httpResp.Header)
    fmt.Printf("httpResp.Status=%s", httpResp.Status)

    defer httpResp.Body.Close()

    body, errReadAll := ioutil.ReadAll(httpResp.Body)
    if errReadAll != nil {
        fmt.Printf("get response for url=%s got error=%s
", url, errReadAll.Error())
    }
    //全局保存
    gCurCookies = gCurCookieJar.Cookies(httpReq.URL)

    respHtml = string(body)

    return respHtml
}

func dbgPrintCurCookies() {
    var cookieNum int = len(gCurCookies)
    fmt.Printf("cookieNum=%d", cookieNum)
    for i := 0; i < cookieNum; i++ {
        var curCk *http.Cookie = gCurCookies[i]
        fmt.Printf("
------ Cookie [%d]------", i)
        fmt.Printf("	Name=%s", curCk.Name)
        fmt.Printf("	Value=%s", curCk.Value)
        fmt.Printf("	Path=%s", curCk.Path)
        fmt.Printf("	Domain=%s", curCk.Domain)
        fmt.Printf("	Expires=%s", curCk.Expires)
        fmt.Printf("	RawExpires=%s", curCk.RawExpires)
        fmt.Printf("	MaxAge=%d", curCk.MaxAge)
        fmt.Printf("	Secure=%t", curCk.Secure)
        fmt.Printf("	HttpOnly=%t", curCk.HttpOnly)
        fmt.Printf("	Raw=%s", curCk.Raw)
        fmt.Printf("	Unparsed=%s", curCk.Unparsed)
    }
}

func main() {
    initAll()

    fmt.Printf("====== step 1:get Cookie ======")
    var baiduMainUrl string = "http://www.baidu.com/"
    fmt.Printf("baiduMainUrl=%s", baiduMainUrl)
    getUrlRespHtml(baiduMainUrl)
    dbgPrintCurCookies()

    fmt.Printf("
====== step 2:use the Cookie ======")

    var getapiUrl string = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true"
    getUrlRespHtml(getapiUrl)
    dbgPrintCurCookies()
}

另外,https://segmentfault.com/q/1010000010339661 中的实践也有助于深入理解 cookiejar。

原文地址:https://www.cnblogs.com/pu369/p/12318490.html