使用 Go（Golang）爬取百度贴吧“绝地求生”吧的帖子标题

package main

import (
    "io"
    "log"
    "net/http"
    "os"
    "strconv"

    "github.com/antchfx/htmlquery"
)

func main() {
    base_url := "https://tieba.baidu.com/f?kw=%E7%BB%9D%E5%9C%B0%E6%B1%82%E7%94%9F&ie=utf-8&pn="

    var start ,end int

    start  = 1
    end = 20

    for i:=start;i<=end;i++{
        url  := base_url + strconv.Itoa((i-1)*50)
        result := download(url)

        list  := parse(result)
        f,_ := os.Create( strconv.Itoa(i) +".html")

        for _,v := range list{
            f.WriteString(v+"
")
        }
        f.Close()
    }

}

// download issues an HTTP GET for url and returns the response body.
//
// The signature has no error return, so a failed request or a non-200 status
// is logged and reported as an empty reader (http.NoBody) rather than causing
// a nil-pointer panic. On success the returned reader is the response body,
// which also implements io.Closer; the caller should close it once it has
// finished reading.
func download(url string) (result io.Reader) {
    resp, err := http.Get(url)
    if err != nil {
        log.Printf("download %s: %v", url, err)
        return http.NoBody
    }
    if resp.StatusCode != http.StatusOK {
        log.Printf("download %s: unexpected status %s", url, resp.Status)
        // The body is not handed to the caller, so release it here.
        resp.Body.Close()
        return http.NoBody
    }

    return resp.Body
}

// parse reads an HTML document from resp and returns the value of the title
// attribute of the first titled anchor inside every
// <div class="t_con cleafix"> element, in document order.
//
// It returns nil when the document cannot be parsed. Rows that contain no
// anchor with a title attribute are skipped.
func parse(resp io.Reader) (result []string) {
    doc, err := htmlquery.Parse(resp)
    if err != nil {
        log.Printf("parse: %v", err)
        return nil
    }

    // NOTE: "cleafix" is kept exactly as in the original selector; verify
    // against the page markup before "correcting" the spelling.
    rows := htmlquery.Find(doc, "//div[@class='t_con cleafix']")

    // Start with length 0: a non-zero length here would put empty strings in
    // front of the real titles.
    titles := make([]string, 0, len(rows))
    for _, row := range rows {
        attr := htmlquery.FindOne(row, ".//a/@title")
        if attr == nil {
            continue
        }
        titles = append(titles, htmlquery.SelectAttr(attr, "title"))
    }

    return titles
}

效果

原文地址:https://www.cnblogs.com/brady-wang/p/13557157.html