HttpHelper + HtmlAgilityPack + XPath 采集

C#版本的网络爬虫感觉还是很好用的。

 // Downloads a cnblogs article and prints the text of every <p> element that is a
 // direct child of the post body. HttpHelper / HttpItem are defined outside this
 // snippet; HTML parsing is done with HtmlAgilityPack.
 HttpHelper http = new HttpHelper();
 HttpItem item = new HttpItem() { URL = "http://www.cnblogs.com/danielWise/archive/2011/02/28/1966808.html" };
 string resultHtml = http.GetHtml(item).Html;

 // Fully qualified, matching the HtmlNode declarations below, so the name cannot be
 // confused with another HtmlDocument type that may be in scope.
 HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
 html.LoadHtml(resultHtml);

 HtmlAgilityPack.HtmlNode htmlNode = html.DocumentNode;

 // XPath of the element to extract. Single quotes are used inside the expression so
 // it does not terminate the surrounding C# string literal.
 HtmlAgilityPack.HtmlNode div = htmlNode.SelectSingleNode(".//*[@id='cnblogs_post_body']");

 if (div == null)
 {
     // SelectSingleNode yields null when nothing matches (e.g. the page layout changed
     // or the download failed); report it instead of crashing on the dereference.
     Console.Error.WriteLine("Element with id 'cnblogs_post_body' was not found.");
 }
 else
 {
     foreach (var paragraph in div.Elements("p"))
     {
         Console.WriteLine(paragraph.InnerText);
     }
 }

 // Keep the console window open until a key is pressed.
 Console.ReadKey();
原文地址:https://www.cnblogs.com/MaxJoker/p/5846426.html