C# 获取网页信息

  • 获取网页源码
 /// <summary>
 /// Downloads the resource at <paramref name="url"/> with
 /// <see cref="HttpWebRequest"/>/<see cref="HttpWebResponse"/> and returns the
 /// response body decoded as text.
 /// </summary>
 /// <param name="url">Address passed to <see cref="WebRequest.Create(string)"/>.</param>
 /// <returns>
 /// The whole response body. It is decoded with the "gb2312" encoding when the
 /// response reports a "gbk" or "gb2312" character set (case-insensitive), and
 /// as UTF-8 otherwise.
 /// </returns>
 public string GetUrlHtml(string url)
 {
     HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

     // Dispose the response as well as the stream; otherwise the underlying
     // connection is not released until the response is finalized.
     using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
     using (Stream respStream = response.GetResponseStream())
     {
         // CharacterSet may be null; string.Equals tolerates that, whereas
         // calling ToLower() on it would throw.
         string charset = response.CharacterSet;
         bool isGbCharset =
             string.Equals(charset, "gbk", System.StringComparison.OrdinalIgnoreCase) ||
             string.Equals(charset, "gb2312", System.StringComparison.OrdinalIgnoreCase);

         // NOTE(review): on .NET Core / .NET 5+ "gb2312" presumably requires
         // registering CodePagesEncodingProvider first - confirm for the target runtime.
         Encoding encoding = isGbCharset ? Encoding.GetEncoding("gb2312") : Encoding.UTF8;

         using (StreamReader reader = new StreamReader(respStream, encoding))
         {
             return reader.ReadToEnd();
         }
     }
 }
/// <summary>
/// Downloads the resource at <paramref name="url"/> with
/// <see cref="System.Net.WebClient"/> and returns the raw bytes decoded with
/// <see cref="System.Text.Encoding.Default"/>.
/// </summary>
/// <param name="url">Address passed to <c>WebClient.DownloadData</c>.</param>
/// <returns>The downloaded content as a string.</returns>
private static string htmlcontent(string url)
{
    // WebClient is IDisposable; release it as soon as the download completes.
    using (System.Net.WebClient wc = new System.Net.WebClient())
    {
        byte[] pageData = wc.DownloadData(url);

        // If Chinese text comes out garbled, decode with
        // System.Text.Encoding.UTF8.GetString(pageData) instead.
        return System.Text.Encoding.Default.GetString(pageData);
    }
}
  • 获取网页元素(HtmlAgilityPack)
using HtmlAgilityPack;
//HtmlNode node = doc.DocumentNode.SelectSingleNode(xpath);  //单个元素(未匹配时返回 null)
//HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes(xpath); //元素集合(未匹配时返回 null)
//

/// <summary>
/// For every child of <paramref name="menu1"/>, downloads the page at the
/// child's <c>MenuUrl</c>, selects the nodes matching <paramref name="Xpath"/>
/// and appends one new <c>Menu</c> (with <c>LevelMenu = 3</c>) per matched node
/// to that child's <c>ChildMenus</c>.
/// </summary>
/// <param name="menu1">Menu whose <c>ChildMenus</c> are expanded in place.</param>
/// <param name="Xpath">XPath expression evaluated against each downloaded page.</param>
/// <returns>The same <paramref name="menu1"/> instance that was passed in.</returns>
private static Menu Level_chidren_Menu(Menu menu1, string Xpath)
{
    foreach (var item in menu1.ChildMenus)
    {
        // Parse the child page's HTML source.
        HtmlDocument doc1 = new HtmlDocument();
        doc1.LoadHtml(HttpHelper.GetUrlHtml(item.MenuUrl));

        // SelectNodes returns null (not an empty collection) when nothing
        // matches, so skip this child instead of failing in the foreach below.
        HtmlNodeCollection nodes = doc1.DocumentNode.SelectNodes(Xpath);
        if (nodes == null)
        {
            continue;
        }

        foreach (var page in nodes)
        {
            // NOTE(review): assumes GetHtmlAttribute returns a node that has an
            // "href" attribute - confirm against HttpHelper.
            item.ChildMenus.Add(new Menu()
            {
                LevelMenu = 3,
                MenuName = page.InnerText,
                MenuUrl = HttpHelper.baseUrl + HttpHelper.GetHtmlAttribute(page.InnerHtml, "a").Attributes["href"].Value
            });
        }
    }

    return menu1;
}

  

原文地址:https://www.cnblogs.com/Zingu/p/14541846.html