HtmlAgilityPack解析html

参考文档:https://www.cnblogs.com/asxinyu/p/CSharp_HtmlAgilityPack_XPath_Weather_Data.html#_label0

HtmlAgilityPack是一个开源的解析HTML元素的类库,最大的特点是可以通过XPath来解析HTML

下载地址如下:http://htmlagilitypack.codeplex.com/

XPath教程:http://www.w3school.com.cn/xpath/index.asp

XPath获取方法:

 官方API:http://html-agility-pack.net/parser

        /// <summary>
        /// Scrapes the December 2017 weather-history table for Taizhou from
        /// tianqihoubao.com and hands the parsed rows to the view.
        /// </summary>
        /// <returns>
        /// The default view with a list of <c>WeatherReport</c> as its model.
        /// The list is empty when the table or its rows cannot be located.
        /// </returns>
        public ActionResult Index()
        {
            HtmlWeb htmlWeb = new HtmlWeb();
            string url = "http://www.tianqihoubao.com/lishi/taizhou/month/201712.html";
            // Force GB2312 decoding so the Chinese text is not garbled.
            // NOTE(review): on .NET Core / .NET 5+ this code page presumably needs
            // CodePagesEncodingProvider to be registered first - confirm for the target runtime.
            htmlWeb.OverrideEncoding = Encoding.GetEncoding("gb2312");
            HtmlAgilityPack.HtmlDocument document = htmlWeb.Load(url);

            List<WeatherReport> weatherReports = new List<WeatherReport>();

            var table = document.DocumentNode.SelectSingleNode(@"//*[@id='content']/table");
            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so both lookups must be guarded before use.
            var rows = table != null ? table.SelectNodes(@"tr") : null;
            if (rows == null)
            {
                return View(weatherReports);
            }

            // Start at index 1 because the first row is the table header; indexing past it
            // avoids mutating the parsed node collection with RemoveAt.
            for (int i = 1; i < rows.Count; i++)
            {
                var cells = rows[i].SelectNodes(@"td");
                // Expected columns: date - weather - temperature - wind force/direction.
                if (cells == null || cells.Count != 4)
                {
                    continue;
                }

                var date1 = CleanCellText(cells[0].InnerText); // date of the current row
                var tq = CleanCellText(cells[1].InnerText);    // weather condition
                var qw = CleanCellText(cells[2].InnerText);    // temperature
                var fx = CleanCellText(cells[3].InnerText);    // wind force and direction

                Console.WriteLine("{0}:{1},{2},{3}", date1, tq, qw, fx);

                WeatherReport weatherReport = new WeatherReport
                {
                    Date = date1,
                    State = tq,
                    Temperature = qw,
                    Wind = fx
                };
                weatherReports.Add(weatherReport);
            }

            return View(weatherReports);
        }

        /// <summary>
        /// Strips carriage returns, line feeds and spaces from a table cell's text and trims the result.
        /// </summary>
        /// <param name="text">Raw inner text of a table cell.</param>
        /// <returns>The text with line breaks and spaces removed.</returns>
        private static string CleanCellText(string text)
        {
            return text.Replace("\r", "").Replace("\n", "").Replace(" ", "").Trim();
        }
View Code
原文地址:https://www.cnblogs.com/liandy0906/p/8085558.html