抓取国家气象局3天天气预报

/// <summary>
        /// Scrapes the 3-day weather forecast published by the China Meteorological
        /// Administration (http://www.cma.gov.cn/tqyb/) and returns it as XML.
        /// </summary>
        /// <remarks>
        /// The index page is scanned for quoted numeric city codes of five or more digits;
        /// every distinct code greater than 10000 is turned into a detail-page address
        /// (e.g. http://www.cma.gov.cn/tqyb/weatherdetail/57957.html) which is downloaded
        /// and parsed with regular expressions. Failures are logged and skipped, so the
        /// method is best-effort and does not throw for a single bad page.
        /// </remarks>
        /// <returns>
        /// A document shaped as <c>root/citylist/city</c>. <c>citylist</c> carries a
        /// <c>vdatetime</c> attribute (today's date in the current culture's short format).
        /// Each <c>city</c> has a <c>cityname</c> attribute, up to three <c>day</c> children
        /// (attributes <c>vdatetime</c>, <c>weather</c>, <c>temperature</c>, <c>wind</c>)
        /// and a <c>comment</c> child holding a CDATA section of index lines joined by
        /// <c>&lt;br/&gt;</c>. <c>citylist</c> is always present, even when no city was parsed.
        /// </returns>
        public static XmlDataDocument GetCMAWeather()
        {
            XmlDataDocument objXml = new XmlDataDocument();
            objXml.LoadXml("<root />");

            // Attach the city list once, up front, so the result has a stable shape.
            XmlElement objXmlCityList = objXml.CreateElement("citylist");
            objXmlCityList.SetAttribute("vdatetime", DateTime.Now.ToShortDateString());
            objXml.DocumentElement.AppendChild(objXmlCityList);

            // Fetch the index page that lists every city.
            string indexContent = GetContent("http://www.cma.gov.cn/tqyb/", "gb2312");
            Yesun.Edzh.BLL.Log.LogHelperService.WriteError("开始从中国气象局抓取天气预报");

            // One timestamp for the whole run keeps the generated dates consistent.
            DateTime today = DateTime.Now;

            // The same code can be quoted several times on the index page; fetch each city once.
            System.Collections.Generic.Dictionary<string, bool> seenCityCodes =
                new System.Collections.Generic.Dictionary<string, bool>();

            for (Match codeMatch = CmaCityCodeRegex.Match(indexContent); codeMatch.Success; codeMatch = codeMatch.NextMatch())
            {
                string cityCode = codeMatch.Groups["citycode"].Value;

                // TryParse instead of Convert.ToInt32: an over-long digit run is skipped
                // rather than surfacing as an OverflowException.
                int numericCode;
                if (!int.TryParse(cityCode, out numericCode) || numericCode <= 10000)
                {
                    continue;
                }

                if (seenCityCodes.ContainsKey(cityCode))
                {
                    continue;
                }
                seenCityCodes[cityCode] = true;

                try
                {
                    // Build the detail-page address from the city code and download it.
                    string cityUrl = "http://www.cma.gov.cn/tqyb/weatherdetail/" + cityCode + ".html";
                    string cityContent = GetContent(cityUrl, "gb2312");

                    for (Match cityMatch = CmaCityNameRegex.Match(cityContent); cityMatch.Success; cityMatch = cityMatch.NextMatch())
                    {
                        try
                        {
                            XmlElement objXmlElementCity = BuildCmaCityElement(objXml, cityMatch.Groups["cityname"].Value, cityContent, today);
                            objXmlCityList.AppendChild(objXmlElementCity);
                        }
                        catch (Exception ex)
                        {
                            Yesun.Edzh.BLL.Log.LogHelperService.WriteError(ex.Message);
                        }
                    }
                }
                catch (Exception ex)
                {
                    Yesun.Edzh.BLL.Log.LogHelperService.WriteError(ex.Message);
                }
            }

            Yesun.Edzh.BLL.Log.LogHelperService.WriteError("成功从中国气象局抓取天气预报!");

            return objXml;
        }

        // Patterns are compiled once per AppDomain; rebuilding a RegexOptions.Compiled
        // regex for every downloaded page is expensive.
        private static readonly Regex CmaCityCodeRegex = new Regex(
            "\"(?<citycode>[0-9]{5,})\"",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        private static readonly Regex CmaCityNameRegex = new Regex(
            "3天预报&nbsp;&nbsp;&nbsp;(?<cityname>.*)</div>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        private static readonly Regex CmaDateRegex = new Regex(
            "<td width=\"(138|137)\" class=\"b-cn\">(?<item1>[^<]+)月(?<item2>[^<]+)日</td>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        private static readonly Regex CmaWeatherRegex = new Regex(
            "<td width=\"75\" valign=\"middle\" class=\"red-cn\">(?<item1>[^<]+)</td>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        private static readonly Regex CmaTemperatureRegex = new Regex(
            "class=\"b-cn\">(?<item1>[^<]+)℃</td>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        private static readonly Regex CmaWindRegex = new Regex(
            "class=\"b-cn\">(?<item1>[^<]+)风</td>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        private static readonly Regex CmaIndexRegex = new Regex(
            "<td width=\"67\" class=\"b-cn\">(?<item1>[^<]+)</td>(\\s*)<td valign=\"middle\" class=\"cn\"><a href=\"(?<xx>[^>]+)\" title=\"(?<item2>[^>]+)\">(?<title>[^<]+)</a></td>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Builds one <c>city</c> element (days, weather, temperature, wind, comment)
        /// from the text of a city detail page.
        /// </summary>
        private static XmlElement BuildCmaCityElement(XmlDocument objXml, string cityName, string content, DateTime today)
        {
            XmlElement objXmlElementCity = objXml.CreateElement("city");
            Yesun.Edzh.BLL.Log.LogHelperService.WriteError("城市 : " + cityName);
            objXmlElementCity.SetAttribute("cityname", cityName.Trim());

            // Dates: one <day> element per match, at most three.
            int dayCount = 0;
            for (Match dateMatch = CmaDateRegex.Match(content); dateMatch.Success && dayCount < 3; dateMatch = dateMatch.NextMatch(), dayCount++)
            {
                string month = dateMatch.Groups["item1"].Value;
                string day = dateMatch.Groups["item2"].Value;
                XmlElement itemElement = objXml.CreateElement("day");
                itemElement.SetAttribute("vdatetime", ResolveCmaForecastYear(month, today) + "-" + month + "-" + day);
                objXmlElementCity.AppendChild(itemElement);
            }

            // Evaluate the XPath once; the n-th match of each pattern below belongs to the n-th day.
            XmlNodeList days = objXmlElementCity.SelectNodes("day");
            SetCmaDayAttributes(CmaWeatherRegex, content, days, "weather", "");
            SetCmaDayAttributes(CmaTemperatureRegex, content, days, "temperature", "℃");
            SetCmaDayAttributes(CmaWindRegex, content, days, "wind", "风");

            // Index lines: "<name> : <title text>" joined by <br/>.
            System.Text.StringBuilder comment = new System.Text.StringBuilder();
            for (Match indexMatch = CmaIndexRegex.Match(content); indexMatch.Success; indexMatch = indexMatch.NextMatch())
            {
                comment.Append(indexMatch.Groups["item1"].Value)
                       .Append(" : ")
                       .Append(indexMatch.Groups["item2"].Value)
                       .Append("<br/>");
            }

            XmlElement objXmlComment = objXml.CreateElement("comment");
            objXmlComment.AppendChild(objXml.CreateCDataSection(comment.ToString()));
            objXmlElementCity.AppendChild(objXmlComment);

            return objXmlElementCity;
        }

        /// <summary>
        /// Copies the <c>item1</c> group of successive matches (plus <paramref name="suffix"/>)
        /// onto the corresponding day elements; matches beyond the available days are ignored.
        /// </summary>
        private static void SetCmaDayAttributes(Regex regex, string content, XmlNodeList days, string attributeName, string suffix)
        {
            int index = 0;
            for (Match match = regex.Match(content); match.Success && index < days.Count; match = match.NextMatch(), index++)
            {
                ((XmlElement)days[index]).SetAttribute(attributeName, match.Groups["item1"].Value + suffix);
            }
        }

        /// <summary>
        /// Picks the calendar year for a forecast month that is published without a year.
        /// A short-range forecast is at most a few days from today, so a month more than
        /// half a year away must belong to the adjacent year (e.g. January read in December).
        /// Falls back to the current year when the month text is not a number in 1..12.
        /// </summary>
        private static int ResolveCmaForecastYear(string monthText, DateTime today)
        {
            int month;
            if (!int.TryParse(monthText, out month) || month < 1 || month > 12)
            {
                return today.Year;
            }

            int delta = month - today.Month;
            if (delta < -6)
            {
                return today.Year + 1;
            }
            if (delta > 6)
            {
                return today.Year - 1;
            }
            return today.Year;
        }

        /// <summary>
        /// 抓取页面接口
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        private static string GetContent(string url, string encoding)
        {
            string str = "";
            WebClient client = new WebClient();
            client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
            client.Headers.Add("Accept-Language", "zh-cn");
            client.Headers.Add("UA-CPU", "x86");
            client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
            try
            {
                byte[] buffer = client.DownloadData(url);
                if (encoding == "utf-8")
                {
                    str = System.Text.Encoding.GetEncoding("utf-8").GetString(buffer, 0, buffer.Length);
                }
                else
                {
                    str = System.Text.Encoding.GetEncoding("gb2312").GetString(buffer, 0, buffer.Length);
                }
            }
            catch (Exception ex)
            {
                Yesun.Edzh.BLL.Log.LogHelperService.WriteError(ex.Message);
            }
            return str;
        }

原文地址:https://www.cnblogs.com/Fooo/p/617656.html