获取大众点评数据

使用.NET实现一个从大众点评抓取一些基础数据(商家名称,地址,电话,经纬度)的小程序。

实现逻辑:

1、以异步方式从列表页获取三项基础信息(商家名称、地址、电话)(注:下方示例代码实际使用的是同步的 HttpWebRequest.GetResponse 调用);

2、把获取下来的数据保存到数据库表里;

3、把存在数据表里的地址信息读取出来,通过调用QQ地图API把地址转化成经纬度;

4、按行更新GIS信息。

代码如下:

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Xml;

namespace DianPing_MeiFa
{
    public partial class test : System.Web.UI.Page
    {
        /// <summary>
        /// Page load handler. In its current state it only runs the geocoding pass
        /// (<c>setMapLocation</c>); the crawl step is kept below as commented-out code.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // Disabled crawl step: walks listing pages 1..50 and stores each page via SaveMeiFaData.
            // string url = "http://www.dianping.com/search/category/2/50/p";
            //this.lblNames.Text = "begin...";
            //for (int i = 1; i < 51; i++) // loop over the result pages
            //{
            //    string url = "http://www.dianping.com/search/category/2/50/p";
            //    url += i;
            //    this.SaveMeiFaData(url);
            //}
            this.setMapLocation();
        }

        /// <summary>
        /// Reads every row of the <c>t_meifa</c> table.
        /// </summary>
        /// <returns>The first table of the data set produced by the query.</returns>
        private DataTable GetMeifa()
        {
            const string query = "SELECT * FROM t_meifa";
            return SqlHelper
                .ExecuteDataset(SqlHelper.GetConnection(), CommandType.Text, query)
                .Tables[0];
        }


        /// <summary>
        /// Geocodes the address of every row returned by <c>GetMeifa</c> and writes the
        /// resulting coordinates back one row at a time via <c>UpdateMeiFaLocation</c>.
        /// Rows with a blank address, or for which the geocoder returns no location,
        /// are skipped instead of aborting the whole batch.
        /// </summary>
        private void setMapLocation()
        {
            DataTable dt = this.GetMeifa();
            IList<MeiFa> mfList = new List<MeiFa>();

            // Convert the DataTable rows into objects.
            foreach (DataRow dr in dt.Rows)
            {
                MeiFa mf = new MeiFa
                {
                    Id = int.Parse(dr["id"].ToString()),
                    Name = dr["name"].ToString(),
                    Address = dr["address"].ToString()
                };
                mfList.Add(mf);
            }

            // Resolve latitude/longitude for each shop and persist it.
            foreach (MeiFa mf in mfList)
            {
                if (string.IsNullOrWhiteSpace(mf.Address))
                {
                    // Nothing to geocode; avoid a pointless API call.
                    continue;
                }

                QQMapGeocoder qmg = this.GetGeocoder(mf.Address);

                // The deserialized response may lack "result"/"location" (e.g. when the
                // service reports an error); dereferencing it blindly would throw.
                if (qmg == null || qmg.result == null || qmg.result.location == null)
                {
                    Debug.WriteLine("Geocoding returned no location for id=" + mf.Id
                        + (qmg == null ? string.Empty : " (status=" + qmg.status + ", message=" + qmg.message + ")"));
                    continue;
                }

                mf.lat = qmg.result.location.lat;
                mf.lng = qmg.result.location.lng;

                this.UpdateMeiFaLocation(mf);
            }
        }

        /// <summary>
        /// Writes the longitude/latitude of <paramref name="mf"/> to the row with the same id.
        /// </summary>
        /// <param name="mf">Shop whose <c>Id</c>, <c>lng</c> and <c>lat</c> are sent to the database.</param>
        private void UpdateMeiFaLocation(MeiFa mf)
        {
            // NOTE(review): rows are read from t_meifa but updated in t_meifa_bak — confirm this is intended.
            string sql = "UPDATE t_meifa_bak SET lng=@lng,lat=@lat WHERE id=@id";

            // A null float? boxes to a null reference, which ADO.NET treats as "parameter not
            // supplied"; DBNull.Value is required to store SQL NULL.
            object lng = mf.lng.HasValue ? (object)mf.lng.Value : DBNull.Value;
            object lat = mf.lat.HasValue ? (object)mf.lat.Value : DBNull.Value;

            SqlParameter[] sps =
            {
                new SqlParameter("@lng", lng),
                new SqlParameter("@lat", lat),
                new SqlParameter("@id", mf.Id),
            };

            SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
        }

        /// <summary>
        /// Calls the QQ map geocoder web service for an address and deserializes the JSON reply.
        /// </summary>
        /// <param name="address">Address text to look up; it is URL-encoded before being sent.</param>
        /// <returns>The object produced by <c>QQMapGeocoder.DeserializeGeocoder</c> from the response body.</returns>
        private QQMapGeocoder GetGeocoder(string address)
        {
            // NOTE(review): the API key is hard-coded in source; consider moving it to configuration.
            string apiMapUrl = "http://apis.map.qq.com/ws/geocoder/v1/?region=北京&address={0}&key=Y5QBZ-DEDR4-3W3U7-XL37W-VVMT6-3KB6K";

            // Escape the address so characters such as '&', '#', '+' or spaces cannot
            // break or truncate the query string.
            apiMapUrl = string.Format(apiMapUrl, Uri.EscapeDataString(address ?? string.Empty));

            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(apiMapUrl);
            request.Method = "GET";

            // Dispose the response, stream and reader so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream newstream = response.GetResponseStream())
            using (StreamReader srRead = new StreamReader(newstream, Encoding.UTF8))
            {
                string json = srRead.ReadToEnd();
                return QQMapGeocoder.DeserializeGeocoder(json);
            }
        }

        /// <summary>
        /// Downloads one listing page, parses the shops on it and saves them to the database.
        /// </summary>
        /// <param name="url">URL of the listing page to download.</param>
        private void SaveMeiFaData(string url)
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
            request.Method = "GET";
            request.KeepAlive = true;
            request.ContentType = "application/x-www-form-urlencoded";

            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            request.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8";

            string outString;

            // Dispose the response, stream and reader so the connection is released
            // even when reading fails part-way.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream newstream = response.GetResponseStream())
            using (StreamReader srRead = new StreamReader(newstream, Encoding.UTF8))
            {
                outString = srRead.ReadToEnd();
            }

            IList<MeiFa> mfList = this.getMeiFaList(outString);
            this.InsertDb(mfList);
        }


        /// <summary>
        /// Concatenates all strings of <paramref name="list"/> in order, without a separator.
        /// </summary>
        /// <param name="list">Strings to join; null entries contribute nothing.</param>
        /// <returns>The concatenation, or an empty string for an empty list.</returns>
        private string GetContent(IList<string> list)
        {
            // StringBuilder avoids re-copying the accumulated text on every iteration.
            StringBuilder sb = new StringBuilder();
            foreach (string s in list)
            {
                sb.Append(s);
            }
            return sb.ToString();
        }

        /// <summary>
        /// Extracts every <c>&lt;ul class="detail"&gt;…&lt;/ul&gt;</c> fragment from a listing page
        /// and converts each one to a <c>MeiFa</c> via <c>MeiFa.CreateMeifa</c>.
        /// </summary>
        /// <param name="html">HTML of the listing page; null or empty yields an empty list.</param>
        /// <returns>One entry per matched fragment, in document order.</returns>
        public IList<MeiFa> getMeiFaList(string html)
        {
            IList<MeiFa> mfList = new List<MeiFa>();
            if (string.IsNullOrEmpty(html))
            {
                return mfList;
            }

            // [\s\S] matches any character including line breaks; the previous "[sS]"
            // only matched the letters s/S, so multi-line <ul> blocks were never found.
            string reg = @"<ul[^>]*class=""detail""[^>]*>[\s\S]*?</ul>";
            MatchCollection mc = Regex.Matches(html, reg);
            foreach (Match m in mc)
            {
                string strDom = m.Value;
                // &nbsp; is not a predefined XML entity; replace it before XML parsing.
                strDom = strDom.Replace("&nbsp;", " ");
                MeiFa mf = MeiFa.CreateMeifa(strDom);
                mfList.Add(mf);
            }
            return mfList;
        }

        /// <summary>
        /// Removes every <c>&lt;…&gt;</c> tag from the given text.
        /// </summary>
        /// <param name="HTMLStr">Text that may contain markup tags.</param>
        /// <returns>The input with all tag-shaped substrings deleted.</returns>
        public string ReplaceHtml(string HTMLStr)
        {
            const string tagPattern = "<[^>]*>";
            string withoutTags = Regex.Replace(HTMLStr, tagPattern, "");
            return withoutTags;
        }

        /// <summary>
        /// Inserts each shop of the list into the database, best-effort: entries without a
        /// name are skipped, and a failing insert is logged and does not stop the rest.
        /// </summary>
        /// <param name="mfs">Shops to store; a null list is treated as empty.</param>
        public void InsertDb(IList<MeiFa> mfs)
        {
            if (mfs == null)
            {
                return;
            }

            foreach (var mf in mfs)
            {
                if (mf == null || string.IsNullOrEmpty(mf.Name))
                {
                    continue;
                }

                try
                {
                    this.InsertDb(mf);
                }
                catch (Exception ex)
                {
                    // Keep going with the remaining rows, but leave a trace of the failure
                    // instead of swallowing it silently.
                    Debug.WriteLine("InsertDb failed for '" + mf.Name + "': " + ex.Message);
                }
            }
        }

        /// <summary>
        /// Inserts one shop (name, address, telephone) into the <c>t_meifa</c> table.
        /// </summary>
        /// <param name="mf">Shop to store; null string properties are written as SQL NULL.</param>
        public void InsertDb(MeiFa mf)
        {
            string sql = "INSERT INTO t_meifa(name,address,tel) values(@name,@address,@tel)";

            // A null parameter value is treated by ADO.NET as "parameter not supplied";
            // DBNull.Value must be used to store SQL NULL.
            SqlParameter[] sps =
            {
                new SqlParameter("@name", (object)mf.Name ?? DBNull.Value),
                new SqlParameter("@address", (object)mf.Address ?? DBNull.Value),
                new SqlParameter("@tel", (object)mf.Tel ?? DBNull.Value),
            };

            SqlHelper.ExecuteNonQuery(SqlHelper.GetConnection(), CommandType.Text, sql, sps);
        }


        /// <summary>
        /// One shop record: database id, name, address, telephone and coordinates.
        /// </summary>
        public class MeiFa
        {
            public int Id { get; set; }
            public string Name { get; set; }
            public string Address { get; set; }
            public string Tel { get; set; }
            // NOTE(review): float keeps roughly 7 significant digits; confirm that is enough for coordinates.
            public float? lng { get; set; }
            public float? lat { get; set; }

            public MeiFa()
            {
            }

            /// <summary>
            /// Builds a shop from one <c>&lt;ul&gt;</c> fragment of the listing page.
            /// </summary>
            /// <param name="domStr">Markup of the fragment; it must be well-formed XML to be parsed.</param>
            /// <returns>
            /// A populated instance, or an empty one (all properties at their defaults) when the
            /// fragment cannot be parsed or lacks the name node, the address node, or the
            /// address/telephone pair.
            /// </returns>
            public static MeiFa CreateMeifa(string domStr)
            {
                MeiFa m = new MeiFa();
                if (string.IsNullOrEmpty(domStr))
                {
                    return m;
                }

                try
                {
                    Debug.WriteLine(domStr);

                    XmlDocument dom = new XmlDocument();
                    // The markup comes from a remote site; never resolve external entities.
                    dom.XmlResolver = null;
                    dom.LoadXml(domStr);

                    // Single quotes inside the XPath avoid having to escape the C# string delimiter.
                    XmlNode nameNode = dom.SelectSingleNode("//ul/li[@class='shopname']/a[@data-hippo-type='shop']");
                    XmlNode addressNode = dom.SelectSingleNode("//ul/*/li[@class='address']");
                    if (nameNode == null || addressNode == null)
                    {
                        return m;
                    }

                    string at = ReplaceAddress(addressNode.InnerText);
                    string[] ats = getArr(at);
                    if (ats.Length < 2)
                    {
                        // Address and telephone are expected as two space-separated tokens.
                        return m;
                    }

                    // Assign only after every check passed, so a failure never leaves a half-filled object.
                    m.Name = nameNode.InnerText;
                    m.Address = ats[0];
                    m.Tel = ats[1];
                }
                catch (XmlException ex)
                {
                    Debug.WriteLine("CreateMeifa: fragment is not well-formed XML: " + ex.Message);
                    m = new MeiFa();
                }
                return m;
            }

            /// <summary>
            /// Splits a string on spaces and drops empty entries.
            /// </summary>
            /// <param name="str">Text to split.</param>
            /// <returns>The non-empty tokens, in order.</returns>
            private static string[] getArr(string str)
            {
                return str.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            }

            /// <summary>
            /// Removes the "地址:" label and all line breaks from the address text.
            /// </summary>
            /// <param name="s">Raw inner text of the address node.</param>
            /// <returns>The cleaned text.</returns>
            private static string ReplaceAddress(string s)
            {
                s = s.Replace("地址:", "");
                // The line break must be written as an escape sequence: a regular string
                // literal cannot span source lines.
                s = s.Replace("\r", "").Replace("\n", "");
                return s;
            }
        }
    }
}
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;

namespace DianPing_MeiFa
{
    /// <summary>
    /// Top-level object of a geocoder response: a status code, a message and the result payload.
    /// </summary>
    public class QQMapGeocoder
    {
        public int status { get; set; }
        public string message { get; set; }
        public QQMapResult result { get; set; }

        /// <summary>
        /// Deserializes a JSON string into a <see cref="QQMapGeocoder"/> using default serializer settings.
        /// </summary>
        /// <param name="jsonStr">JSON text to deserialize.</param>
        /// <returns>The deserialized object as produced by Json.NET.</returns>
        public static QQMapGeocoder DeserializeGeocoder(string jsonStr)
        {
            var defaultSettings = new JsonSerializerSettings();
            return JsonConvert.DeserializeObject<QQMapGeocoder>(jsonStr, defaultSettings);
        }
    }

    /// <summary>
    /// Payload held by <c>QQMapGeocoder.result</c>: a location, address components and a similarity value.
    /// </summary>
    public class QQMapResult
    {
        // Coordinates of the looked-up address.
        public QQMapLocation location { get; set; }
        // Address broken into parts (province, city, district, street, street number).
        public QQMapAddressComponents address_components { get; set; }
        // Kept as a string; presumably a match score reported by the service — TODO confirm.
        public string similarity { get; set; }
    }

    /// <summary>
    /// Longitude/latitude pair; both values are nullable.
    /// </summary>
    public class QQMapLocation
    {
        // NOTE(review): float keeps roughly 7 significant digits; confirm that is enough for coordinates.
        public float? lng { get; set; }
        public float? lat { get; set; }
    }

    /// <summary>
    /// Address parts held by <c>QQMapResult.address_components</c>; every part is a plain string.
    /// </summary>
    public class QQMapAddressComponents
    {
        public string province { get; set; }
        public string city { get; set; }
        public string district { get; set; }
        public string street { get; set; }
        public string street_number { get; set; }

    }
    
}

注:仅供学习使用!

原文地址:https://www.cnblogs.com/wfcfan/p/3746141.html