// C# 网页信息采集 (Form.cs) - C# web page information scraper, main form code-behind

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;

using System.Web;
using System.Net;
using System.IO;

using System.Data.SqlClient;
//using Microsoft.Office.Interop.Excel;
using System.Threading;
using WebBee;
using System.Text.RegularExpressions;


namespace WebBee
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }
        /// <summary>
        /// kijiji 网
        /// </summary>
        /// <param ></param>
        /// <param ></param>
        private void button1_Click(object sender, EventArgs e)
        {

            //try
            //{
            //    Microsoft.Office.Interop.Excel.Application objApp = new Microsoft.Office.Interop.Excel.Application();
            //    Microsoft.Office.Interop.Excel.Workbooks workbooks = objApp.Workbooks;


            //    //判断目标文件是否存在,若不存在则先创建再打开,若存在则打开;

            //    string strDestFileFullName = "D:\\datadb.xls";
            //    FileInfo destFile = new FileInfo(strDestFileFullName);
            //    destFile = new FileInfo(strDestFileFullName);
            //    object missing = System.Type.Missing;
            //    objApp.DisplayAlerts = false;
            //    workbooks.Open(strDestFileFullName, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing, missing);


            //    Microsoft.Office.Interop.Excel.Sheets objSheets = objApp.Worksheets;
            //    Microsoft.Office.Interop.Excel._Worksheet objSheet;
            //    objSheet = (Microsoft.Office.Interop.Excel._Worksheet)objSheets.get_Item(1);


            //    int fromId = Convert.ToInt32(this.fromID.Text);
            //    int toId = Convert.ToInt32(this.toId.Text);

            //    for (int j = fromId; j <= toId; j++)
            //    {
            //        string Url = fixTextBox.Text;
            //        Url = Url + j;

            //        int i = j - fromId;
            //        this.listBox1.Items.Add(Url);

            //        //得到指定Url的源码
            //        Encoding encoding = Encoding.GetEncoding("utf-8");
            //        string strWebContent = GetWebContent(Url, encoding);
            //        if (strWebContent == "") continue;

            //        //只取数据部分的Html代码
            //        int titleStart = strWebContent.IndexOf("<title>", 0);
            //        int titleEnd = strWebContent.IndexOf("</title>", 0);
            //        string temp_title = strWebContent.Substring(titleStart, titleEnd - titleStart + 8);
            //        int bodyStart = strWebContent.IndexOf("<div main_view\">", 0);

            //        //没有想要的信息,继续下一个
            //        if (bodyStart == -1) continue;
            //        int bodyEnd = strWebContent.IndexOf("</body>", 0);
            //        string needString = strWebContent.Substring(bodyStart, bodyEnd - bodyStart);
            //        needString = temp_title + needString;
            //        try
            //        {
            //            //生成HtmlDocument
            //            WebBrowser webb = new WebBrowser();
            //            webb.Navigate("about:blank");

            //            HtmlDocument htmldoc = webb.Document.OpenNew(false);
            //            htmldoc.Write(needString);

            //            string title = htmldoc.Title;
            //            string content = htmldoc.GetElementById("view_content").InnerText;


            //            string[] firstlinestrs = title.Split('|');

            //            //如果数据不完整,放弃这个文件
            //            if (firstlinestrs.Length != 3) continue;
            //            string[] areas = firstlinestrs[1].Split(',');
            //            string[] sorts ={ "", "", "" };

            //            if (firstlinestrs[2] != null)
            //            {
            //                sorts = firstlinestrs[2].Split(',');
            //            }
            //            else
            //            {

            //            }

            //            objSheet.Cells[i + 1, 1] = firstlinestrs[0];

            //            //区域--------------------------------------------
            //            objSheet.Cells[i + 1, 2] = areas[0];
            //            if (areas.Length >= 2)
            //            {
            //                objSheet.Cells[i + 1, 3] = areas[1];
            //            }
            //            else
            //            {
            //                objSheet.Cells[i + 1, 3] = "";
            //            }
            //            if (areas.Length >= 3)
            //            {
            //                objSheet.Cells[i + 1, 4] = areas[2];
            //            }
            //            else
            //            {
            //                objSheet.Cells[i + 1, 4] = "";
            //            }

            //            //分类-----------------------------------------
            //            objSheet.Cells[i + 1, 5] = sorts[0];
            //            if (sorts.Length >= 2)
            //            {
            //                objSheet.Cells[i + 1, 6] = sorts[1];
            //            }
            //            else
            //            {
            //                objSheet.Cells[i + 1, 6] = "";
            //            }
            //            if (sorts.Length >= 3)
            //            {
            //                objSheet.Cells[i + 1, 7] = sorts[2];
            //            }
            //            else
            //            {
            //                objSheet.Cells[i + 1, 7] = "";
            //            }

            //            //内容
            //            objSheet.Cells[i + 1, 8] = content;

            //        }
            //        catch (Exception ex)
            //        {

            //        }

            //    }

            //    string filename = "d:\\dd.xls";
            //    objSheet.SaveAs(filename, missing, missing, missing, missing, missing, missing, missing, missing, missing);

            //    workbooks.Close();
            //    objApp.Quit();


            //}
            //catch (Exception ex)
            //{

            //}
        }

        //根据Url地址得到网页的html源码
        private string GetWebContent(string Url, Encoding encoding)
        {
            string strResult = "";
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                //声明一个HttpWebRequest请求
                request.Timeout = 30000;
                //设置连接超时时间
                request.Headers.Set("Pragma", "no-cache");
                // request.Headers.Set("KeepAlive", "true");
                request.CookieContainer = new CookieContainer();
                request.Credentials = CredentialCache.DefaultCredentials;
                request.Referer = Url;

                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

                HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                Stream streamReceive = response.GetResponseStream();

                StreamReader streamReader = new StreamReader(streamReceive, encoding);
                strResult = streamReader.ReadToEnd();
                streamReceive.Close();
                streamReader.Close();
                streamReceive = null;
                streamReader = null;
            }
            catch
            {
                return "";
            }
            return strResult;
        }

        /// <summary>
        /// 点评网
        /// </summary>
        /// <param ></param>
        /// <param ></param>
        private void button2_Click(object sender, EventArgs e)
        {

            int fromID = Convert.ToInt32(this.fromID.Text);
            int toId = Convert.ToInt32(this.toId.Text);
            for (int i = fromID; i < toId; i++)
            {

                string Url = this.fixTextBox.Text + i;

                //得到指定Url的源码
                Encoding encoding = Encoding.GetEncoding("utf-8");
                string strWebContent = GetWebContent(Url, encoding);
                if (strWebContent.IndexOf("该商户不存在</title>") != -1) continue;
                string needstr = "";
                int infostart = strWebContent.IndexOf("<div Main\">", 0);
                int infoend = strWebContent.IndexOf("<div Votes\">", 0);


                needstr = strWebContent.Substring(infostart, infoend - infostart);


                //

                try
                {
                    //生成HtmlDocument
                    WebBrowser webb = new WebBrowser();
                    webb.Navigate("about:blank");

                    HtmlDocument htmldoc = webb.Document.OpenNew(false);
                    htmldoc.Write(needstr);
                    ShopInfo shopInfo = GetShopInfo(htmldoc);
                    shopInfo.Shopurl = Url;


                    this.insertData(shopInfo);


                }
                catch (Exception ex)
                {

                }
            }
        }

        /// <summary>
        /// 取出列表页面的店铺URL
        /// </summary>
        /// <param ></param>
        /// <returns></returns>

        private string[] getNeedpages(HtmlDocument htmldoc)
        {
            string[] s = new string[20];
            for (int i = 0; i < 20; i++)
            {
                string idname = "_ctl0__ctl" + i + "_HShopName";
                string url = htmldoc.GetElementById(idname).GetAttribute("href");
                url = url.Substring(11, url.Length - 11);
                url = "http://www.dianping.com/" + url;
                s[i] = url;
            }

            return s;
        }

        /// <summary>
        /// 取出shopInfo对象
        /// </summary>
        /// <param ></param>
        /// <returns></returns>
        private ShopInfo GetShopInfo(HtmlDocument htmldoc)
        {
            string bodyHtml = htmldoc.Body.InnerHtml;

            ShopInfo Si = new ShopInfo();

            HtmlElementCollection areas_hc = htmldoc.GetElementsByTagName("div");

            if (areas_hc != null)
            {

                Si.Areas = areas_hc[2].InnerText;
                Si.Areas = Si.Areas.Replace(">", "|");

            }


            HtmlElementCollection h1_hc = htmldoc.GetElementsByTagName("h1");


            if (h1_hc.Count != 0)
            {
                Si.Name = h1_hc[0].InnerText;
                Si.Name = Si.Name.Replace("&nbsp;", " ").Trim(); ;

            }
            else
            {
                Si.Name = "";
            }

            HtmlElementCollection span_hc = htmldoc.GetElementsByTagName("span");
            if (span_hc.Count != 0)
            {
                Si.ChildName = span_hc[0].InnerText;
                Si.ChildName = Si.ChildName.Replace("&nbsp;", " ");
            }
            else
            {
                Si.ChildName = "";
            }
            int startAdress = bodyHtml.IndexOf("<B>地址:</B>", 0);
            int endAddress = 0;

            if (startAdress != -1)
            {
                endAddress = bodyHtml.IndexOf("<BR>", startAdress, 200);
                Si.Address = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);
                Si.Address = Si.Address.Replace("&nbsp;", " ").Trim();
                if (Si.Address.IndexOf("<A class") != -1)
                {
                    Si.Address = Si.Address.Substring(0, Si.Address.IndexOf("<A class"));

                }
            }
            else
            {
                Si.Address = "";
            }
            //------------------------------
            startAdress = bodyHtml.IndexOf("<B>电话:</B>", 0);

            if (startAdress != -1)
            {
                endAddress = bodyHtml.IndexOf("<BR>", startAdress, 100);
                Si.Phone = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);
                Si.Phone = Si.Phone.Replace("&nbsp;", " ").Trim();
            }
            else
            {
                Si.Phone = "";
            }

            //------------------------------
            startAdress = bodyHtml.IndexOf("<B>别名:</B>", 0);


            if (startAdress != -1)
            {
                endAddress = bodyHtml.IndexOf("<BR>", startAdress, 100);
                Si.Bm = bodyHtml.Substring(startAdress + 10, endAddress - startAdress - 10);
                Si.Bm = Si.Bm.Replace("&nbsp;", " ").Trim();
            }
            else
            {
                Si.Bm = "";
            }

            //-----------------------------
            if (htmldoc.GetElementById("ShopTag") != null)
            {


                Si.Tag = htmldoc.GetElementById("ShopTag").InnerText;
                Si.Tag = parseTag(Si.Tag);
            }
            else
            {

                Si.Tag = "";

            }
            //-----------------------------ShopDishs
            if (htmldoc.GetElementById("ShopComment") != null)
            {


                Si.Content = htmldoc.GetElementById("ShopComment").InnerText;
            }
            else
            {

                Si.Content = "";

            }
            //-----------------------------
            if (htmldoc.GetElementById("ShopDish") != null)
            {


                Si.Dish = htmldoc.GetElementById("ShopDish").InnerText;
                Si.Dish = parseTag(Si.Dish);
            }
            else
            {

                Si.Dish = "";

            }
            if (htmldoc.GetElementById("ShopDish") != null)
            {
                Si.Imagesurl = htmldoc.GetElementById("ShopPhoto").GetElementsByTagName("IMG")[0].GetAttribute("src");


            }

            htmldoc = null;
            bodyHtml = null;
            areas_hc = null;
            span_hc = null;
            return Si;
        }

        /// <summary>
        /// 解析字符串
        /// </summary>
        /// <param ></param>
        /// <returns></returns>

        private string parseTag(string tagStr)
        {
            string[] temp = tagStr.Split(' ');
            string returnStr = "";
            for (int i = 0; i < temp.Length; i++)
            {
                if (temp[i] != "")
                {
                    returnStr = returnStr + temp[i].Substring(0, temp[i].IndexOf("(")) + "|";
                }

            }
            if (returnStr.EndsWith("|")) returnStr = returnStr.Substring(0, returnStr.Length - 1);

            return returnStr;

        }


        private void insertData(ShopInfo si)
        {


            StringBuilder strSql = new StringBuilder();
            strSql.Append("insert into getTempData(");
            strSql.Append("name,bm,childName,phone,address,tag,dish,content,areas,imagesurl,url");
            strSql.Append(") values (");
            strSql.Append("@name,@bm,@childName,@phone,@address,@tag,@dish,@content,@areas,@imagesurl,@url)");

            SqlParameter[] parameters = {
     new SqlParameter("@name", SqlDbType.VarChar,128),
     new SqlParameter("@bm",SqlDbType.VarChar,50),
     new SqlParameter("@childName", SqlDbType.VarChar,50),
     new SqlParameter("@phone", SqlDbType.VarChar,100),
     new SqlParameter("@address", SqlDbType.VarChar,200),
     new SqlParameter("@tag", SqlDbType.VarChar,256),
     new SqlParameter("@dish", SqlDbType.VarChar,256),
     new SqlParameter("@content", SqlDbType.VarChar,4096),
     new SqlParameter("@areas", SqlDbType.VarChar,256),
     new SqlParameter("@imagesurl", SqlDbType.VarChar,128),
                    new SqlParameter("@url", SqlDbType.VarChar,64)
                                };
            parameters[0].Value = si.Name;
            parameters[1].Value = si.Bm;
            parameters[2].Value = si.ChildName;
            parameters[3].Value = si.Phone;
            parameters[4].Value = si.Address;
            parameters[5].Value = si.Tag;
            parameters[6].Value = si.Dish;
            parameters[7].Value = si.Content;
            parameters[8].Value = si.Areas;
            parameters[9].Value = si.Imagesurl;
            parameters[10].Value = si.Shopurl;

            DBHelper.ExecuteSql(strSql.ToString(), parameters);
        }
        private bool mIsRunCtrip = false;

        public bool IsRunCtrip
        {
            get { return mIsRunCtrip = false; }
            set { mIsRunCtrip = value; }
        }
        // Callback shape used to report progress for one id; instances are created
        // from showmsg in button3_Click and button5_Click.
        delegate void dFrist(int pIndext);
        // Callback shape for 8j progress reporting; an instance is created from
        // showmsg8j in btn8j_Click.
        delegate void d8JMain(int i);
        // Current progress callback passed to BeginInvoke by the worker methods;
        // null until one of the button handlers assigns it.
        dFrist dfrist = null;
        // 8j progress callback; assigned in btn8j_Click. The only BeginInvoke that
        // would use it (in Start8j) is commented out.
        d8JMain d8jmain = null;
        private void button3_Click(object sender, EventArgs e)
        {
            progressBar1.Maximum = Convert.ToInt32(this.toId.Text) - Convert.ToInt32(this.fromID.Text);
            this.Text = "正在采集携程网,请等待。。。";
            dfrist = new dFrist(showmsg);
            Thread tCtrip = new Thread(StartCtrip);
            tCtrip.IsBackground = true;
            tCtrip.Start();


            // this.fixTextBox.Text = "已完成携程网采集,请根目录查看日志";
         
        }
        void showmsg(int i)
        {

            progressBar1.Value++;
            label4.Text = "现在ID:" + i.ToString() + ",已完成:" + (progressBar1.Value / (float)progressBar1.Maximum).ToString("p");
            if (progressBar1.Value == progressBar1.Maximum)
                MessageBox.Show("finish");
        }
        // Worker for the Ctrip scrape; button3_Click starts it on a background thread.
        // For every id from fromID to toId (inclusive) it queues the dfrist progress
        // callback on the form, loads the hotel through CtripInfo.GetCtripInfoByHotelID
        // and passes the hotel's price entries to CtripInfo.AddHotelPrice. An exception
        // for one id is appended to CtripErrorlog.log in the application start-up
        // folder and the loop moves on to the next id.
        void StartCtrip()
        {
            CtripInfo ci = null;
            // NOTE(review): these two lines read control text from the worker thread
            // rather than the UI thread - confirm this is acceptable or pass the values in.
            int fromID = Convert.ToInt32(this.fromID.Text);
            int toId = Convert.ToInt32(this.toId.Text);
            for (int i = fromID; i <= toId; i++)
            {
                // Progress is reported before the id is processed, whether or not it succeeds.
                this.BeginInvoke(dfrist, new object[] { i });
                try
                {
                    ci = new CtripInfo().GetCtripInfoByHotelID(i);
                    if (ci != null)
                    {
                        // NOTE(review): the next line is not valid C#, and `id` (used in the
                        // AddHotelPrice call below) is never declared. A statement of the form
                        // `int id = ...;` appears to have been lost from the start of this line;
                        // restore it from the original source before building.
                        int System.Windows.Forms.Application.DoEvents();
                        if (null != ci.HotelPrice)
                        {
                            // Iteration starts at index 1, so entry 0 is never stored
                            // (presumably a header row - TODO confirm).
                            for (int ii = 1; ii < ci.HotelPrice.Count; ii++)
                            {
                                ci.AddHotelPrice(ci.HotelPrice[ii], id);
                            }
                        }
                        else
                        {
                            continue;
                        }
                    }
                    else
                    {
                        continue;
                    }
                    ci = null;

                }
                catch (Exception ex)
                {

                    // Append mode (second argument true): earlier log entries are kept.
                    TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "\\CtripErrorlog.log", true);
                    tw.Flush();
                    tw.WriteLine("错误ID: " + i.ToString() + "\r\n 错误原因:" + ex.Message);
                    tw.WriteLine("-------------------------------------------");
                    tw.Flush();
                    tw.Close();
                    tw = null;
                    ci = null;
                    continue;
                }
            }

        }
        void GetCtripHotel(int pStart, int pEnd)
        {
            IsRunCtrip = false;
        }

        private void button4_Click(object sender, EventArgs e)
        {
            //System.Data.DataSet dt = DBHelper.Query("select * from hotel price");
            //string sql = "update hotelprice2 set RoomNewPrice='{0}',BroadBand='{1}',BedType='{2}' where ";
            //if (dt != null)
            //{
            //    for (int i = 0; i < dt.Tables[0].Rows.Count; i++)
            //    {
            //        string id"].ToString();
            //        string Oldprice = dt.Tables[0].Rows[i]["RoomNewPrice"].ToString();
            //        string Oldband = dt.Tables[0].Rows[i]["BroadBand"].ToString();
            //        string OldBedType = dt.Tables[0].Rows[i]["BroadBand"].ToString();
            //        if (Oldprice.IndexOf("&nbsp") > 1)
            //        {
            //            int start = Oldprice.IndexOf("&nbsp");
            //            string newprice = Oldprice.Substring(0, start);
            //            string newband = Oldprice.Substring(0, start + 5);

            //            sql = string.Format(sql, newprice, newband, Oldband);
            //            if (DBHelper.ExecuteSql(sql) > 0)
            //            {
            //                newband = newprice = null;
            //                continue;
            //            }
            //        }
            //        else
            //        {
            //            continue;
            //        }

            //    }


            //}

            //System.Data.DataSet dt = DBHelper.Query("select * from hotelinfo");
            //string sql = "update hotelinfo set subshop='{0}',Name='{1}' where ;
            //if (dt != null)
            //{
            //    for (int i = 0; i < dt.Tables[0].Rows.Count; i++)
            //    {
            //        sql = "update hotelinfo set subshop='{0}',Name='{1}' where ;
            //        string id"].ToString();
            //        string OldName = dt.Tables[0].Rows[i]["Name"].ToString();
            //        if (OldName.IndexOf("(") > 1)
            //        {
            //            int start = OldName.IndexOf("(");
            //            string newName = OldName.Substring(0, start);
            //            string subshop = OldName.Substring(start + 1, OldName.Length - start - 2);
            //            sql = string.Format(sql, subshop, newName, id);

            //            if (DBHelper.ExecuteSql(sql) > 0)
            //            {
            //                = newName = subshop = null;
            //                continue;
            //            }
            //        }
            //        else
            //        {
            //            continue;
            //        }

            //    }


            //}
            //          北京    >                          朝阳区    >                          燕莎/酒仙桥/丽都         
            System.Data.DataSet dt = DBHelper.Query("select * from hotelinfo");
            string sql = "update hotelinfo set position='{0}' where ;
            if (dt != null)
            {
                for (int i = 37; i < dt.Tables[0].Rows.Count; i++)
                {
                    sql = "update hotelinfo set position='{0}' where ;
                    string id"].ToString();
                    string OldName = dt.Tables[0].Rows[i]["area"].ToString();
                    if (OldName.IndexOf(">") > 1)
                    {
                        OldName = OldName.Replace(" ", "");
                        int start = OldName.IndexOf(">");
                        start++;
                        int end = OldName.IndexOf(">", start);
                        if (end > -1)
                            OldName = OldName.Substring(start, end - start);
                        else
                            OldName = OldName.Substring(start);
                        sql = string.Format(sql, OldName, id);

                        if (DBHelper.ExecuteSql(sql) > 0)
                        {
                            = null;
                            continue;
                        }
                    }
                    else
                    {
                        continue;
                    }
                }
            }
            MessageBox.Show("i am ok");

        }
        /// <summary>
        /// 删除HTML标识
        /// </summary>
        /// <param ></param>
        /// <returns></returns>
        public string DropHTMLTag(string htmlString)
        {
            htmlString = Regex.Replace(htmlString, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            htmlString = Regex.Replace(htmlString, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            htmlString = Regex.Replace(htmlString, @"-->", "", RegexOptions.IgnoreCase);
            htmlString = Regex.Replace(htmlString, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
            htmlString = Regex.Replace(htmlString, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
            htmlString = Regex.Replace(htmlString, @"<!--.*", "", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
            //htmlString = Regex.Replace(htmlString, @"&#(\d+);", "", RegexOptions.IgnoreCase);

            //htmlString.Replace("<", "");
            //htmlString.Replace(">", "");
            //htmlString.Replace("\r\n", "");


            return htmlString;
        }

        #region 悟能啊悟能


        private void btn8j_Click(object sender, EventArgs e)
        {
            progressBar1.Maximum = 18;
            this.Text = "正在采集悟能,请等待。。。";
            d8jmain = new d8JMain(showmsg8j);
            Thread tCtrip = new Thread(Start8j);
            tCtrip.IsBackground = true;
            tCtrip.Start();
            Start8j();

        }
        void showmsg8j(int i)
        {

            progressBar1.Value++;
            label4.Text = string.Format("现是ID:{0},已完成:{1}" + i, (progressBar1.Value / (float)progressBar1.Maximum).ToString("p"));
            if (progressBar1.Value == progressBar1.Maximum)
                MessageBox.Show("finish");
        }
        void Start8j()
        {


            string url = "http://bj.8j.com/biz/restaurants/BJS0{0}";
            List<string> HotelLink = new List<string>();
            for (int i = 1; i <= 18; i++)
            {

                url = string.Format("http://bj.8j.com/biz/restaurants/BJS0{0:d2}", i);
                url = GetWebContent(url, Encoding.UTF8);
                if (null == url && string.Empty == url && url.IndexOf("对不起") < 1)
                {
                    continue;
                }
                int count = GetAreaHotelCount(url);
                url = GetShortHTMLContent(url);

                HotelLink.AddRange(GetHtml(url));
                for (int ss = 0; ss < HotelLink.Count; ss++)
                {
                    insert8jAllHotelLink(HotelLink[ss], i);
                }
                HotelLink.Clear();
                for (int j = 2; j <= count; j++)
                {
                    try
                    {

                        // Thread.Sleep(10);

                        url = null;
                        url = string.Format("http://bj.8j.com/biz/restaurants/BJS0{0:d2}/{1}", i, j);
                        url = GetWebContent(url, Encoding.UTF8);
                        if (null == url && string.Empty == url && url.IndexOf("对不起") < 1)
                        {
                            continue;
                        }
                        url = GetShortHTMLContent(url);
                        HotelLink.AddRange(GetHtml(url));
                        for (int ss = 0; ss < HotelLink.Count; ss++)
                        {
                            insert8jAllHotelLink(HotelLink[ss], i);
                        }
                        HotelLink.Clear();
                        // this.BeginInvoke(d8jmain, new object[] { i, j, count });
                    }
                    catch (Exception ex)
                    {

                        TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "\\CtripErrorlog.log", true);
                        tw.Flush();
                        tw.WriteLine("错误ID: " + j.ToString() + "\r\n 错误原因:" + ex.Message);
                        tw.WriteLine("-------------------------------------------");
                        tw.Flush();
                        tw.Close();
                        tw = null;
                        continue;
                    }
                }


            }
            url = null;

        }
        /// <summary>
        /// 把所有抓到的HTMlLink存取
        /// </summary>
        /// <param ></param>
        /// <returns></returns>
        bool insert8jAllHotelLink(string kk, int id)
        {
            string sql = "INSERT INTO [a]([ssss],area) VALUES('{0}','{1}')";
            sql = string.Format(sql, kk, GetAreaInfo(id));
            return DBHelper.ExecuteSql(sql) > 0;
        }
        string GetAreaInfo(int id)
        {
            switch (id)
            {
                case 1:
                    return "朝阳区";

                case 2:
                    return "海淀区";

                case 3:

                    return "东城区";

                case 4:
                    return "西城区";
                case 5:
                    return "宣武区";
                case 6:

                    return "崇文区";
                case 7:
                    return "丰台区";
                case 8:
                    return "石景山区";
                case 9:
                    return "房山区";
                case 10:
                    return "通州区";
                case 11:
                    return "昌平区";
                case 12:
                    return "顺义区";
                case 13:
                    return "大兴区";
                case 14:
                    return "怀柔区";
                case 15:
                    return "门头沟区";
                case 16:
                    return "平谷区";
                case 17:
                    return "延庆县";
                case 18:
                    return "密云县";
                default:
                    return "其它地区";

            }


        }
        /// <summary>
        /// 去掉干扰字符
        /// </summary>
        /// <param ></param>
        /// <returns></returns>
        string GetShortHTMLContent(string line)
        {
            int start = 0, end = 0;

            start = line.IndexOf("<!-- bizlist 列表1开始  -->");
            end = line.IndexOf("<!-- bizlist 列表1结束  -->", start);
            line = line.Substring(start, end - start);
            line = line.Replace("\r", "");
            line = line.Replace("\n", "");
            line = line.Replace("\t", "");
            line = line.Replace("<a", "ウ");
            line = line.Replace("/a>", "ウ");
            return DropHTMLTag(line);


        }
        /// <summary>
        /// 页的所有酒店
        /// </summary>
        /// <param ></param>
        /// <returns>string[]</returns>
        string[] GetHtml(string line)
        {
            try
            {

                string tmp = null;
                int start = 0, end = 0;
                List<string> sss = new List<string>();
                while (line.IndexOf("href=\"", start) > 0)
                {
                    line = line.Replace(" ", "");
                    start = line.IndexOf("href=\"", start);
                    if (start < 1)
                        continue;

                    start = start + 6;
                    end = line.IndexOf("\"target", start);
                    if (end < 1)
                        continue;
                    tmp = line.Substring(start, end - start);
                    if (sss.Count == 0)
                    {

                        sss.Add(tmp);
                    }
                    else
                    {
                        if (sss[sss.Count - 1] == tmp)
                        {
                            continue;
                        }
                        else
                        {
                            sss.Add(tmp);
                        }
                    }

                    start = end;
                }
                return sss.ToArray();
            }
            catch (Exception ex)
            {
                return null;

            }

        }
        /// <summary>
        /// 获得酒店总页数
        /// </summary>
        /// <param ></param>
        /// <returns></returns>
        int GetAreaHotelCount(string line)
        {
            int start = 0, end = 0;
            start = line.IndexOf("<!-- pgdn start  -->");
            if (start < 1) return 0;

            end = line.IndexOf("<!-- pgdn end  -->", start);
            if (end < 1) return 0;
            line = line.Substring(start, end - start);
            line = DropHTMLTag(line);
            start = line.IndexOf("共");
            if (start < 1) return 0;
            start++;
            end = line.IndexOf("页", start);
            if (end < 1) return 0;
            return int.Parse(line.Substring(start, end - start));

        }
        #endregion


        #region 悟能详细页


        private void button5_Click(object sender, EventArgs e)
        {
            progressBar1.Maximum = int.Parse(DBHelper.GetSingle("select count(*) from a").ToString());
            this.Text = "正在采集悟能,请等待。。。";
            dfrist = new dFrist(showmsg);
            Thread tCtrip = new Thread(beginGetHotel);
            tCtrip.IsBackground = true;
            tCtrip.Start();
            //  beginGetHotel();
        }
        void beginGetHotel()
        {
            System.Data.DataSet dt = DBHelper.Query("select * from a");
            string url = null, tmp = null;
            string tel = null, jAdd = null, onlyfoot = null, Address = null, postcode = null, area = null, tag = null, siteurl = null, remark = null;
            for (int i = 0; i < dt.Tables[0].Rows.Count; i++)
            {

                try
                {
                    this.BeginInvoke(dfrist, new object[] { i });
                    jAdd = url = dt.Tables[0].Rows[i][1].ToString();
                    url = GetWebContent(url, Encoding.UTF8);
                    url = url.Replace("\r", "");
                    url = url.Replace("\n", "");
                    url = url.Replace("\t", "");
                    if (null != url && string.Empty != url)
                    {
                        int start = 0, end = 0;
                        start = url.IndexOf("<!-- qyxx  开始  -->");
                        if (start < 1)
                        {
                            continue;
                        }
                        end = url.IndexOf("<!-- info nav end -->", start);
                        tmp = url.Substring(start, end - start);
                        start = tmp.IndexOf("<strong>");
                        if (start < 1)
                        {
                            ;
                        }
                        else
                        {
                            start += 8;
                            end = tmp.IndexOf("</strong>", start);
                            end - start);
                            }

                        start = tmp.IndexOf("<strong>", end);
                        start += 8;
                        end = tmp.IndexOf("</strong>", start);
                        tel = tmp.Substring(start, end - start);
                        start = tmp.IndexOf("<p>", end);
                        start += 3;
                        end = tmp.IndexOf("<br/>", start);
                        Address = tmp.Substring(start, end - start);
                        Address = ReplaceSingleQuotes(Address);
                        start = end;
                        start += 5;
                        end = tmp.IndexOf("</p>", end);
                        postcode = tmp.Substring(start, end - start);
                        //area=dt.Tables[0].Rows[i][2].ToString();
                        start = tmp.IndexOf("区域:", end);
                        if (start < 1)
                        {
                            area = "未知";
                        }
                        else
                        {
                            start += 3;
                            end = tmp.IndexOf("</p>", start);
                            area = DropHTMLTag(tmp.Substring(start, end - start));
                            area = ReplaceSingleQuotes(area);
                        }


                        start = tmp.IndexOf("标签:", end);
                        if (start < 1)
                        {
                            tag = "未知";
                        }
                        else
                        {
                            start += 3;
                            end = tmp.IndexOf("</p>", start);
                            tag = DropHTMLTag(tmp.Substring(start, end - start));
                            tag = ReplaceSingleQuotes(tag);
                        }


                        start = tmp.IndexOf("网址:", end);
                        if (start < 1)
                        {
                            siteurl = "未知";
                        }
                        else
                        {
                            start += 3;
                            end = tmp.IndexOf("</p>", start);
                            siteurl = DropHTMLTag(tmp.Substring(start, end - start));
                        }
                        start = url.IndexOf("特色推荐:");
                        if (start < 1)
                        {
                            onlyfoot = "未知";
                        }
                        else
                        {
                            start += 5;
                            end = url.IndexOf("</p>", start);
                            onlyfoot = url.Substring(start, end - start);
                            onlyfoot = ReplaceSingleQuotes(onlyfoot);
                        }

                        start = url.IndexOf("<!-- jj开始  -->");
                        if (start < 1)
                        {
                            remark = "未知";
                        }
                        else
                        {

                            end = url.IndexOf("<!-- jj结束  -->", start);
                            tmp = url.Substring(start, end - start);
                            if (tmp.IndexOf("简介") < 1)
                            {
                                remark = "未知";

                            }
                            else
                            {
                                start = tmp.IndexOf("简介");
                                end = tmp.IndexOf("</h4>", start);
                                remark = DropHTMLTag(tmp.Substring(start, end - start));
                                remark = ReplaceSingleQuotes(remark);
                            }

                        }

                        inser8jHotlInfo(name, tel, onlyfoot, Address, postcode, area, tag, siteurl, remark, jAdd);
                        = onlyfoot = Address = postcode = area = tag = siteurl = remark = null;
                    }
                }
                catch (Exception ex)
                {

                    TextWriter tw = new StreamWriter(System.Windows.Forms.Application.StartupPath + "\\CtripErrorlog.log", true);
                    tw.Flush();
                    tw.WriteLine("错误ID: " + i.ToString() + "\r\n 错误原因:" + ex.Message);
                    tw.WriteLine("-------------------------------------------");
                    tw.Flush();
                    tw.Close();
                    tw = null;
                    continue;
                }
            }

        }
        /// <summary>
        /// 替换单引号成中文的单引号
        /// </summary>
        /// <param ></param>
        /// <returns></returns>
        public string ReplaceSingleQuotes(string hTML)
        {
            return hTML.Replace("\'", "`");
        }
        void inser8jHotlInfo(string name, string tel, string onlyfoot, string Address, string postcode, string area, string tag, string siteurl, string remark, string jAdd)
        {
            string sql = "INSERT INTO [HotelInfo]([name], [Address], [Tel], [Postcode], [area], [tag], [siteurl], [onlyfoot], [remark],[8jAddress]) VALUES('{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}','{8}','{9}')";
            sql = string.Format(sql, name, Address, tel, postcode, area, tag, siteurl, onlyfoot, remark, jAdd);
            DBHelper.ExecuteSql(sql);
        }

        #endregion

        /// <summary>
        /// Handler with the standard event signature; presumably wired to the form's Load
        /// event in designer code that is not shown here. The body currently does nothing.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {

        }

        private void button6_Click(object sender, EventArgs e)
        {
            System.Data.DataSet dt1 = DBHelper.Query("select star5,city from [00]");
            System.Data.DataSet dt2 = DBHelper.Query("select id,city from ctriptwo");
            string sql = null;
            for (int i = 0; i < dt2.Tables[0].Rows.Count; i++)
            {
                for (int ii = 0; ii < dt1.Tables[0].Rows.Count; ii++)
                {
                    if (dt2.Tables[0].Rows[i]["city"].ToString() == dt1.Tables[0].Rows[ii]["city"].ToString())
                    {
                        sql = "UPDATE ctriptwo set startLevelcount0={0} where ;
                        sql = string.Format(sql, dt1.Tables[0].Rows[ii]["star5"], dt2.Tables[0].Rows[i]["id"]);
                        DBHelper.ExecuteSql(sql);
                    }
                    else
                        continue;

                }

                /*
         INSERT INTO [LocalTest].[dbo].[ctripTwo]
           ([startLevelcount0]
           ,[startLevel5count4]
           ,[startLevel5count5]
           ,[startLevel5count3]
           ,[startLevel5count2]
           ,[startLevel5count1]
           ,[City])
     VALUES
                 */
               
              
            }
            MessageBox.Show("ok");
        }


       

        private void button7_Click(object sender, EventArgs e)
        {
           
            this.Text = "正在采集点评,请等待...";
            label5.Text=System.DateTime.Now.ToString("hh时mm分ss秒");          
            Thread tCtrip = new Thread(Shop);
            tCtrip.IsBackground = true;
            tCtrip.Start();                      
                                 
        }
        public void Shop()
        {
            int shopid;
            int log = 0; int iDivStart = 0;
            string StrSql="";
            string ShopTag = "";
            string DianpingInfo="";
           
            string ShopCity = ""; string ShopArea = ""; string ShopID = ""; string ShopName = ""; string ShopSort = ""; string ShopAddress = ""; string ShopPhone = "";
            try
            {
                for (shopid = 1612436; shopid < 2700000; shopid++)
                {
                    string Url = "http://www.dianping.com/shop/" + shopid;
                    string strResult = GetHtmlCode(Url);
                    if (ReturnMsg(strResult))
                    {
                        try
                        {
                            string divStart = @"<div ShopGuide"">";
                            string divEnd = @"<div Reviews"">";
                            iDivStart = strResult.IndexOf(divStart);
                            int iDivEnd = strResult.IndexOf(divEnd);
                            DianpingInfo = strResult.Substring(iDivStart, iDivEnd - iDivStart);
                        }
                        catch (Exception ex)
                        {

                            TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                            tw.Flush();
                            tw.WriteLine("错误原因:" + ex.Message + Url);
                            tw.WriteLine("-------------------------------------------");
                            tw.Flush();
                            tw.Close();
                            tw = null;

                        }
                        //提取导航条中信息
                        try
                        {
                            string NavigationInfo = @"<div Shop"">";
                            int NavigationEnd = strResult.IndexOf(NavigationInfo);
                            string Navigation = strResult.Substring(iDivStart, NavigationEnd - iDivStart);
                            string str = DropHTMLTag(Navigation).Replace("&nbsp;", "ml").Replace(">", "ml");
                            string ml = "ml";
                            string[] resultString = Regex.Split(str, ml, RegexOptions.IgnoreCase);
                            string str1 = (resultString.Length).ToString();
                            string str2 = "5";
                            string str3 = "7";
                            string str4 = "6";

                            if (str1 == str2)
                            {
                                try
                                {
                                    //提取地址ShopAddress
                                    int AddressEnd = DianpingInfo.IndexOf("地图");
                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                    if (AddressEnd > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else
                                    {
                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }

                                //商店ID名ShopID
                                ShopID = shopid.ToString();
                                //提取所在城市ShopCity
                                ShopCity = resultString[0].ToString();
                                //商店名ShopName
                                ShopName = resultString[4].ToString().Replace("'", ".");
                                //商店所在区
                                ShopArea = resultString[2].ToString();
                                //商店属于购物类别
                                ShopSort = resultString[3].ToString();

                                try
                                {   //商店联系电话
                                    int PhoneStart = strResult.IndexOf("电话:");
                                    //有电话走这边,没电话不添加
                                    if (PhoneStart.ToString() != "-1")
                                    {
                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                    }
                                    else
                                    {
                                        ShopPhone = "无联系电话";
                                    }
                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }


                                try
                                {
                                    //分类标签ShopTag
                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                }

                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }

                            }
                            else if (str1 == str3)
                            {

                                try
                                {
                                    //提取地址ShopAddress
                                    int AddressEnd = DianpingInfo.IndexOf("地图");
                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                    if (AddressEnd > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else
                                    {
                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }

                                //商店ID名ShopID
                                ShopID = shopid.ToString();
                                //提取所在城市ShopCity
                                ShopCity = resultString[0].ToString();
                                //商店名ShopName
                                ShopName = resultString[5].ToString().Replace("'", ".");
                                //商店所在区
                                ShopArea = resultString[2].ToString();
                                //商店属于购物类别
                                ShopSort = resultString[4].ToString();

                                try
                                {   //商店联系电话
                                    int PhoneStart = strResult.IndexOf("电话:");
                                    //有电话走这边,没电话不添加
                                    if (PhoneStart.ToString() != "-1")
                                    {
                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                    }
                                    else
                                    {
                                        ShopPhone = "无联系电话";
                                    }

                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }


                                try
                                {
                                    //分类标签ShopTag
                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                }

                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }

                            }
                            else if (str1 == str4)
                            {

                                try
                                {
                                    //提取地址ShopAddress
                                    int AddressEnd = DianpingInfo.IndexOf("地图");
                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                    if (AddressEnd > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else
                                    {
                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }

                                //商店ID名ShopID
                                ShopID = shopid.ToString();
                                //提取所在城市ShopCity
                                ShopCity = resultString[0].ToString();
                                //商店名ShopName
                                ShopName = resultString[5].ToString().Replace("'", ".");
                                //商店所在区
                                ShopArea = resultString[3].ToString();
                                //商店属于购物类别
                                ShopSort = resultString[4].ToString();

                                try
                                {   //商店联系电话
                                    int PhoneStart = strResult.IndexOf("电话:");
                                    //有电话走这边,没电话不添加
                                    if (PhoneStart.ToString() != "-1")
                                    {
                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                    }
                                    else
                                    {
                                        ShopPhone = "无联系电话";
                                    }

                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }


                                try
                                {
                                    //分类标签ShopTag
                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                }

                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }

                            }
                            else
                            {
                                try
                                {
                                    //提取地址ShopAddress
                                    int AddressEnd = DianpingInfo.IndexOf("地图");
                                    int AddressStart = DianpingInfo.IndexOf("<b>地址:");
                                    if (AddressEnd > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else if ((AddressEnd = DianpingInfo.IndexOf("电话:")) > 0)
                                    {
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                    else
                                    {
                                        AddressEnd = DianpingInfo.IndexOf(@"<div Update"">");
                                        ShopAddress = DropHTMLTag(DianpingInfo.Substring(AddressStart, AddressEnd - AddressStart)).Replace("地址:", "").Replace("&nbsp;", "").Replace("地图", "");
                                    }
                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }

                                //商店ID名ShopID
                                ShopID = shopid.ToString();
                                //提取所在城市ShopCity
                                ShopCity = resultString[0].ToString();
                                //商店名ShopName
                                ShopName = resultString[3].ToString().Replace("'", ".");
                                //商店没有所在区
                                ShopArea = "无";
                                //商店属于购物类别
                                ShopSort = resultString[2].ToString();

                                try
                                {   //商店联系电话
                                    int PhoneStart = strResult.IndexOf("电话:");
                                    //有电话走这边,没电话不添加
                                    if (PhoneStart.ToString() != "-1")
                                    {
                                        int PhoneEnd = strResult.IndexOf(@"<div Update"">");
                                        ShopPhone = DropHTMLTag(strResult.Substring(PhoneStart, PhoneEnd - PhoneStart)).Replace("电话:", "").Replace("&nbsp;", "");

                                    }
                                    else
                                    {
                                        ShopPhone = "无联系电话";
                                    }
                                }
                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }


                                try
                                {
                                    //分类标签ShopTag
                                    int TagStart = strResult.IndexOf(@"<div ShopTag"">");
                                    int TagEnd = strResult.IndexOf(@"<div Reviews"">");

                                    ShopTag = DropHTMLTag(strResult.Substring(TagStart, TagEnd - TagStart)).Replace("&nbsp;", "");
                                }

                                catch (Exception ex)
                                {

                                    TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                                    tw.Flush();
                                    tw.WriteLine("错误原因:" + ex.Message + Url);
                                    tw.WriteLine("-------------------------------------------");
                                    tw.Flush();
                                    tw.Close();
                                    tw = null;

                                }


                            }


                        }
                        catch (Exception ex)
                        {

                            TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                            tw.Flush();
                            tw.WriteLine("错误原因:" + ex.Message + Url);
                            tw.WriteLine("-------------------------------------------");
                            tw.Flush();
                            tw.Close();
                            tw = null;

                        }

                        //插入数据库
                        StrSql = "insert into ShopInfo values(" + ShopID + ",'" + ShopName + "','" + ShopAddress + "','" + ShopPhone + "','" + ShopCity + "','" + ShopArea + "','" + ShopSort + "','" + ShopTag + "')";
                        log = DBHelper.ExecuteSql(StrSql) + log;
                       

                    }
                    else
                    {

                    }
                    continue;
                }
            }
            catch (Exception ex)
            {

                TextWriter tw = new StreamWriter("D:\\WebBee\\ShoppingErrorlog.log", true);
                tw.Flush();
                tw.WriteLine("错误原因:" + ex.Message);
                tw.WriteLine("-------------------------------------------");
                tw.Flush();
                tw.Close();
                tw = null;
               
            }       
            finally
            {

                MessageBox.Show("完成" + System.DateTime.Now.ToString("hh时mm分ss秒"));
            }
           
          
           
           
        }
        //获取网源码方法
        public string GetHtmlCode(string Url)
        {
            Encoding encoding = Encoding.GetEncoding("utf-8");
            string strResult = "";
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                request.Timeout = 30000;
                request.Headers.Set("Pragma", "no-cache");
                request.CookieContainer = new CookieContainer();
                request.Credentials = CredentialCache.DefaultCredentials;
                request.Referer = Url;
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
                HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                Stream streamReceive = response.GetResponseStream();
                StreamReader streamReader = new StreamReader(streamReceive, encoding);
                strResult = streamReader.ReadToEnd();
                streamReceive.Close();
                streamReader.Close();
                streamReceive = null;
                streamReader = null;
            }
            catch (Exception ex)
            {
               
            }
            return strResult;
        }
        //提供BooL判断是否继续
        public bool ReturnMsg(string strResult)
        {   //获取ID
            int iTitleStart = strResult.IndexOf("<title>");
            int iTitleEnd = strResult.IndexOf("</title>");
            string StrWeb = strResult.Substring(iTitleStart, iTitleEnd - iTitleStart);
            string StrTitle = DropHTMLTag(StrWeb);
            //获取时候是购物类型网StrSort
            string StrSort="";
           
            try
            {
                string divStart = @"<div ShopGuide"">";
                string divEnd = @"<div Reviews"">";
                int iDivStart = strResult.IndexOf(divStart);
                int iDivEnd = strResult.IndexOf(divEnd);
                string NavigationInfo = @"<div Shop"">";
                int NavigationEnd = strResult.IndexOf(NavigationInfo);
                string Navigation = strResult.Substring(iDivStart, NavigationEnd - iDivStart);
                string str = DropHTMLTag(Navigation).Replace("&nbsp;", "ml").Replace(">", "ml");
                string ml = "ml";
                string[] resultString= Regex.Split(str, ml, RegexOptions.IgnoreCase);
                StrSort = resultString[1].ToString();
            }
            catch
            {
                StrSort = "出错";
 
            }
            finally
            {
               
            }
            if (StrSort!="购物"||StrTitle == "该商户不存在或已被删除" )
            {
                return false;
            }
            else
            {
                return true;
            }

        }
     
    }     

}

原文地址:https://www.cnblogs.com/hfzsjz/p/1656722.html