Crawler 爬虫

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Crawler.Model;

namespace Crawler.DataService
{
    public class CategoryRepository //: IRepository<Commodity>
    {
        private Logger logger = new Logger(typeof(CategoryRepository));

        public void Save(List<Category> categoryList)
        {
            SqlHelper.InsertList<Category>(categoryList, "Category");
            new Action<List<Category>>(SaveList).BeginInvoke(categoryList, null, null);
        }

        /// <summary>
        /// 根据Level获取类别列表
        /// </summary>
        /// <param name="level"></param>
        /// <returns></returns>
        public List<Category> QueryListByLevel(int level)
        {
            string sql = string.Format("SELECT * FROM category WHERE categorylevel={0};", level);
            return SqlHelper.QueryList<Category>(sql);
        }


        /// <summary>
        /// 存文本记录的
        /// </summary>
        /// <param name="categoryList"></param>
        public void SaveList(List<Category> categoryList)
        {
            StreamWriter sw = null;
            try
            {
                string recordFileName = string.Format("{0}_Category.txt", DateTime.Now.ToString("yyyyMMddHHmmss"));
                string totolPath = Path.Combine(ObjectFactory.DataPath, recordFileName);
                if (!Directory.Exists(Path.GetDirectoryName(totolPath)))
                {
                    Directory.CreateDirectory(Path.GetDirectoryName(totolPath));
                    sw = File.CreateText(totolPath);
                }
                else
                {
                    sw = File.AppendText(totolPath);
                }

                sw.WriteLine(JsonConvert.SerializeObject(categoryList));
            }
            catch (Exception e)
            {
                logger.Error("CategoryRepository.SaveList出现异常", e);
            }
            finally
            {
                if (sw != null)
                {
                    sw.Flush();
                    sw.Close();
                    sw.Dispose();
                }
            }
        }
    }
}
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Crawler.Model;

namespace Crawler.DataService
{
    public class CommodityRepository //: IRepository<Commodity>
    {
        private Logger logger = new Logger(typeof(CommodityRepository));

        public void SaveList(List<Commodity> commodityList)
        {
            if (commodityList == null || commodityList.Count == 0) return;
            IEnumerable<IGrouping<string, Commodity>> group = commodityList.GroupBy<Commodity, string>(c => GetTableName(c));

            foreach (var data in group)
            {
                SqlHelper.InsertList<Commodity>(data.ToList(), data.Key);
            }
        }

        private string GetTableName(Commodity commodity)
        {
            return string.Format("JD_Commodity_{0}", (commodity.ProductId % 30 + 1).ToString("000"));
        }

        /// <summary>
        /// 保存文本记录
        /// </summary>
        /// <param name="commodityList"></param>
        /// <param name="category"></param>
        /// <param name="page"></param>
        public void SaveList(List<Commodity> commodityList, Category category, int page)
        {
            StreamWriter sw = null;
            try
            {
                string recordFileName = string.Format("{0}/{1}/{2}/{3}.txt", category.CategoryLevel, category.ParentCode, category.Id, page);
                string totolPath = Path.Combine(ObjectFactory.DataPath, recordFileName);
                if (!Directory.Exists(Path.GetDirectoryName(totolPath)))
                {
                    Directory.CreateDirectory(Path.GetDirectoryName(totolPath));
                    sw = File.CreateText(totolPath);
                }
                else
                {
                    sw = File.AppendText(totolPath);
                }
                sw.WriteLine(JsonConvert.SerializeObject(commodityList));
            }
            catch (Exception e)
            {
                logger.Error("CommodityRepository.SaveList出现异常", e);
            }
            finally
            {
                if (sw != null)
                {
                    sw.Flush();
                    sw.Close();
                    sw.Dispose();
                }
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

using Crawler;

namespace Crawler.DataService
{
    public class DBInit
    {
        private static Logger logger = new Logger(typeof(DBInit));

        /// <summary>
        /// 谨慎使用  会全部删除数据库并重新创建!
        /// </summary>
        public void InitCommodityTable()
        {
            #region Delete
            try
            {
                StringBuilder sb = new StringBuilder();
                for (int i = 1; i < 31; i++)
                {
                    sb.AppendFormat("DROP TABLE [dbo].[JD_Commodity_{0}];", i.ToString("000"));
                }
                SqlHelper.ExecuteNonQuery(sb.ToString());
            }
            catch (Exception ex)
            {
                if (ex.Message.Contains("因为它不存在,或者您没有所需的权限。"))
                {
                    logger.Warn("初始化数据库InitCommodityTable删除的时候,原表不存在");
                }
                else
                {
                    logger.Error("初始化数据库InitCommodityTable失败", ex);
                    throw ex;
                }
            }
            #endregion Delete

            #region Create
            try
            {
                StringBuilder sb = new StringBuilder();
                for (int i = 1; i < 31; i++)
                {
                    sb.AppendFormat(@"CREATE TABLE [dbo].[JD_Commodity_{0}](
                                        [Id] [int] IDENTITY(1,1) NOT NULL,
                                        [ProductId] [bigint] NULL,
                                        [CategoryId] [int] NULL,
                                        [Title] [nvarchar](500) NULL,
                                        [Price] [decimal](18, 2) NULL,
                                        [Url] [varchar](1000) NULL,
                                        [ImageUrl] [varchar](1000) NULL,
                             CONSTRAINT [PK_JD_Commodity_{0}] PRIMARY KEY CLUSTERED 
                            (
                                [Id] ASC
                            )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
                            ) ON [PRIMARY];", i.ToString("000"));
                }
                SqlHelper.ExecuteNonQuery(sb.ToString());
            }
            catch (Exception ex)
            {
                logger.Error("InitCommodityTable创建异常", ex);
                throw ex;
            }
            #endregion Create
        }

        /// <summary>
        /// 谨慎使用  会全部删除数据库并重新创建!
        /// </summary>
        public void InitCategoryTable()
        {
            #region Delete
            try
            {
                StringBuilder sb = new StringBuilder();
                sb.AppendFormat("DROP TABLE [dbo].[Category];");
                SqlHelper.ExecuteNonQuery(sb.ToString());
            }
            catch (Exception ex)
            {
                if (ex.Message.Equals("无法对 表 'dbo.Category' 执行 删除,因为它不存在,或者您没有所需的权限。"))
                {
                    logger.Warn("初始化数据库InitCategoryTable删除的时候,原表不存在");
                }
                else
                {
                    logger.Error("初始化数据库InitCategoryTable失败", ex);
                    throw ex;
                }
            }
            #endregion Delete

            #region Create
            try
            {
                StringBuilder sb = new StringBuilder();
                sb.AppendFormat(@"CREATE TABLE [dbo].[Category](
                                        [Id] [int] IDENTITY(1,1) NOT NULL,
                                        [Code] [varchar](100) NULL,
                                        [ParentCode] [varchar](100) NULL,
                                        [CategoryLevel] [int] NULL,
                                        [Name] [nvarchar](50) NULL,
                                        [Url] [varchar](1000) NULL,
                                        [State] [int] NULL,
                                      CONSTRAINT [PK_Category] PRIMARY KEY CLUSTERED 
                                     (
                                         [Id] ASC
                                     )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
                                     ) ON [PRIMARY];");

                SqlHelper.ExecuteNonQuery(sb.ToString());
            }
            catch (Exception ex)
            {
                logger.Error("初始化数据库InitCategoryTable 创建失败", ex);
                throw ex;
            }
            #endregion Create

        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Crawler.DataService
{
        public interface IRepository<T> where T : class//, new()
        {
            void Save(T entity);
            void SaveList(List<T> entity);
        }
}
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Crawler.DataService
{
    public class ObjectFactory
    {
         public static string DataPath = ConfigurationManager.AppSettings["DataPath"];
    }
}
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.Data.SqlClient;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;

namespace Crawler.DataService
{
    public class SqlHelper
    {
        private static Logger logger = new Logger(typeof(SqlHelper));
        private static string ConnStr = ConfigurationManager.ConnectionStrings["mvc5"].ConnectionString;

        /// <summary>
        /// 事务执行
        /// </summary>
        /// <param name="sql"></param>
        public static void ExecuteNonQuery(string sql)
        {
            using (SqlConnection sqlConn = new SqlConnection(ConnStr))
            {
                sqlConn.Open();
                SqlCommand cmd = new SqlCommand(sql, sqlConn);
                cmd.ExecuteNonQuery();//.ExecuteNonQueryAsync();//
            }
        }

        public static void ExecuteNonQueryWithTrans(string sql)
        {
            SqlTransaction trans = null;
            try
            {
                using (SqlConnection sqlConn = new SqlConnection(ConnStr))
                {
                    sqlConn.Open();
                    trans = sqlConn.BeginTransaction();
                    SqlCommand cmd = new SqlCommand(sql, sqlConn, trans);
                    cmd.ExecuteNonQuery();//.ExecuteNonQueryAsync();//
                    trans.Commit();
                }
            }
            catch (Exception ex)
            {
                //logger.Error(string.Format("ExecuteNonQueryWithTrans出现异常,sql={0}", sql), ex);
                if (trans != null && trans.Connection != null)
                    trans.Rollback();
                throw ex;
            }
            finally
            {
            }
        }

        public static List<T> QueryList<T>(string sql) where T : new()
        {
            using (SqlConnection sqlConn = new SqlConnection(ConnStr))
            {
                sqlConn.Open();
                SqlCommand cmd = new SqlCommand(sql, sqlConn);
                return TransList<T>(cmd.ExecuteReader());
            }
        }

        public static void Insert<T>(T model, string tableName) where T : new()
        {
            string sql = GetInsertSql<T>(model, tableName);
            ExecuteNonQuery(sql);
        }

        public static void InsertList<T>(List<T> list, string tableName) where T : new()
        {
            string sql = string.Join(" ", list.Select(t => GetInsertSql<T>(t, tableName)));
            ExecuteNonQuery(sql);
        }

        #region Private
        private static string GetInsertSql<T>(T model, string tableName)
        {
            StringBuilder sbSql = new StringBuilder();

            StringBuilder sbFields = new StringBuilder();
            StringBuilder sbValues = new StringBuilder();

            Type type = model.GetType();
            var properties = type.GetProperties();
            foreach (PropertyInfo p in properties)
            {
                string name = p.Name;
                if (!name.Equals("id", StringComparison.OrdinalIgnoreCase))
                {
                    sbFields.AppendFormat("[{0}],", name);
                    string sValue = null;
                    object oValue = p.GetValue(model);
                    if (oValue != null)
                        sValue = oValue.ToString().Replace("'", "");
                    sbValues.AppendFormat("'{0}',", sValue);
                }
            }
            sbSql.AppendFormat("INSERT INTO {0} ({1}) VALUES ({2});", tableName, sbFields.ToString().TrimEnd(','), sbValues.ToString().TrimEnd(','));
            return sbSql.ToString();
        }

        private static List<T> TransList<T>(SqlDataReader reader) where T : new()
        {
            List<T> tList = new List<T>();
            Type type = typeof(T);
            var properties = type.GetProperties();
            if (reader.Read())
            {
                do
                {
                    T t = new T();
                    foreach (PropertyInfo p in properties)
                    {
                        p.SetValue(t, Convert.ChangeType(reader[p.Name], p.PropertyType));
                    }
                    tList.Add(t);
                }
                while (reader.Read());
            }
            return tList;
        }

        private static T TransModel<T>(SqlDataReader reader) where T : new()
        {
            T t = new T();
            if (reader.Read())
            {
                do
                {
                    Type type = typeof(T);
                    var properties = type.GetProperties();
                    foreach (PropertyInfo p in properties)
                    {
                        p.SetValue(t, Convert.ChangeType(reader[p.Name], p.PropertyType));
                    }
                }
                while (reader.Read());
            }
            return t;
        }
        #endregion Private
    }
}
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

using Crawler.Model;

namespace Crawler.DataService
{
    public class WarnRepository //: IRepository<Commodity>
    {
        private Logger logger = new Logger(typeof(WarnRepository));
        public void SaveWarn(Category category, string msg)
        {
            StreamWriter sw = null;
            try
            {
                string recordFileName = string.Format("warn/{0}/{1}/{2}.txt", category.CategoryLevel, category.ParentCode, category.Id);
                string totolPath = Path.Combine(ObjectFactory.DataPath, recordFileName);
                if (!Directory.Exists(Path.GetDirectoryName(totolPath)))
                {
                    Directory.CreateDirectory(Path.GetDirectoryName(totolPath));
                    sw = File.CreateText(totolPath);
                }
                else
                {
                    sw = File.AppendText(totolPath);
                }
                sw.WriteLine(msg);
                sw.WriteLine(JsonConvert.SerializeObject(JsonConvert.SerializeObject(category)));
            }
            catch (Exception e)
            {
                logger.Error("SaveWarn出现异常", e);
            }
            finally
            {
                if (sw != null)
                {
                    sw.Flush();
                    sw.Close();
                    sw.Dispose();
                }
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Crawler.Model
{
    public class BaseModel
    {
        public int Id { get; set; }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Crawler.Model
{
    public class Category:BaseModel
    {
        public string Code { get; set; }
        public string ParentCode { get; set; }
        public string Name { get; set; }
        public string Url { get; set; }
        public int CategoryLevel { get; set; }
        public int State { get; set; }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Crawler.Model
{
    public class Commodity : BaseModel
    {
        public long ProductId { get; set; }
        public int CategoryId { get; set; }
        public string Title { get; set; }
        public decimal Price { get; set; }
        public string Url { get; set; }
        public string ImageUrl { get; set; }
    }

    //jQuery5427073([{"id":"J_1707419","p":"5149.00","m":"5499.00"},{"id":"J_1589214","p":"1999.00","m":"2999.00"},{"id":"J_1546310","p":"3999.00","m":"4999.00"},{"id":"J_1510479","p":"2999.00","m":"3569.00"},{"id":"J_1707420","p":"4149.00","m":"4499.00"},{"id":"J_1770620","p":"2099.00","m":"2499.00"},{"id":"J_1258277","p":"2699.00","m":"3299.00"},{"id":"J_1707423","p":"4599.00","m":"4705.00"},{"id":"J_1252778","p":"3099.00","m":"4199.00"},{"id":"J_1553732","p":"3298.00","m":"4598.00"},{"id":"J_1576022","p":"2999.00","m":"3999.00"},{"id":"J_1420120","p":"1999.00","m":"2899.00"},{"id":"J_647948","p":"1299.00","m":"1698.00"},{"id":"J_1044476","p":"1999.00","m":"2999.00"},{"id":"J_1376591","p":"1299.00","m":"1599.00"},{"id":"J_1416294","p":"4599.00","m":"5898.00"},{"id":"J_1455427","p":"1499.00","m":"1999.00"},{"id":"J_1253502","p":"2799.00","m":"3999.00"},{"id":"J_1553624","p":"2998.00","m":"4398.00"},{"id":"J_1301951","p":"2279.00","m":"3999.00"},{"id":"J_1115374","p":"2499.00","m":"4299.00"},{"id":"J_671315","p":"1999.00","m":"2898.00"},{"id":"J_1283945","p":"3099.00","m":"4199.00"},{"id":"J_1283940","p":"2499.00","m":"2999.00"},{"id":"J_1027317","p":"2799.00","m":"5999.00"},{"id":"J_1314962","p":"3699.00","m":"5199.00"},{"id":"J_1565150","p":"4068.00","m":"5727.00"},{"id":"J_1565175","p":"3788.00","m":"5377.00"},{"id":"J_1565182","p":"3938.00","m":"5757.00"},{"id":"J_1209084","p":"3599.00","m":"4999.00"}]);
    public class CommodityPrice
    {
        public string id { get; set; }
        public decimal p { get; set; }
        public decimal m { get; set; }
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace Crawler
{
    /// <summary>
    /// http://tool.sufeinet.com/HttpHelper.aspx
    /// </summary>
    public class HttpHelper
    {
        private static Logger logger = new Logger(typeof(HttpHelper));

        /// <summary>
        /// 根据url下载内容  之前是GB2312
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        public static string DownloadUrl(string url)
        {
            return DownloadHtml(url, Encoding.UTF8);
        }

        /// <summary>
        /// 下载html
        /// http://tool.sufeinet.com/HttpHelper.aspx
        /// HttpWebRequest功能比较丰富,WebClient使用比较简单
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        public static string DownloadHtml(string url, Encoding encode)
        {
            string html = string.Empty;
            try
            {
                HttpWebRequest request = HttpWebRequest.Create(url) as HttpWebRequest;//模拟请求
                request.Timeout = 30 * 1000;//设置30s的超时
                request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36";
                request.ContentType = "text/html; charset=utf-8";// "text/html;charset=gbk";// 
                //request.Host = "search.yhd.com";

                //request.Headers.Add("Cookie", @"newUserFlag=1; guid=YFT7C9E6TMFU93FKFVEN7TEA5HTCF5DQ26HZ; gray=959782; cid=av9kKvNkAPJ10JGqM_rB_vDhKxKM62PfyjkB4kdFgFY5y5VO; abtest=31; _ga=GA1.2.334889819.1425524072; grouponAreaId=37; provinceId=20; search_showFreeShipping=1; rURL=http%3A%2F%2Fsearch.yhd.com%2Fc0-0%2Fkiphone%2F20%2F%3Ftp%3D1.1.12.0.73.Ko3mjRR-11-FH7eo; aut=5GTM45VFJZ3RCTU21MHT4YCG1QTYXERWBBUFS4; ac=57265177%40qq.com; msessionid=H5ACCUBNPHMJY3HCK4DRF5VD5VA9MYQW; gc=84358431%2C102362736%2C20001585%2C73387122; tma=40580330.95741028.1425524063040.1430288358914.1430790348439.9; tmd=23.40580330.95741028.1425524063040.; search_browse_history=998435%2C1092925%2C32116683%2C1013204%2C6486125%2C38022757%2C36224528%2C24281304%2C22691497%2C26029325; detail_yhdareas=""; cart_cookie_uuid=b64b04b6-fca7-423b-b2d1-ff091d17e5e5; gla=20.237_0_0; JSESSIONID=14F1F4D714C4EE1DD9E11D11DDCD8EBA; wide_screen=1; linkPosition=search");

                //request.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
                //request.Headers.Add("Accept-Encoding", "gzip, deflate, sdch");
                //request.Headers.Add("Referer", "http://list.yhd.com/c0-0/b/a-s1-v0-p1-price-d0-f0-m1-rt0-pid-mid0-kiphone/");

                //Encoding enc = Encoding.GetEncoding("GB2312"); // 如果是乱码就改成 utf-8 / GB2312

                using (HttpWebResponse response = request.GetResponse() as HttpWebResponse)//发起请求
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        logger.Warn(string.Format("抓取{0}地址返回失败,response.StatusCode为{1}", url, response.StatusCode));
                    }
                    else
                    {
                        try
                        {
                            StreamReader sr = new StreamReader(response.GetResponseStream(), encode);
                            html = sr.ReadToEnd();//读取数据
                            sr.Close();
                        }
                        catch (Exception ex)
                        {
                            logger.Error(string.Format("DownloadHtml抓取{0}保存失败", url), ex);
                            html = null;
                        }
                    }
                }
            }
            catch (System.Net.WebException ex)
            {
                if (ex.Message.Equals("远程服务器返回错误: (306)。"))
                {
                    logger.Error("远程服务器返回错误: (306)。", ex);
                    return null;
                }
            }
            catch (Exception ex)
            {
                logger.Error(string.Format("DownloadHtml抓取{0}出现异常", url), ex);
                html = null;
            }
            return html;
        }
    }
}
using System;
using System.IO;
using System.Diagnostics;
using System.Configuration;
using log4net.Config;
using log4net;
using Newtonsoft.Json;
using System.Collections.Generic;

namespace Crawler
{
    public class Logger
    {
        static Logger()
        {
            XmlConfigurator.Configure(new FileInfo(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "CfgFiles\log4net.cfg.xml")));
            ILog Log = LogManager.GetLogger(typeof(Logger));
            Log.Info("系统初始化Logger模块");
        }

        private ILog loger = null;
        public Logger(Type type)
        {
            loger = LogManager.GetLogger(type);
        }

        /// <summary>
        /// Log4日志
        /// </summary>
        /// <param name="msg"></param>
        /// <param name="ex"></param>
        public void Error(string msg = "出现异常", Exception ex = null)
        {
            Console.WriteLine(msg);
            loger.Error(msg, ex);
        }

        /// <summary>
        /// Log4日志
        /// </summary>
        /// <param name="msg"></param>
        public void Warn(string msg)
        {
            Console.WriteLine(msg);
            loger.Warn(msg);
        }

        /// <summary>
        /// Log4日志
        /// </summary>
        /// <param name="msg"></param>
        public void Info(string msg)
        {
            Console.WriteLine(msg);
            loger.Info(msg);
        }

        /// <summary>
        /// Log4日志
        /// </summary>
        /// <param name="msg"></param>
        public void Debug(string msg )
        {
            Console.WriteLine(msg);
            loger.Debug(msg);
        }

        
    }
}
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;

using Crawler.Model;

namespace Crawler
{
    public class CategorySearch
    {

        private static Logger logger = new Logger(typeof(CategorySearch));
        private static int count = 1;

        public static List<Category> Crawler(string url)
        {
            List<Category> categoryList = new List<Category>();
            try
            {
                string html = HttpHelper.DownloadUrl(url);

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);
                string fristPath = "//*[@class='category-item m']";
                HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes(fristPath);
                int k = 1;
                foreach (HtmlNode node in nodeList)
                {
                    categoryList.AddRange(First(node.InnerHtml, k++.ToString("00") + "f", "root"));
                }

            }
            catch (Exception ex)
            {
                logger.Error("CrawlerMuti出现异常", ex);
            }
            return categoryList;
        }

        /// <summary>
        /// 对每一个一级类进行查找
        /// </summary>
        /// <param name="html"></param>
        /// <param name="code"></param>
        /// <param name="parentCode"></param>
        /// <returns></returns>
        private static List<Category> First(string html, string code, string parentCode)
        {
            List<Category> categoryList = new List<Category>();
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);
            string path = "//*[@class='mt']/h2/span";
            HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes(path);
            foreach (HtmlNode node in nodeList)
            {
                Category category = new Category()
                {
                    Id = count++,
                    State = 0,
                    CategoryLevel = 1,
                    Code = code,
                    ParentCode = parentCode
                };
                category.Name = node.InnerText;
                category.Url = "";// node.Attributes["href"].Value;
                categoryList.Add(category);
            }
            categoryList.AddRange(Second(html, code));
            return categoryList;
        }

        /// <summary>
        /// 在一个一级类下面的全部二级类进行查找
        /// </summary>
        /// <param name="html"></param>
        /// <param name="parentCode"></param>
        /// <returns></returns>
        private static List<Category> Second(string html, string parentCode)
        {
            List<Category> categoryList = new List<Category>();
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);
            string path = "//*[@class='items']/dl";
            HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes(path);
            int k = 1;
            foreach (HtmlNode node in nodeList)
            {
                string code = string.Format("{0}{1}s", parentCode, k.ToString("00"));
                string secondHtml = node.InnerHtml;
                if (string.IsNullOrWhiteSpace(secondHtml)) continue;
                HtmlDocument secondDoc = new HtmlDocument();
                secondDoc.LoadHtml(secondHtml);
                Category category = new Category()
                {
                    Id = count++,
                    State = 0,
                    CategoryLevel = 2,
                    Code = code,
                    ParentCode = parentCode
                };


                HtmlNode secondNode = secondDoc.DocumentNode.SelectSingleNode("//dt/a");
                if (secondNode == null)//图书音像
                {
                    secondNode = secondDoc.DocumentNode.SelectSingleNode("//dt");
                }
                category.Name = secondNode.InnerText;
                if (secondNode.Attributes["href"] != null)
                {
                    category.Url = secondNode.Attributes["href"].Value;
                    if (!category.Url.StartsWith("http:"))
                    {
                        category.Url = string.Concat("http:", category.Url);
                    }
                }
                categoryList.Add(category);
                HtmlNode thirdNode = secondDoc.DocumentNode.SelectSingleNode("//dd");
                if (thirdNode == null) continue;
                categoryList.AddRange(Third(thirdNode.InnerHtml, code));
                k++;
            }
            return categoryList;
        }

        /// <summary>
        /// 在一个二级类下的全部三级类里面进行查找
        /// </summary>
        /// <param name="html"></param>
        /// <param name="parentCode"></param>
        /// <returns></returns>
        private static List<Category> Third(string html, string parentCode)
        {
            List<Category> categoryList = new List<Category>();
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);
            string path = "//a";
            HtmlNodeCollection nodeList = doc.DocumentNode.SelectNodes(path);
            if (nodeList == null || nodeList.Count == 0) return categoryList;
            int k = 1;
            foreach (HtmlNode node in nodeList)
            {
                string code = string.Format("{0}{1}t", parentCode, k.ToString("00"));
                Category category = new Category()
                {
                    Id = count++,
                    State = 0,
                    CategoryLevel = 3,
                    Code = code,
                    ParentCode = parentCode
                };
                category.Name = node.InnerText;
                category.Url = node.Attributes["href"].Value;
                if (!category.Url.StartsWith("http:"))
                {
                    category.Url = string.Concat("http:", category.Url);
                }
                categoryList.Add(category);
                k++;
            }
            return categoryList;
        }
    }
}
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;
using Crawler.DataService;

using Crawler.Model;

namespace Crawler
{
    public class CommoditySearch
    {
        private Logger logger = new Logger(typeof(CommoditySearch));
        private WarnRepository warnRepository = new WarnRepository();
        private CommodityRepository commodityRepository = new CommodityRepository();
        private Category category = null;

        public CommoditySearch(Category _category)
        {
            category = _category;
        }

        public void Crawler()
        {
            try
            {
                if (string.IsNullOrEmpty(category.Url))
                {
                    warnRepository.SaveWarn(category, string.Format("Url为空,Name={0} Level={1} Url={2}", category.Name, category.CategoryLevel, category.Url));
                    return;
                }
                string html = HttpHelper.DownloadUrl(category.Url);//下载html

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);//加载html
                string pageNumberPath = @"//*[@id='J_topPage']/span/i";
                HtmlNode pageNumberNode = doc.DocumentNode.SelectSingleNode(pageNumberPath);
                if (pageNumberNode != null)
                {
                    string sNumber = pageNumberNode.InnerText;
                    for (int i = 1; i < int.Parse(sNumber) + 1; i++)
                    {
                        string pageUrl = string.Format("{0}&page={1}", category.Url, i);
                        try
                        {
                            List<Commodity> commodityList = GetCommodityList(category, pageUrl.Replace("&page=1&", string.Format("&page={0}&", i)));
                            //commodityRepository.SaveList(commodityList);
                        }
                        catch (Exception ex)//保证一页的错误不影响另外一页
                        {
                            logger.Error("Crawler的commodityRepository.SaveList(commodityList)出现异常", ex);
                        }
                    }
                }




                //string fristPath = "//*[@id='J_bottomPage']/span[1]/a";
                //HtmlNodeCollection noneNodeList = doc.DocumentNode.SelectNodes(fristPath);//xPath分析
                //if (noneNodeList == null)
                //{
                //    warnRepository.SaveWarn(category, string.Format("分页数据为空,Name={0} Level={1} Url={2}", category.Name, category.CategoryLevel, category.Url));
                //    return;
                //}

                //string pageUrl = null;
                //foreach (var node in noneNodeList)
                //{
                //    string sNum = node.InnerHtml;
                //    if (sNum.Equals("1"))
                //    {
                //        pageUrl = node.Attributes["href"].Value.Replace("&amp;", "&");
                //        if (!pageUrl.StartsWith("http://"))
                //            pageUrl = string.Format("http://list.jd.com{0}", pageUrl);
                //        break;
                //    }
                //}
                //string sMaxPageNumPath = "//*[@id='J_bottomPage']/span[2]/em[1]/b";
                //HtmlNode sMaxPageNumPathNode = doc.DocumentNode.SelectSingleNode(sMaxPageNumPath);
                //string sMaxPageNum = sMaxPageNumPathNode.InnerHtml;
                //for (int i = 1; i < int.Parse(sMaxPageNum) + 1; i++)
                //{
                //    try
                //    {
                //        List<Commodity> commodityList = GetCommodityList(category, pageUrl.Replace("&page=1&", string.Format("&page={0}&", i)));
                //        commodityRepository.SaveList(commodityList);
                //    }
                //    catch (Exception ex)//保证一页的错误不影响另外一页
                //    {
                //        logger.Error("Crawler的commodityRepository.SaveList(commodityList)出现异常", ex);
                //    }
                //}
            }
            catch (Exception ex)
            {
                logger.Error("CrawlerMuti出现异常", ex);
                warnRepository.SaveWarn(category, string.Format("出现异常,Name={0} Level={1} Url={2}", category.Name, category.CategoryLevel, category.Url));
            }
        }

        private List<Commodity> GetCommodityList(Category category, string url)
        {
            string html = HttpHelper.DownloadUrl(url);
            List<Commodity> commodityList = new List<Commodity>();
            try
            {
                if (string.IsNullOrEmpty(html)) return commodityList;
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);
                string liPath = "//*[@id='plist']/ul/li";
                HtmlNodeCollection noneNodeList = doc.DocumentNode.SelectNodes(liPath);
                if (noneNodeList == null || noneNodeList.Count == 0)
                {
                    warnRepository.SaveWarn(category, string.Format("GetCommodityList商品数据为空,Name={0} Level={1} category.Url={2} url={3}", category.Name, category.CategoryLevel, category.Url, url));
                    return commodityList;
                }
                foreach (var node in noneNodeList)
                {
                    HtmlDocument docChild = new HtmlDocument();
                    docChild.LoadHtml(node.OuterHtml);

                    Commodity commodity = new Commodity()
                    {
                        CategoryId = category.Id
                    };

                    string urlPath = "//*[@class='p-name']/a";
                    HtmlNode urlNode = docChild.DocumentNode.SelectSingleNode(urlPath);
                    if (urlNode == null)
                    {
                        continue;
                    }
                    commodity.Url = urlNode.Attributes["href"].Value;
                    if (!commodity.Url.StartsWith("http:"))
                        commodity.Url = "http:" + commodity.Url;

                    string sId = Path.GetFileName(commodity.Url).Replace(".html", "");
                    commodity.ProductId = long.Parse(sId);

                    //*[@id="plist"]/ul/li[1]/div/div[3]/a/em
                    string titlePath = "//*[@class='p-name']/a/em";
                    HtmlNode titleNode = docChild.DocumentNode.SelectSingleNode(titlePath);
                    if (titleNode == null)
                    {
                        //Log.Error(titlePath);
                        continue;
                    }
                    commodity.Title = titleNode.InnerText;

                    string iamgePath = "//*[@class='p-img']/a/img";
                    HtmlNode imageNode = docChild.DocumentNode.SelectSingleNode(iamgePath);
                    if (imageNode == null)
                    {
                        continue;
                    }
                    if (imageNode.Attributes.Contains("src"))
                        commodity.ImageUrl = imageNode.Attributes["src"].Value;
                    else if (imageNode.Attributes.Contains("original"))
                        commodity.ImageUrl = imageNode.Attributes["original"].Value;
                    else if (imageNode.Attributes.Contains("data-lazy-img"))
                        commodity.ImageUrl = imageNode.Attributes["data-lazy-img"].Value;
                    else
                    {
                        continue;
                    }
                    if (!commodity.ImageUrl.StartsWith("http:"))
                        commodity.ImageUrl = "http:" + commodity.ImageUrl;


                    commodityList.Add(commodity);
                }
                Console.WriteLine("{0}一共获取了{1}条数据", url, commodityList.Count);
            }
            catch (Exception ex)
            {
                logger.Error(string.Format("GetCommodityList出现异常,url={0}", url), ex);
            }
            return GetCommodityPrice(category, commodityList);
        }

        /// <summary>
        /// 获取商品价格
        /// </summary>
        /// <param name="commodityList"></param>
        /// <returns></returns>
        private List<Commodity> GetCommodityPrice(Category category, List<Commodity> commodityList)
        {
            try
            {
                if (commodityList == null || commodityList.Count() == 0)
                    return commodityList;

                StringBuilder sb = new StringBuilder();
                //sb.Append(@"http://p.3.cn/prices/mgets?my=list_price&type=1&area=1_72_4137&skuIds=");
                //sb.Append(string.Join("%2C", commodityList.Select(c => string.Format("J_{0}", c.ProductId))));
                //
                sb.AppendFormat("http://p.3.cn/prices/mgets?callback=jQuery1069298&type=1&area=1_72_4137_0&skuIds={0}&pdbp=0&pdtk=&pdpin=&pduid=1945966343&_=1469022843655", string.Join("%2C", commodityList.Select(c => string.Format("J_{0}", c.ProductId))));
                string html = HttpHelper.DownloadUrl(sb.ToString());
                if (string.IsNullOrWhiteSpace(html))
                {
                    logger.Warn(string.Format("获取url={0}时获取的html为空", sb.ToString()));
                }
                html = html.Substring(html.IndexOf("(") + 1);
                html = html.Substring(0, html.LastIndexOf(")"));
                List<CommodityPrice> priceList = JsonConvert.DeserializeObject<List<CommodityPrice>>(html);
                commodityList.ForEach(c => c.Price = priceList.FirstOrDefault(p => p.id.Equals(string.Format("J_{0}", c.ProductId))).p);
                //commodityList.ForEach(c => Console.WriteLine(" Title={0}  ImageUrl={1} Url={2} Price={3} Id={4}", c.Title, c.ImageUrl, c.Url, c.Price, c.Id));
            }
            catch (Exception ex)
            {
                logger.Error("GetCommodityPrice出现异常", ex);
            }
            return commodityList;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

using Newtonsoft.Json;
using Crawler.DataService;
using Crawler.Model;

namespace Crawler
{
    /// <summary>
    /// 1 爬虫,爬虫攻防
    /// 2 下载html
    /// 3 xpath解析html,获取数据和深度抓取
    /// 4 不一样的属性和ajax数据的获取
    /// 5 多线程爬虫
    /// </summary>
    class Program
    {
        private static Logger logger = new Logger(typeof(Program));
        static void Main(string[] args)
        {
            try
            {
                Console.WriteLine("今天是爬虫的学习");

                ////测试DownloadHtml
                //string html = HttpHelper.DownloadHtml(@"http://list.jd.com/list.html?cat=9987,653,655", Encoding.UTF8);

                ////测试获取分类页
                //string html1 = HttpHelper.DownloadHtml("http://www.jd.com/allSort.aspx", Encoding.UTF8);



                ////测试抓取商品列表
                //string testCategory = "{"Id":73,"Code":"02f01s01T","ParentCode":"02f01s","Name":"烟机/灶具","Url":"http://list.jd.com/list.html?cat=737,13297,1300","Level":3}";
                //new CommoditySearch(JsonConvert.DeserializeObject<Category>(testCategory)).Crawler();

                //List<Category> categoryList = CategorySearch.Crawler("http://www.jd.com/allSort.aspx");

                //Crawl();
            }
            catch (Exception ex)
            {
                logger.Error("异常啦,", ex);
                Console.WriteLine("*****************木有成功**********************");
                Console.ReadLine();
            }
            Console.ReadLine();
        }

        /// <summary>
        /// 抓取
        /// </summary>
        private static void Crawl()
        {
            DBInit dbInit = new DBInit();
            CategoryRepository categoryRepository = new CategoryRepository();
            Console.WriteLine("请输入Y/N进行类别表初始化确认! Y 删除Category表然后重新创建,然后抓取类型数据,N(或者其他)跳过");
            string input = Console.ReadLine();
            if (input.Equals("Y", StringComparison.OrdinalIgnoreCase))
            {
                dbInit.InitCategoryTable();
                List<Category> categoryList = CategorySearch.Crawler("http://www.jd.com/allSort.aspx");

                categoryRepository.Save(categoryList);
                Console.WriteLine("类型数据初始化完成,共抓取类别{0}个", categoryList.Count);
            }
            else
            {
                Console.WriteLine("你选择不初始化类别数据");
            }
            Console.WriteLine("*****************^_^**********************");

            Console.WriteLine("请输入Y/N进行商品数据初始化确认! Y 删除全部商品表表然后重新创建,然后抓取商品数据,N(或者其他)跳过");
            input = Console.ReadLine();
            if (input.Equals("Y", StringComparison.OrdinalIgnoreCase))
            {
                dbInit.InitCommodityTable();
                CrawlerCommodity();
            }
            Console.WriteLine("*****************^_^**********************");
            //CleanAll();
        }

        /// <summary>
        /// 抓取商品
        /// </summary>
        private static void CrawlerCommodity()
        {
            Console.WriteLine("{0} jd商品开始抓取 - -", DateTime.Now);
            CategoryRepository categoryRepository = new CategoryRepository();
            List<Category> categoryList = categoryRepository.QueryListByLevel(3);

            List<Task> taskList = new List<Task>();
            TaskFactory taskFactory = new TaskFactory();
            foreach (Category category in categoryList)
            {
                CommoditySearch searcher = new CommoditySearch(category);
                //searcher.Crawler();
                taskList.Add(taskFactory.StartNew(searcher.Crawler));
                if (taskList.Count > 15)
                {
                    taskList = taskList.Where(t => !t.IsCompleted && !t.IsCanceled && !t.IsFaulted).ToList();
                    Task.WaitAny(taskList.ToArray());
                }
            }
            Task.WaitAll(taskList.ToArray());
            Console.WriteLine("{0} jd商品抓取全部完成 - -", DateTime.Now);
            CleanAll();
        }

        /// <summary>
        /// 清理数据
        /// </summary>
        private static void CleanAll()
        {
            try
            {
                Console.WriteLine("{0} 开始清理重复数据 - -", DateTime.Now);
                StringBuilder sb = new StringBuilder();
                for (int i = 1; i < 31; i++)
                {
                    sb.AppendFormat(@"DELETE FROM [dbo].[JD_Commodity_{0}] where productid IN(select productid from [dbo].[JD_Commodity_{0}] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_{0}] group by productid,CategoryId having count(0)>1);", i.ToString("000"));
                }
                #region
                /*
                 DELETE FROM [dbo].[JD_Commodity_001] where productid IN(select productid from [dbo].[JD_Commodity_001] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_001] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_002] where productid IN(select productid from [dbo].[JD_Commodity_002] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_002] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_003] where productid IN(select productid from [dbo].[JD_Commodity_003] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_003] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_004] where productid IN(select productid from [dbo].[JD_Commodity_004] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_004] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_005] where productid IN(select productid from [dbo].[JD_Commodity_005] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_005] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_006] where productid IN(select productid from [dbo].[JD_Commodity_006] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_006] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_007] where productid IN(select productid from [dbo].[JD_Commodity_007] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as IDv from [dbo].[JD_Commodity_007] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_008] where productid IN(select productid from [dbo].[JD_Commodity_008] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_008] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_009] where productid IN(select productid from [dbo].[JD_Commodity_009] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_009] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_010] where productid IN(select productid from [dbo].[JD_Commodity_010] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_010] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_011] where productid IN(select productid from [dbo].[JD_Commodity_011] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_011] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_012] where productid IN(select productid from [dbo].[JD_Commodity_012] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_012] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_013] where productid IN(select productid from [dbo].[JD_Commodity_013] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_013] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_014] where productid IN(select productid from [dbo].[JD_Commodity_014] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_014] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_015] where productid IN(select productid from [dbo].[JD_Commodity_015] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_015] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_016] where productid IN(select productid from [dbo].[JD_Commodity_016] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_016] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_017] where productid IN(select productid from [dbo].[JD_Commodity_017] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_017] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_018] where productid IN(select productid from [dbo].[JD_Commodity_018] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_018] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_019] where productid IN(select productid from [dbo].[JD_Commodity_019] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_019] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_020] where productid IN(select productid from [dbo].[JD_Commodity_020] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_020] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_021] where productid IN(select productid from [dbo].[JD_Commodity_021] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_021] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_022] where productid IN(select productid from [dbo].[JD_Commodity_022] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_022] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_023] where productid IN(select productid from [dbo].[JD_Commodity_023] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_023] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_024] where productid IN(select productid from [dbo].[JD_Commodity_024] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_024] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_025] where productid IN(select productid from [dbo].[JD_Commodity_025] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_025] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_026] where productid IN(select productid from [dbo].[JD_Commodity_026] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_026] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_027] where productid IN(select productid from [dbo].[JD_Commodity_027] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_027] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_028] where productid IN(select productid from [dbo].[JD_Commodity_028] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_028] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_029] where productid IN(select productid from [dbo].[JD_Commodity_029] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_029] group by productid,CategoryId having count(0)>1);DELETE FROM [dbo].[JD_Commodity_030] where productid IN(select productid from [dbo].[JD_Commodity_030] group by productid,CategoryId having count(0)>1)
                                AND ID NOT IN(select max(ID) as ID from [dbo].[JD_Commodity_030] group by productid,CategoryId having count(0)>1);
                 */
                #endregion
                Console.WriteLine("执行清理sql:{0}", sb.ToString());
                SqlHelper.ExecuteNonQuery(sb.ToString());
                Console.WriteLine("{0} 完成清理重复数据 - -", DateTime.Now);
            }
            catch (Exception ex)
            {
                logger.Error("CleanAll出现异常", ex);
            }
            finally
            {
                Console.WriteLine("{0} 结束清理重复数据 - -", DateTime.Now);
            }
        }
    }
}
原文地址:https://www.cnblogs.com/zhengqian/p/8655401.html