【ShoppingPeeker】-基于Webkit内核的爬虫蜘蛛引擎 ShoppingWebCrawler的姊妹篇-可视化任务Web管理

ShoppingPeeker

这个项目是蜘蛛项目的可视化任务站点。

项目github地址:ShoppingPeeker

开发语言:C#

开发工具:Visual Studio 2017 +.Net Core2.1

运行平台:Linux/Windows

# 概述

ShoppingPeeker 是 ShoppingWebCrawler 项目的可视化任务工具。

项目使用 .NET Core 2.x 进行构建,可以运行在 Windows/Linux/Mac 平台。

项目采用Socket通信模式,实现本地采集任务与蜘蛛服务器进行通信。稳定高效。

如何部署?

1、克隆ShoppingWebCrawler项目到本地。如:d:\src\ShoppingWebCrawler;

2、克隆ShoppingPeeker项目到本地。如:d:\src\ShoppingPeeker;

(注意:两个项目必须在同一个目录下,因为使用了文件编译引用!!!!)

3、启动蜘蛛 ShoppingWebCrawler。

4、使用 Visual Studio 2017,或者通过 cmd/PowerShell 进入项目文件夹,执行命令:dotnet build;dotnet run

5、恭喜,项目成功启动,示范站点的输入框,是一个根据输入的商品词,抓取对应电商平台的商品列表的功能示范!

项目构成

ShoppingPeeker.Web:基于 .NET Core 2.x 的 ASP.NET MVC 站点。

插件模式:不同的电商平台使用插件模式进行采集任务的解析。在站点启动的时候,扫描插件目录,对插件进行附加,并监视查询的变更!

数据持久化

基于 Dapper 的数据交互层 DataAccess,封装了 Linq 方式的查询,实现对原始表数据的增删改查操作。

TCP 进行蜘蛛通信示范

封装TCP通信,基于ADO.NET的连接方式,上手简单容易,可配置。

基本通信:

// Use the first connection string from the web-crawler configuration section.
var crawlerConnStr = ConfigHelper.WebCrawlerSection.ConnectionStringCollection.First();

using (var tcpConn = new SoapTcpConnection(crawlerConnStr))
{
    // Open the connection only when it reports itself as closed.
    if (tcpConn.State == ConnectionState.Closed)
    {
        tcpConn.Open();
    }

    // Send the ping command and keep the server's reply.
    var reply = tcpConn.SendString(CommandConstants.CMD_Ping);

    // Compose the result text from the current local time and the reply.
    resut = "time :" + DateTime.Now.ToString() + "; tcp server response: " + reply;
}

采集请求:

// NOTE(review): the `as` cast yields null when the stored value is not a WebCrawlerConnection;
// how SoapTcpConnection reacts to a null argument is not visible here - confirm.
var crawlerConnection = webArgs.SystemAttachParas["SoapTcpConnectionString"] as WebCrawlerConnection;

// Re-resolve the target address to the sliced JSONP URL (per the original comment: the first page's slice).
string slicedJsonpUrl = this.ResolveSlicedSearchPageSilcedUrl(webArgs, next_start, show_items);
webArgs.ResolvedUrl = new ResolvedSearchUrlWithParas { Url = slicedJsonpUrl };

using (var tcpConn = new SoapTcpConnection(crawlerConnection))
{
    // Open the connection only when it reports itself as closed.
    if (tcpConn.State == ConnectionState.Closed)
    {
        tcpConn.Open();
    }

    // Build the fetch-page SOAP message; its body is the JSON-serialized request arguments.
    var fetchCmd = new SoapMessage()
    {
        Head = CommandConstants.CMD_FetchPage,
        Body = JsonConvert.SerializeObject(webArgs)
    };

    var response = tcpConn.SendSoapMessage(fetchCmd);
    bool succeeded = response != null && response.Status == 1;

    if (!succeeded)
    {
        // Log the request parameters and, when present, the error message returned by the server.
        var failure = new StringBuilder("抓取网页请求失败!参数:");
        failure.Append(fetchCmd.Body);
        if (response != null && !string.IsNullOrEmpty(response.ErrorMsg))
        {
            failure.Append(";服务端错误消息:");
            failure.Append(response.ErrorMsg);
        }
        PluginContext.Logger.Error(failure.ToString());
    }
    else
    {
        // Status == 1 is treated as success: keep the returned content.
        htmlItemsContent = response.Result;
    }
}

数据交互示范

/// <summary>
/// Inserts two student records one after the other; the second insert is timed with a
/// stopwatch and its return value is asserted to be greater than zero.
/// </summary>
/// <remarks>
/// The attribute and method signature are restored here: in the original text they had been
/// collapsed into a single <c>///</c> comment line, which left the method body without a declaration.
/// </remarks>
[TestMethod()]
public void AddOneStudentsModelTest()
{
    // First insert (not timed).
    var model = new StudentsModel
    {
        Name = "你猜猜-" + DateTime.Now.ToString(),
        Age = DateTime.Now.Second,
        Sex = true,
        Score = 55.98m,
        Longitude = 555555.6666,
        AddTime = DateTime.Now
    };

    var result = serviceOfStudents.AddOneStudentsModel(model);

    // Second insert: measure how long a single insert takes.
    var watch = new System.Diagnostics.Stopwatch();
    watch.Start();

    model = new StudentsModel
    {
        Name = "你猜222222222猜-" + DateTime.Now.ToString(),
        Age = DateTime.Now.Second,
        Sex = true,
        Score = 6655.98m,
        Longitude = 99999999,
        AddTime = DateTime.Now
    };

    result = serviceOfStudents.AddOneStudentsModel(model);

    watch.Stop();

    Console.WriteLine(string.Format("real for insert one data use time  is :{0} ms.", watch.ElapsedMilliseconds));

    // Only the result of the second insert is checked.
    Assert.IsTrue(result > 0);
}

    /// <summary>
    /// Batch insert: builds 100 student records with GUID-suffixed names and random ages,
    /// then passes the whole list to the service in a single call.
    /// </summary>
    [TestMethod()]
    public void AddMulitiStudentsModelsTest()
    {
        var students = new List<StudentsModel>();
        var ageSource = new Random(DateTime.Now.Millisecond);

        var remaining = 100;
        while (remaining > 0)
        {
            students.Add(new StudentsModel
            {
                Name = "你猜猜-" + Guid.NewGuid().ToString(),
                Age = ageSource.Next(1, 100),
                Sex = false,
                Score = 33355.98m,
                Longitude = 59595959,
                AddTime = DateTime.Now
            });
            remaining--;
        }

        var result = serviceOfStudents.AddMulitiStudentsModels(students);

        // NOTE(review): this assertion is a tautology and never inspects `result`;
        // the test can only fail if the call above throws.
        Assert.IsTrue(1 == 1);
    }

    /// <summary>
    /// Updates one entity by primary key (per the original comment): the model carries
    /// Id = 1 and the new Age = 100, and the service call is expected to return true.
    /// </summary>
    [TestMethod()]
    public void UpdateOneStudentsModelTest()
    {
        var changes = new StudentsModel();
        changes.Id = 1;
        changes.Age = 100;

        var updated = serviceOfStudents.UpdateOneStudentsModel(changes);

        Assert.IsTrue(updated);
    }




    /// <summary>
    /// Conditional update: sets Age = 333 on the rows selected by a predicate that
    /// combines two conditions with a logical AND.
    /// </summary>
    [TestMethod()]
    public void UpdateStudentsModelsByConditionTest()
    {
        // The first argument carries the new values, the second selects the rows.
        var updated = serviceOfStudents.UpdateStudentsModelsByCondition(
            new StudentsModel { Age = 333 },
            x => x.Id > 0 && x.Name.Contains("你猜猜%"));

        Assert.IsTrue(updated);
    }


    /// <summary>
    /// Fetches the student with id 1 and asserts that a non-null model is returned.
    /// </summary>
    [TestMethod()]
    public void GetstudentsElementByIdTest()
    {
        var student = this.serviceOfStudents.GetstudentsElementById(1);

        Assert.IsTrue(student != null);
    }

    /// <summary>
    /// Conditional query: first with an OR predicate, then with a null predicate.
    /// Both calls are expected to return at least one element.
    /// </summary>
    [TestMethod()]
    public void GetfStudentsElementsByConditionTest()
    {
        // Query with an explicit predicate.
        var matches = this.serviceOfStudents.GetstudentsElementsByCondition(
            x => x.Id == 1 || x.Name.Contains("你猜猜%"));
        Assert.IsTrue(matches.Count > 0);

        // Query again, this time passing null as the condition.
        matches = this.serviceOfStudents.GetstudentsElementsByCondition(null);
        Assert.IsTrue(matches.Count > 0);
    }
    /// <summary>
    /// Conditional delete: removes the rows selected by an OR predicate and asserts
    /// that the service reports true.
    /// </summary>
    [TestMethod()]
    public void DeleteMulitiservicesAddressByConditionTest()
    {
        var deleted = this.serviceOfStudents.DeleteMulitistudentsByCondition(
            x => x.Id == 1 || x.Name.Contains("你猜猜%"));

        Assert.IsTrue(deleted);
    }


    /// <summary>
    /// Builds a query condition from several lambda expressions, merged with
    /// And/Or on a predicate builder, then runs the query and asserts a non-null result.
    /// </summary>
    [TestMethod()]
    public void GetByMultipleConditionsTest()
    {
        // Start from a fresh predicate and add conditions to it step by step.
        var predicate = PredicateBuilder.CreatNew<StudentsModel>();

        string id = "55";
        bool hasIdText = !string.IsNullOrEmpty(id);
        if (hasIdText && id.ToInt() > 0)
        {
            predicate = predicate.And(s => s.Id <= id.ToInt());
        }

        // Merge in the name condition with OR.
        predicate = predicate.Or(s => s.Name.Contains("你猜猜-2%"));

        var found = this.serviceOfStudents.GetstudentsElementsByCondition(predicate);

        Assert.IsNotNull(found);
    }


    /// <summary>
    /// Paged conditional query: requests page index 0 with a page size of 10 for rows with
    /// Id &gt; 0, ordered by "Id" descending, and asserts that the page is not empty.
    /// </summary>
    [TestMethod()]
    public void GetStudentsModelsElementsByPagerAndConditionTest()
    {
        int pageIndex = 0;
        int pageSize = 10;

        // Passed as out arguments to the service call.
        int recordCount = -1;
        int pageCount = -1;

        var page = this.serviceOfStudents.GetstudentsElementsByPagerAndCondition(
            pageIndex,
            pageSize,
            out recordCount,
            out pageCount,
            x => x.Id > 0,
            "Id",
            OrderRule.DESC);

        Assert.IsTrue(page.Count > 0);
    }

联系作者

MyBlog:http://www.cnblogs.com/micro-chen/ 
QQ:1021776019

 

原文地址:https://www.cnblogs.com/micro-chen/p/9075658.html