GFT_News Auto

using AnfleCrawler.Common;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer that reads a JSON list of news rows from the landed page, asks the
    /// <c>BMLF/Match</c> endpoint which entities each row's content mentions, and posts
    /// the matched labels back through <c>BMLF/Save</c>.
    /// </summary>
    internal class GFT_News : AnalyzerBase
    {
        /// <summary>
        /// Positional description of the groups in the Match response's "Message" array:
        /// the key is the JSON property read from each matched entry, the value is the
        /// label suffix appended after the "|" separator.
        /// </summary>
        private static readonly KeyValuePair<string, string>[] MatchGroups =
        {
            new KeyValuePair<string, string>("CITY_NAME", "城市"),
            new KeyValuePair<string, string>("STANDARD_REGION", "区域"),
            new KeyValuePair<string, string>("SHORT_NAME_CHS", "企业"),
            new KeyValuePair<string, string>("XIANGMUMINGCHENG", "项目"),
        };

        /// <summary>
        /// Index in <see cref="MatchGroups"/> of the region group, whose labels are
        /// prefixed with the entry's city name when one is present.
        /// </summary>
        private const int RegionGroupIndex = 1;

        /// <summary>
        /// Initializes the analyzer. No analyzer-specific setup is performed; the call is
        /// forwarded to the base implementation.
        /// </summary>
        /// <param name="crawler">The crawler this analyzer is attached to.</param>
        public override void Init(PageCrawler crawler)
        {
            // Seeding of the list URL is disabled here; the pattern is kept for reference.
            // NOTE(review): presumably the start URL is supplied elsewhere - confirm.
            //crawler.PushUrl(new System.Net.StringPatternGenerator("http://webapi.anfle.com/BMLF/BmlfList?page=[1-75]&rows=50&sort=PublishDate&order=desc"), 0);
            base.Init(crawler);
        }

        /// <summary>
        /// Loads the page for <paramref name="current"/>, parses its text as JSON and
        /// processes every entry of the "rows" array: the entry's CONTENT is posted to
        /// <c>BMLF/Match</c>, the matches are flattened into a label string, and the row is
        /// posted to <c>BMLF/Save</c>. Rows with no matches are skipped. A failure while
        /// processing one row is logged and does not stop the remaining rows.
        /// </summary>
        /// <param name="current">The landed page entity to analyze.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);

            var dom = lander.GetDocument(pHandler);
            string text = dom.DocumentNode.InnerText;
            //App.LogInfo("GFT: {0}", text);
            var json = JObject.Parse(text);
            var client = new HttpClient();
            client.SendReceiveTimeout = int.MaxValue;
            foreach (var item in json["rows"])
            {
                try
                {
                    // Step 1: ask the Match endpoint which entities the content mentions.
                    string content = System.Web.HttpUtility.UrlEncode(item.Value<string>("CONTENT"));
                    client.SetRequest(new Uri("http://webapi.anfle.com/BMLF/Match"));
                    client.Form["content"] = content;
                    var matchtext = client.GetResponse().GetResponseText();
                    var mjson = JObject.Parse(matchtext);
                    var mset = (JArray)mjson["Message"];

                    // Step 2: flatten the matches into the label string sent as "val".
                    string val = BuildMatchValue(mset);
                    string rowid = item.Value<string>("ROWID");
                    if (val.Length == 0)
                    {
                        Crawler.OutWrite("Skip Empty {0}", rowid);
                        continue;
                    }

                    // Step 3: save the labels together with the row's own fields.
                    client.SetRequest(new Uri("http://webapi.anfle.com/BMLF/Save"));
                    client.Form["isModify"] = "true";
                    client.Form["rowId"] = rowid;
                    client.Form["val"] = val;
                    client.Form["kind"] = item.Value<string>("KIND");
                    client.Form["title"] = item.Value<string>("TITLE");
                    client.Form["source"] = item.Value<string>("SOURCE");
                    client.Form["date"] = item.Value<string>("PUBLISHDATE");
                    //client.Form["content"] = content;
                    // STATE arrives as boolean text; it is sent as "1" (true) or "0" (false).
                    client.Form["state"] = Convert.ToInt16(Convert.ToBoolean(item.Value<string>("STATE"))).ToString();
                    string ret = client.GetResponse().GetResponseText();
                    Crawler.OutWrite("Match OK {0} {1}", rowid, ret);
                }
                catch (Exception ex)
                {
                    // Best effort per row: report the failure and continue with the next row.
                    Crawler.OutWrite("Error {0}", ex.Message);
                    App.LogError(ex, "GFT");
                }
            }
        }

        /// <summary>
        /// Flattens the groups of a Match response into one string of the form
        /// <c>,name|label,name|label,...</c>. Each non-empty group contributes a leading
        /// comma followed by its comma-joined entries.
        /// </summary>
        /// <param name="matchSet">The "Message" array of the Match response.</param>
        /// <returns>The label string, or an empty string when nothing matched.</returns>
        private static string BuildMatchValue(JArray matchSet)
        {
            var builder = new StringBuilder();

            // Only the groups described by MatchGroups can be labelled; bounding the loop
            // keeps an unexpectedly long response from indexing past the table.
            int groupCount = Math.Min(matchSet.Count, MatchGroups.Length);
            for (int i = 0; i < groupCount; i++)
            {
                JToken bucket = matchSet[i];
                if (bucket == null)
                {
                    continue;
                }

                // Copy per-iteration values into locals so the lambda does not capture
                // the shared loop variable.
                KeyValuePair<string, string> config = MatchGroups[i];
                bool prefixCity = i == RegionGroupIndex;

                // Materialize once: the labels are both tested for emptiness and joined.
                List<string> labels = bucket.Select(p =>
                {
                    string ext = string.Empty;
                    if (prefixCity && p["CITY_NAME"] != null)
                    {
                        ext = p.Value<string>("CITY_NAME") + "-";
                    }
                    return ext + p.Value<string>(config.Key) + "|" + config.Value;
                }).ToList();

                if (labels.Count == 0)
                {
                    continue;
                }
                builder.Append(",").Append(string.Join(",", labels));
            }
            return builder.ToString();
        }
    }
}
原文地址:https://www.cnblogs.com/Googler/p/4168722.html