Get the Toutiao.com news list

First copy the cookie from your own browser session; then you can run the code below.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Net;
using System.Text;
using HtmlAgilityPack;
using Newtonsoft.Json;

namespace WindowsFormsApp1
{
    static class Program
    {
        /// <summary>
        /// Application entry point. Applies the WinForms rendering defaults and then
        /// gathers feed article URLs through <c>test</c>; no form is shown.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            // The form launch is left commented out; only the crawler runs.
            // Application.Run(new Form1());
            var collectedUrls = new List<string>();
            test(collectedUrls, max_behot_time: null);
        }

        // Value appended as the `_signature` query parameter of every feed request.
        // NOTE(review): hard-coded; presumably captured from a browser session and may expire — confirm.
        static string _signature = "_02B4Z6wo00f01F.sjWQAAIBCgeX81SJdOnBf6YnAAEhxoVC2JRfZ0QahpPnV6EmcPuHz04o3v5s-QKaNBQ2luCcHl2v4nNzEuGllKmCUQVrCn77y3PXmNdXh-3lMlrkkvt4ItWbtvRpTLbfB6f";
        // Value sent as the `cookie` request header.
        // NOTE(review): session-specific data committed in source; replace with a cookie from your own browser.
        static string cookie = "csrftoken=620bbe6e6bec516a228938d9adc9d616; tt_webid=6888172478528259598; MONITOR_WEB_ID=53483fbc-ef71-4859-91cd-1833242346e8; s_v_web_id=verify_kgrjtabo_fuY8uuEL_ry3f_4d0q_8fHv_EJ3P2bCGH4Ky; ttcid=2d01cbca59c4456ba0a8137e857a01e511; tt_scid=LtVXXilIPbrZ2acC-MSiW7ELGN9OtxOtSL0Aof-FE-vtbYbZFhCAJlgknDzQu0uZ32f1";
        // Shared random source; read by cutRandSection to pick a section index.
        static Random rnd = new Random();

        /// <summary>
        /// Collects article URLs from the Toutiao PC feed API into <paramref name="listURL"/>,
        /// following the paging cursor until at least 10 URLs have been gathered or the
        /// feed stops yielding data.
        /// </summary>
        /// <param name="listURL">Destination list; absolute article URLs are appended to it.</param>
        /// <param name="max_behot_time">
        /// Paging cursor taken from a previous response. Pass null or an empty string to
        /// request the first page.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="listURL"/> is null.</exception>
        static void test(List<string> listURL, string max_behot_time)
        {
            if (listURL == null) throw new ArgumentNullException("listURL");

            const int targetCount = 10;
            const string site = "https://www.toutiao.com";
            const string commonQuery = "&category=__all__&utm_source=toutiao&widen=1&tadrequire=true&_signature=";

            // Iterative paging: a loop cannot overflow the stack the way unbounded recursion can.
            while (true)
            {
                // Without a cursor ask for the first page; with one, ask for the page after it.
                string cursorQuery = string.IsNullOrEmpty(max_behot_time)
                    ? "min_behot_time=0"
                    : "max_behot_time=" + max_behot_time;
                string url = site + "/api/pc/feed/?" + cursorQuery + commonQuery + _signature;

                string strJson;
                // `using` releases the client on every exit path, including exceptions.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.UTF8;
                    wc.Headers.Add("referer", "https://www.toutiao.com/");
                    wc.Headers.Add("cookie", cookie);
                    strJson = wc.DownloadString(url);
                }

                if (string.IsNullOrEmpty(strJson)) return;

                var page = JsonConvert.DeserializeObject<NewsObject>(strJson);
                // An empty page means there is nothing more to collect; stop instead of looping forever.
                if (page == null || page.data == null || page.data.Length == 0) return;

                foreach (var item in page.data)
                {
                    if (string.IsNullOrEmpty(item.source_url)) continue;
                    // source_url is site-relative (e.g. "group/6887092770658320903/");
                    // normalise so exactly one '/' separates host and path.
                    listURL.Add(site + "/" + item.source_url.TrimStart('/'));
                }

                if (listURL.Count >= targetCount) return;
                if (page.next == null) return;

                string nextCursor = "" + page.next.max_behot_time;
                // A cursor that does not advance would refetch the same page endlessly.
                if (nextCursor == max_behot_time) return;
                max_behot_time = nextCursor;
            }

            // Possible next step (not implemented): download each URL, then parseHtml + cutRandSection.
        }


        /// <summary>
        /// Returns the inner text of the first &lt;article&gt; element found in the given HTML.
        /// </summary>
        /// <param name="strHtml">Raw HTML of a page.</param>
        /// <returns>
        /// The article's inner text, or an empty string when the input is null/empty or the
        /// page contains no &lt;article&gt; element.
        /// </returns>
        static string parseHtml(string strHtml)
        {
            if (string.IsNullOrEmpty(strHtml)) return string.Empty;

            // Fully qualified: System.Windows.Forms also declares an HtmlDocument type.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(strHtml);

            // SelectSingleNode yields null when nothing matches, so guard before dereferencing.
            HtmlNode article = doc.DocumentNode.SelectSingleNode("//article");
            return article == null ? string.Empty : article.InnerText;
        }


        /// <summary>
        /// Picks one non-empty line of <paramref name="htmlText"/> at random.
        /// </summary>
        /// <param name="htmlText">Text to split on line breaks.</param>
        /// <returns>
        /// A randomly chosen non-empty line, or <paramref name="htmlText"/> unchanged when it is
        /// null, empty, or contains only line breaks.
        /// </returns>
        static string cutRandSection(string htmlText)
        {
            if (string.IsNullOrEmpty(htmlText)) return htmlText;

            // Split on both CR and LF so Windows and Unix line endings behave the same.
            string[] sections = htmlText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (sections.Length == 0) return htmlText;

            // Random.Next(max) is exclusive of max, so every result is a valid index.
            return sections[rnd.Next(sections.Length)];
        }

        /// <summary>
        /// Current time as whole seconds since 1970-01-01 00:00:00 UTC, rendered as a decimal string.
        /// </summary>
        static string currTimeStamp
        {
            get
            {
                const long unixEpochTicks = 621355968000000000; // DateTime ticks at the Unix epoch
                const long ticksPerSecond = 10000000;           // one tick is 100 ns

                long utcTicks = DateTime.Now.ToUniversalTime().Ticks;
                long seconds = (utcTicks - unixEpochTicks) / ticksPerSecond;
                return seconds.ToString();
            }
        }
    }

    #region NewsObject
    /// <summary>
    /// Root object that a feed API response is deserialized into
    /// (via <c>JsonConvert.DeserializeObject&lt;NewsObject&gt;</c> in <c>Program.test</c>).
    /// Property names mirror the JSON keys, hence the lower-case naming.
    /// </summary>
    public class NewsObject
    {
        // Not read by the visible code. NOTE(review): presumably signals whether further pages exist — confirm.
        public bool has_more { get; set; }
        // Not read by the visible code.
        public string message { get; set; }
        // Feed entries; each entry's source_url is turned into an absolute article URL.
        public Datum[] data { get; set; }
        // Holds the cursor that is sent back as max_behot_time for the following request.
        public Next next { get; set; }
    }

    /// <summary>
    /// Paging information of a feed response.
    /// </summary>
    public class Next
    {
        // Cursor value; Program.test passes it as the max_behot_time query parameter of the next request.
        // NOTE(review): Datum.behot_time is declared long while this is int — confirm the range fits.
        public int max_behot_time { get; set; }
    }

    /// <summary>
    /// One entry of the feed response's <c>data</c> array. Property names mirror the JSON
    /// keys; only <see cref="source_url"/> is read by the visible code.
    /// </summary>
    public class Datum
    {
        public bool single_mode { get; set; }

        /// <summary>
        /// Summary text of the entry. <c>abstract</c> is a C# keyword, so the property carries
        /// an underscore; the explicit mapping is required because Json.NET matches members by
        /// name and would otherwise never bind the JSON key "abstract" to <c>_abstract</c>.
        /// </summary>
        [JsonProperty("abstract")]
        public string _abstract { get; set; }

        public bool middle_mode { get; set; }
        public bool more_mode { get; set; }
        public string tag { get; set; }
        public int comments_count { get; set; }
        public string tag_url { get; set; }
        public string title { get; set; }
        public string chinese_tag { get; set; }
        public string source { get; set; }
        public long group_source { get; set; }
        public bool has_gallery { get; set; }
        public string media_url { get; set; }
        public string media_avatar_url { get; set; }

        /// <summary>Site-relative article path; combined with the site host to form the article URL.</summary>
        public string source_url { get; set; }

        public string article_genre { get; set; }
        public string item_id { get; set; }
        public bool is_feed_ad { get; set; }
        public long behot_time { get; set; }
        public string image_url { get; set; }
        public string group_id { get; set; }
        public string middle_image { get; set; }
        public Image_List[] image_list { get; set; }
    }

    /// <summary>
    /// Element type of <c>Datum.image_list</c>.
    /// </summary>
    public class Image_List
    {
        // Bound from the JSON key "url"; not read by the visible code.
        public string url { get; set; }
    }

    #endregion







}

  

Generate an article from search results:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Net;
using System.Text;
using HtmlAgilityPack;
using Newtonsoft.Json;
using System.Diagnostics;

namespace WindowsFormsApp1
{
    static class Program
    {
        /// <summary>
        /// Application entry point. Applies the WinForms rendering defaults, collects
        /// search-result URLs for <c>keyWords</c>, and builds an article from them.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            // Application.Run(new Form1());

            // Fall back to a default search phrase when none is configured.
            if (keyWords == "")
            {
                keyWords = "AI智能写作";
            }

            var articleUrls = new List<string>();
            getArticleList(articleUrls, pageIndex: 0);

            // The generated article is not consumed here; the return value is discarded.
            MakeArticle(articleUrls);
        }


        // Search phrase; URL-escaped into the `keyword` query parameter by getArticleList.
        // Main replaces it with a default when it is empty.
        static string keyWords= "蚂蚁金服上市";
        // Value appended as the `_signature` query parameter of the search request.
        // NOTE(review): hard-coded; presumably captured from a browser session and may expire — confirm.
        static string _signature = "_02B4Z6wo00f01F.sjWQAAIBCgeX81SJdOnBf6YnAAEhxoVC2JRfZ0QahpPnV6EmcPuHz04o3v5s-QKaNBQ2luCcHl2v4nNzEuGllKmCUQVrCn77y3PXmNdXh-3lMlrkkvt4ItWbtvRpTLbfB6f";
        // Value sent as the `cookie` request header by HttpGet.
        // NOTE(review): session-specific data committed in source; replace with a cookie from your own browser.
        static string cookie = "csrftoken=620bbe6e6bec516a228938d9adc9d616; tt_webid=6888172478528259598; MONITOR_WEB_ID=53483fbc-ef71-4859-91cd-1833242346e8; s_v_web_id=verify_kgrjtabo_fuY8uuEL_ry3f_4d0q_8fHv_EJ3P2bCGH4Ky; ttcid=2d01cbca59c4456ba0a8137e857a01e511; tt_scid=LtVXXilIPbrZ2acC-MSiW7ELGN9OtxOtSL0Aof-FE-vtbYbZFhCAJlgknDzQu0uZ32f1";
        // Shared random source; read by cutRandSection to pick a section index.
        static Random rnd = new Random();
        // When true, MakeArticle places the first picture found on a page before that page's text.
        static bool includePic = true;
        
        /// <summary>
        /// Queries the Toutiao search API for <c>keyWords</c>, page by page, and appends the
        /// absolute URL of every result that has an <c>open_url</c> to <paramref name="listURL"/>.
        /// Stops once 20 URLs are collected, a request fails, or a page returns no entries.
        /// </summary>
        /// <param name="listURL">Destination list; article URLs are appended to it.</param>
        /// <param name="pageIndex">Zero-based index of the first result page to fetch.</param>
        /// <exception cref="ArgumentNullException"><paramref name="listURL"/> is null.</exception>
        static void getArticleList(List<string> listURL, int pageIndex)
        {
            if (listURL == null) throw new ArgumentNullException("listURL");

            const int pageCount = 20;   // results requested per page
            const int targetCount = 20; // stop once this many URLs have been gathered

            // Iterative paging; the index is advanced explicitly at the end of each pass
            // so each request asks for a new offset.
            while (true)
            {
                string url = "https://www.toutiao.com/api/search/content/?aid=24&app_name=web_search&offset=" + (pageIndex * pageCount)
                    + "&format=json&keyword=" + Uri.EscapeDataString(keyWords)
                    + "&autoload=true&count=" + pageCount
                    + "&en_qc=1&cur_tab=1&from=search_tab&pd=synthesis&timestamp=" + currTimeStamp + "000"
                    + "&_signature=" + _signature;

                // HttpGet returns an empty string when the request fails.
                string strJson = HttpGet(url);
                if (string.IsNullOrEmpty(strJson)) return;

                var page = JsonConvert.DeserializeObject<NewsObject>(strJson);
                // No entries means there is nothing further to page through.
                if (page == null || page.data == null || page.data.Length == 0) return;

                foreach (var d in page.data)
                {
                    if (d == null || string.IsNullOrEmpty(d.open_url)) continue;
                    listURL.Add("https://www.toutiao.com" + d.open_url);
                }

                if (listURL.Count >= targetCount) return;
                pageIndex++;
            }
        }
        /// <summary>
        /// Builds an HTML fragment by downloading each URL, taking one random text section
        /// from its &lt;article&gt; content and, when <c>includePic</c> is set and the page has a
        /// picture, prefixing that section with an &lt;img&gt; paragraph.
        /// Stops as soon as the fragment is at least 800 characters long.
        /// </summary>
        /// <param name="_listURL">Article URLs to draw content from; null is treated as empty.</param>
        /// <returns>The assembled fragment; empty when no page yielded any text.</returns>
        static string MakeArticle(List<string> _listURL)
        {
            const int minArticleLength = 800;

            if (_listURL == null) return string.Empty;

            // StringBuilder avoids re-copying the whole article on every append.
            var article = new StringBuilder();
            foreach (var url in _listURL)
            {
                string strHtml = HttpGet(url);
                if (string.IsNullOrEmpty(strHtml)) continue;

                ContentData cdata = ParseContentData(strHtml);
                if (cdata == null || string.IsNullOrEmpty(cdata.InnerText)) continue;

                // Emit the image paragraph only when there is a picture; an <img> with an
                // empty src is a broken image in the output.
                if (includePic && cdata.InnerPicUrls != null && cdata.InnerPicUrls.Count > 0)
                {
                    // The URL comes from scraped markup: neutralise the quote character that
                    // would otherwise terminate the single-quoted attribute.
                    string pic = cdata.InnerPicUrls[0].Replace("'", "&#39;");
                    article.Append("<p><img src='").Append(pic).Append("'/></p>");
                }

                article.Append(cutRandSection(cdata.InnerText));
                if (article.Length >= minArticleLength) break;
            }

            return article.ToString();
        }


        /// <summary>
        /// Parses a page and extracts the inner text of its first &lt;article&gt; element together
        /// with the absolute (http/https) <c>src</c> URLs of all &lt;img&gt; elements in the document.
        /// </summary>
        /// <param name="strHtml">Raw HTML of the page.</param>
        /// <returns>
        /// A <see cref="ContentData"/> that always carries the source HTML. <c>InnerText</c> stays
        /// null and <c>InnerPicUrls</c> stays empty when the page has no &lt;article&gt; element or
        /// parsing fails. Never returns null.
        /// </returns>
        static ContentData ParseContentData(string strHtml)
        {
            ContentData cdata = new ContentData() { SoruceHtml = strHtml };
            if (string.IsNullOrEmpty(strHtml)) return cdata;

            try
            {
                // Fully qualified: System.Windows.Forms also declares an HtmlDocument type.
                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(strHtml);
                HtmlNode rootnode = doc.DocumentNode;

                HtmlNode article = rootnode.SelectSingleNode("//article");
                if (article == null) return cdata;
                cdata.InnerText = article.InnerText;

                // SelectNodes yields null (not an empty collection) when nothing matches.
                var images = rootnode.SelectNodes("//img");
                if (images == null) return cdata;

                foreach (HtmlNode img in images)
                {
                    // An <img> without a src attribute must be skipped, not abort the scan.
                    HtmlAttribute srcAttr = img.Attributes["src"];
                    if (srcAttr == null) continue;

                    string picUrl = srcAttr.Value;
                    if (string.IsNullOrEmpty(picUrl)
                        || !picUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }
                    cdata.InnerPicUrls.Add(picUrl);
                }
            }
            catch (Exception ex)
            {
                // Best effort: callers expect whatever was extracted so far, never an exception.
                Debug.WriteLine("ParseContentData failed: " + ex);
            }

            return cdata;
        }

        // Two-member enum; not referenced anywhere in the visible code.
        enum ddd
        {
            eee,
            eeer
        }
        /// <summary>
        /// Picks one non-empty line of <paramref name="htmlText"/> at random.
        /// </summary>
        /// <param name="htmlText">Text to split on line breaks.</param>
        /// <returns>
        /// A randomly chosen non-empty line, or <paramref name="htmlText"/> unchanged when it is
        /// null, empty, or contains only line breaks.
        /// </returns>
        static string cutRandSection(string htmlText)
        {
            if (string.IsNullOrEmpty(htmlText)) return htmlText;

            // Split on both CR and LF so Windows and Unix line endings behave the same.
            string[] sections = htmlText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            if (sections.Length == 0) return htmlText;

            // Random.Next(max) is exclusive of max, so every result is a valid index.
            return sections[rnd.Next(sections.Length)];
        }

        /// <summary>
        /// Current time as whole seconds since 1970-01-01 00:00:00 UTC, rendered as a decimal string.
        /// </summary>
        static string currTimeStamp
        {
            get
            {
                const long unixEpochTicks = 621355968000000000; // DateTime ticks at the Unix epoch
                const long ticksPerSecond = 10000000;           // one tick is 100 ns

                long utcTicks = DateTime.Now.ToUniversalTime().Ticks;
                long seconds = (utcTicks - unixEpochTicks) / ticksPerSecond;
                return seconds.ToString();
            }
        }


        /// <summary>
        /// Downloads <paramref name="url"/> as UTF-8 text, sending the Toutiao referer and the
        /// configured cookie header.
        /// </summary>
        /// <param name="url">Absolute URL to fetch.</param>
        /// <returns>
        /// The response body, or an empty string when the URL is null/empty or the request
        /// fails for any reason. This method does not throw.
        /// </returns>
        public static string HttpGet(string url)
        {
            if (string.IsNullOrEmpty(url)) return "";

            try
            {
                // `using` disposes the client even when DownloadString throws.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.UTF8;
                    wc.Headers.Add("referer", "https://www.toutiao.com/");
                    wc.Headers.Add("cookie", cookie);
                    return wc.DownloadString(url) ?? "";
                }
            }
            catch (Exception ex)
            {
                // Callers treat "" as "skip this URL"; keep that contract but leave a trace
                // so failures are diagnosable instead of silently swallowed.
                Debug.WriteLine("HttpGet failed for " + url + ": " + ex.Message);
                return "";
            }
        }



    }

    /// <summary>
    /// Result of parsing one downloaded page: the raw HTML, the extracted article text,
    /// and the picture URLs found on the page.
    /// </summary>
    public class ContentData
    {
        /// <summary>Creates an instance whose picture list is empty but never null.</summary>
        public ContentData()
        {
            this.InnerPicUrls = new List<string>();
        }

        /// <summary>Raw HTML the data was extracted from.</summary>
        public string SoruceHtml { get; set; }

        /// <summary>Extracted article text; null until a parser assigns it.</summary>
        public string InnerText { get; set; }

        /// <summary>Picture URLs collected from the page.</summary>
        public List<string> InnerPicUrls { get; set; }
    }



    #region NewsObject


    /// <summary>
    /// Root object that a search API response is deserialized into
    /// (via <c>JsonConvert.DeserializeObject&lt;NewsObject&gt;</c> in <c>Program.getArticleList</c>).
    /// Property names mirror the JSON keys. Only <c>data</c> is read by the visible code.
    /// </summary>
    public class NewsObject
    {
        public int count { get; set; }
        public int return_count { get; set; }
        public string query_id { get; set; }
        // NOTE(review): declared int here; presumably a 0/1 flag for further pages — confirm.
        public int has_more { get; set; }
        public string request_id { get; set; }
        public string search_id { get; set; }
        public long cur_ts { get; set; }
        public int offset { get; set; }
        public string message { get; set; }
        public string pd { get; set; }
        public int show_tabs { get; set; }
        public string keyword { get; set; }
        public string city { get; set; }
        public string[] tokens { get; set; }
        // Search results; entries with a non-empty open_url become article URLs.
        public Datum[] data { get; set; }
        public int status { get; set; }
    }



    /// <summary>
    /// One entry of the search response's <c>data</c> array; only the fields declared here are bound.
    /// </summary>
    public class Datum
    {
        // Not read by the visible code.
        public string create_time { get; set; }
        // Site-relative path; getArticleList prefixes it with "https://www.toutiao.com".
        public string open_url { get; set; }

        // Not read by the visible code.
        public string source_url { get; set; }

    }



    #endregion






}

  

原文地址:https://www.cnblogs.com/wgscd/p/13884546.html