一个基于Android系统的新闻客户端(二)

                             一个基于Android系统的新闻客户端(二)

      三、抓取消息标题

        完整的网络爬虫非常复杂,本文不做实现,这里只演示如何抓取单个网页的内容。

        C#中有一个名为WebClient的类,它的成员方法可以下载指定网页的HTML代码,用法为:

        // Download the page at the given URL and collect its text line by line.
        // ReadLine() strips line terminators, so the result is one long string.
        string str=String.Empty;

        using (WebClient wl=new WebClient())

        using (Stream sm=wl.OpenRead("http://xxxxxx"))

        using (StreamReader sr=new StreamReader(sm))

        {

           string ch=String.Empty;

           // ReadLine is a StreamReader method (not a Stream method); it returns null at end of stream.
           while((ch=sr.ReadLine())!=null)

           {

              str +=ch;

           }

        }

        新建一个类库项目:Crawler。

        添加类CrawlerMain。

        代码为:

using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace Crawler
{
    /// <summary>
    /// Downloads the HTML of a single page and caches it in memory.
    /// The page address is read from the <c>connectionStrings</c> configuration
    /// entry named "Ardess".
    /// </summary>
    public class CrawlerMain
    {
        // Single client reused for every download made by this instance.
        private readonly WebClient wc;

        // Cached page content; empty until the first successful download.
        private string New;

        /// <summary>
        /// Creates a crawler with an empty cache.
        /// </summary>
        public CrawlerMain()
        {
            wc = new WebClient();
            New = String.Empty;
        }

        /// <summary>
        /// Downloads the page on a worker thread and stores it in the cache.
        /// Returns a Task (not <c>async void</c>) so that callers can await
        /// completion and observe download failures.
        /// </summary>
        private async Task ReadWebAsync()
        {
            New = await Main_ReadWebAsync().ConfigureAwait(false);
        }

        /// <summary>
        /// Runs <see cref="Main_ReadWeb"/> on the thread pool.
        /// </summary>
        /// <returns>A task producing the downloaded page text.</returns>
        private Task<string> Main_ReadWebAsync()
        {
            return Task.Run<string>(() => this.Main_ReadWeb());
        }

        /// <summary>
        /// Synchronously downloads the configured page.
        /// </summary>
        /// <returns>
        /// All lines of the response concatenated together; line terminators
        /// are not preserved because the content is read with ReadLine().
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The "Ardess" connection string entry is missing from configuration.
        /// </exception>
        public string Main_ReadWeb()
        {
            var setting = ConfigurationManager.ConnectionStrings["Ardess"];
            if (setting == null)
            {
                // Fail with a descriptive message instead of a bare NullReferenceException.
                throw new InvalidOperationException(
                    "Connection string 'Ardess' (the page address) is not configured.");
            }

            // Dispose the network stream and reader even if reading fails.
            using (Stream sm = wc.OpenRead(setting.ToString()))
            using (StreamReader sr = new StreamReader(sm))
            {
                // StringBuilder avoids quadratic copying when the page has many lines.
                StringBuilder liner = new StringBuilder();
                string ch;
                while ((ch = sr.ReadLine()) != null)
                {
                    liner.Append(ch);
                }
                return liner.ToString();
            }
        }

        /// <summary>
        /// Returns the cached page, downloading it first when the cache is empty.
        /// The download is performed synchronously so the returned value is
        /// populated on the very first call.
        /// </summary>
        /// <returns>The page text.</returns>
        public string GetNew()
        {
            if (String.IsNullOrEmpty(New))
            {
                New = this.Main_ReadWeb();
            }
            return New;
        }

        /// <summary>
        /// Asynchronous counterpart of <see cref="GetNew"/>: returns the cached
        /// page, downloading it on a worker thread first when the cache is empty.
        /// </summary>
        /// <returns>A task producing the page text.</returns>
        public async Task<string> GetNewAsync()
        {
            if (String.IsNullOrEmpty(New))
            {
                await ReadWebAsync().ConfigureAwait(false);
            }
            return New;
        }

    }
}
View Code

        新建一个控制台程序

         代码为:     

using Crawler;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Test
{
    /// <summary>
    /// Console driver: downloads the page through <c>CrawlerMain</c>, cuts out the
    /// text between the last "box_01" and the last "box_02" markers, echoes the
    /// fragments to the console and appends the extracted values to "t.txt".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string str = new CrawlerMain().Main_ReadWeb();
            //str = "sadsadsadsd<body>adasdasdasdsa</body>sadasdasdasd";

            // Ordinal search: the markers are identifiers in the markup, not linguistic text.
            int start = str.LastIndexOf("box_01", StringComparison.Ordinal);
            int end = str.LastIndexOf("box_02", StringComparison.Ordinal);
            if (start < 0 || end < start)
            {
                // Without both markers in order, Substring would throw ArgumentOutOfRangeException.
                Console.Error.WriteLine("Markers \"box_01\"/\"box_02\" were not found in the downloaded page.");
                Console.Read();
                return;
            }

            string str_1 = str.Substring(start, end - start);

            // NOTE(review): splitting on the letter 'a' presumably breaks target="_blank"
            // into a fragment whose first quoted value is "_bl", which then flags the
            // next quoted value as the one to save; confirm against the real page markup.
            string[] strNew = str_1.Split(new char[] { 'a' });

            // True when the previous quoted value was "_bl", i.e. the next one is saved.
            bool j = false;
            foreach (string s in strNew)
            {
                Console.Write(s + Environment.NewLine);
                string[] ss = s.Split('=');
                if (ss.Length > 1)
                {
                    string[] sss = ss[1].Split('"');
                    if (sss.Length < 2)
                    {
                        // No quoted value after '=': nothing to inspect, keep the pending flag.
                        continue;
                    }
                    if (j)
                    {
                        File.AppendAllText("t.txt", sss[1] + Environment.NewLine);
                        j = false;
                    }
                    if (sss[1] == "_bl")
                    {
                        j = true;
                    }
                }
            }

            // Keep the console window open until a key is pressed.
            Console.Read();
        }
    }
}
View Code

        好了就这样吧!

        

原文地址:https://www.cnblogs.com/liguifa/p/3801508.html