简简单单C#爬虫小记

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace 正则
{
    /// <summary>
    /// Minimal console crawler: downloads a listing page, follows every link whose
    /// href contains "article", extracts each page's title and content block with
    /// regular expressions, and writes them to numbered text files in a "data"
    /// directory under the current working directory.
    /// </summary>
    class Program
    {
        // Listing page that is scanned for article links.
        private const string ListUrl = "http://www.admin5.com/browse/177/";

        // Value of every double-quoted href attribute.
        private static readonly Regex HrefRegex = new Regex("(?<=href=\").*?(?=\")");

        // Text between "title>" and "</title>".
        private static readonly Regex TitleRegex = new Regex("(?<=title>).*?(?=</title>)");

        // First <div class="content"> ... </div> span; [\s\S] lets the match cross line breaks.
        private static readonly Regex ContentRegex = new Regex(@"<div class=""content"">[\s\S]*?</div>");

        /// <summary>
        /// Program entry point: crawls the listing page and saves every article found.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Uri listUri = new Uri(ListUrl);
            string html = GetHtml(ListUrl, Encoding.UTF8);
            string dataDir = Path.Combine(Directory.GetCurrentDirectory(), "data");

            // Remembers article URLs already handled so a page that is linked
            // more than once on the listing is downloaded and saved only once.
            HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);
            int a = 1;

            foreach (Match m in HrefRegex.Matches(html))
            {
                if (!m.Value.Contains("article"))
                {
                    continue;
                }

                // Resolve the href against the listing URL so that relative and
                // absolute links both yield a fetchable absolute URL.
                Uri articleUri;
                if (!Uri.TryCreate(listUri, m.Value, out articleUri)
                    || (articleUri.Scheme != Uri.UriSchemeHttp && articleUri.Scheme != Uri.UriSchemeHttps))
                {
                    continue;
                }

                string articleUrl = articleUri.AbsoluteUri;
                if (!seen.Add(articleUrl))
                {
                    continue;
                }

                // Print the URL that is actually requested.
                Console.WriteLine(articleUrl);
                Console.WriteLine("抓取内容");

                string content;
                try
                {
                    content = GetHtml(articleUrl, Encoding.UTF8);
                }
                catch (WebException ex)
                {
                    // One unreachable article should not abort the whole crawl.
                    Console.WriteLine("抓取失败: " + ex.Message);
                    continue;
                }

                Match titleMatch = TitleRegex.Match(content);
                Match contentMatch = ContentRegex.Match(content);
                if (!titleMatch.Success || !contentMatch.Success)
                {
                    // Page does not have the expected structure; nothing to save.
                    Console.WriteLine("跳过: 未找到标题或正文");
                    continue;
                }

                string title = titleMatch.Value;
                string neirong = contentMatch.Value;

                Console.WriteLine("保存数据");
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(dataDir);
                File.WriteAllText(Path.Combine(dataDir, a + ".txt"), title + Environment.NewLine + neirong);
                a++;
                Console.WriteLine("保存成功");
            }

            Console.WriteLine("ok");
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="url"/> and returns its body as text.
        /// </summary>
        /// <param name="url">Absolute URL to request.</param>
        /// <param name="encoding">
        /// Encoding used to decode the response body; UTF-8 is used when this is null.
        /// </param>
        /// <returns>The full response body as a string.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        private static string GetHtml(string url, Encoding encoding)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // Dispose the response, stream and reader so the underlying
            // connection is released after every request.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream s = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(s, encoding ?? Encoding.UTF8))
            {
                return sr.ReadToEnd();
            }
        }
    }
}

  

谢谢你长得这么好看还来看我的博客!
原文地址:https://www.cnblogs.com/hexd1230/p/4781526.html