控制台爬取小说(大王饶命)

 1    var url = GetWBJokeUrl("/book/1719.html");
 2             string next;
 3             GetContent(url, out next);
 4             while (true)
 5             {
 6                 ConsoleKeyInfo info = System.Console.ReadKey();
 7                 switch (info.Key)
 8                 {
 9                     case ConsoleKey.E:
10                         Environment.Exit(0);
11                         break;
12                     case ConsoleKey.RightArrow:
13                         GetContent(GetWBJokeUrl(next), out next);
14                         break;
15                     default:
16                         System.Console.WriteLine(info.Key);
17                         break;
18                 }
19 
20             }
使用
 1  /// <summary>
 2         /// 获取大王饶命小说页面
 3         /// </summary>
 4         /// <param name="firstUrl">第一次进入的页面</param>
 5         /// <param name="nexturl">下一页</param>
 6         private static void GetContent(string firstUrl, out string nexturl)
 7         {
 8             var html = GetUrlContent(firstUrl);
 9             var url = @"<a href=([^>]+?)>下一页</a>";
10             string re1 = "/.+html";
11             nexturl = MatchReg(re1, MatchReg(url, html));
12             var divContent = @"(?m)<div id=""BookText""[^>]*>(?<div>(?:w|W)*?)</div[^>]*>";
13             html = MatchReg(divContent, html, "div").Trim().Replace("<br />", "");
14             var delh4 = @"<h4>([sS]*?)</h4>";
15             html = html.Replace(MatchReg(delh4, html), "");
16             Console.WriteLine(html);
17 
18         }
19 
20         /// <summary>
21         /// 筛选数据
22         /// </summary>
23         /// <param name="regStr">正则字符串</param>
24         /// <param name="html">网页标签</param>
25         /// <param name="input">需要获取的标签</param>
26         /// <returns></returns>
27         public static string MatchReg(string regStr, string html, string input = "0")
28         {
29             var reg = new Regex(regStr, RegexOptions.Multiline | RegexOptions.IgnoreCase);
30             var mc = reg.Match(html);
31             if (mc.Success)
32             {
33                 return mc.Groups[input].Value;
34             }
35             return "";
36         }
37 
38         /// <summary>
39         /// 爬取地址
40         /// </summary>
41         const string qsbkMainUrl = "http://www.dawangraoming.com";
42         /// <summary>
43         /// 爬取页面位置
44         /// </summary>
45         /// <param name="firsturl"></param>
46         /// <returns></returns>
47         private static string GetWBJokeUrl(string firsturl)
48         {
49             StringBuilder url = new StringBuilder();
50             url.Append(qsbkMainUrl);
51             url.Append(firsturl);
52             return url.ToString();
53         }
54 
55         /// <summary>
56         /// /伪装网站访问
57         /// </summary>
58         /// <param name="url">目标网站地址</param>
59         /// <returns></returns>
60         private static string GetUrlContent(string url)
61         {
62             try
63             {
64 
65                 HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
66 
67                 request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.8.1000 Chrome/30.0.1599.101 Safari/537.36";
68 
69                 request.Method = "GET";
70 
71                 request.ContentType = "text/html;charset=UTF-8";
72 
73                 HttpWebResponse response = (HttpWebResponse)request.GetResponse();
74 
75                 Stream myResponseStream = response.GetResponseStream();
76 
77                 StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8"));
78 
79                 string retString = myStreamReader.ReadToEnd();
80 
81                 myStreamReader.Close();
82 
83                 myResponseStream.Close();
84 
85                 return retString;
86 
87             }
88 
89             catch { return null; }
90 
91         }
封装方法
好好学习,天天向上。
原文地址:https://www.cnblogs.com/Zhengxue/p/8864869.html