利用正则表达式获取博客园随笔(四)

  我们前天讲到了需要加入多线程来缓解界面卡死的现象,那现在就让我来给大家介绍一个由博客园的某位大牛写的线程池。(实在是忘了是谁写的了……)

线程池的代码奉上:

  /// <summary>
  /// A small static work dispatcher. Queued <see cref="ThreadStart"/> delegates are each run on
  /// their own <see cref="BackgroundWorker"/>, with at most the configured maximum running at once.
  /// All state is static, so every caller in the process shares one queue and one limit.
  /// </summary>
  sealed class MyThreadPool
  {
      // Guards every piece of shared state below; it is also the monitor used to
      // wait for, and signal, a free worker slot.
      private static readonly object lockObj = new object();
      // Tasks that have been queued but not yet handed to a worker.
      private static readonly Queue<ThreadStart> threadStartQueue = new Queue<ThreadStart>();
      // Tasks that are currently running. A list rather than a set, so that two equal
      // delegates running at the same time are both counted against the limit.
      private static readonly List<ThreadStart> threadsWorker = new List<ThreadStart>();
      // Maximum number of tasks allowed to run at the same time (never below 1).
      private static int maxThreadWorkerCount = 1;
      // Lower bound applied when the maximum is changed.
      private static int minThreadWorkerCount = 0;

      /// <summary>
      /// Sets the maximum number of tasks that may run concurrently.
      /// </summary>
      /// <param name="maxThreadCount">
      /// Requested limit. Values below the configured minimum are raised to the minimum,
      /// and the result is never allowed to drop below 1.
      /// </param>
      public static void SetMaxWorkThreadCount(int maxThreadCount)
      {
          lock (lockObj)
          {
              int requested = minThreadWorkerCount > maxThreadCount
                  ? minThreadWorkerCount
                  : maxThreadCount;
              // A limit below 1 would leave the dispatcher waiting for a slot forever.
              maxThreadWorkerCount = requested < 1 ? 1 : requested;
              // Raising the limit may open a slot for a dispatcher that is waiting.
              Monitor.PulseAll(lockObj);
          }
      }

      /// <summary>
      /// Sets the minimum worker count used as the lower bound for the maximum.
      /// </summary>
      /// <param name="minThreadCount">
      /// Requested minimum. Values above the current maximum are lowered to the maximum.
      /// </param>
      public static void SetMinWorkThreadCount(int minThreadCount)
      {
          lock (lockObj)
          {
              minThreadWorkerCount = minThreadCount > maxThreadWorkerCount
                  ? maxThreadWorkerCount
                  : minThreadCount;
          }
      }

      /// <summary>
      /// Queues the given tasks and dispatches them to background workers.
      /// The call blocks until every queued task has been handed to a worker;
      /// it does NOT wait for the tasks themselves to finish.
      /// </summary>
      /// <param name="threadStartArray">Tasks to run. A null or empty list is a no-op.</param>
      public static void MyQueueUserWorkItem(List<ThreadStart> threadStartArray)
      {
          if (threadStartArray == null || threadStartArray.Count == 0)
          {
              return;
          }

          // Put all tasks into the queue, then hand them out as slots become free.
          AddAllThreadsToPool(threadStartArray);
          ExcuteTask();
      }

      /// <summary>
      /// Appends a single task to the pending queue.
      /// </summary>
      /// <param name="ts">The task to enqueue.</param>
      private static void AddThreadToQueue(ThreadStart ts)
      {
          lock (lockObj)
          {
              threadStartQueue.Enqueue(ts);
          }
      }

      /// <summary>
      /// Appends every task in the list to the pending queue, preserving order.
      /// </summary>
      /// <param name="threadStartArray">Tasks to enqueue.</param>
      private static void AddAllThreadsToPool(List<ThreadStart> threadStartArray)
      {
          foreach (var threadStart in threadStartArray)
          {
              AddThreadToQueue(threadStart);
          }
      }

      /// <summary>
      /// Dispatch loop: keeps starting queued tasks until the queue is empty.
      /// </summary>
      private static void ExcuteTask()
      {
          while (ExcuteTaskInQueen())
          {
              // All the work happens in ExcuteTaskInQueen.
          }
      }

      /// <summary>
      /// Waits for a free worker slot, dequeues the next task and starts it.
      /// </summary>
      /// <returns>true if a task was started; false if the queue was empty.</returns>
      private static bool ExcuteTaskInQueen()
      {
          ThreadStart next;
          lock (lockObj)
          {
              // Sleep on the monitor instead of polling; a finishing worker (or a raised
              // limit) pulses it. The queue is re-checked after every wake-up because
              // another dispatcher may have drained it in the meantime.
              while (threadStartQueue.Count > 0 && threadsWorker.Count >= maxThreadWorkerCount)
              {
                  Monitor.Wait(lockObj);
              }

              if (threadStartQueue.Count == 0)
              {
                  return false;
              }

              next = threadStartQueue.Dequeue();
              // Reserve the slot while still holding the lock so the limit cannot be exceeded.
              threadsWorker.Add(next);
          }

          ExcuteTaskByThread(next);
          return true;
      }

      /// <summary>
      /// Runs one task on a <see cref="BackgroundWorker"/>. The caller must already have
      /// registered the task in the running list; this method releases that slot when the
      /// task ends, whether it returns normally or throws.
      /// </summary>
      /// <param name="threadStart">The task to run.</param>
      private static void ExcuteTaskByThread(ThreadStart threadStart)
      {
          BackgroundWorker worker = new BackgroundWorker();
          worker.DoWork += (o, e) =>
          {
              try
              {
                  threadStart.Invoke();
              }
              finally
              {
                  // Release the slot here, on the worker thread. RunWorkerCompleted is
                  // posted to the SynchronizationContext of the thread that started the
                  // worker; if that thread is the one blocked in the dispatch loop the
                  // event would never run and the slot would never be freed.
                  ReleaseSlot(threadStart);
              }
          };
          worker.RunWorkerCompleted += (o, e) => worker.Dispose();

          try
          {
              worker.RunWorkerAsync();
          }
          catch
          {
              // The task never started, so give its reserved slot back.
              ReleaseSlot(threadStart);
              worker.Dispose();
              throw;
          }
      }

      /// <summary>
      /// Removes a task from the running list and wakes any waiting dispatcher.
      /// </summary>
      /// <param name="threadStart">The task whose slot is released.</param>
      private static void ReleaseSlot(ThreadStart threadStart)
      {
          lock (lockObj)
          {
              threadsWorker.Remove(threadStart);
              Monitor.PulseAll(lockObj);
          }
      }
  }
View Code

然后呢再奉上有所修改的和新增的方法的代码:

 /// <summary>
 /// Splits a list page into individual post blocks and queues one parsing task per block
 /// on <c>MyThreadPool</c>.
 /// </summary>
 /// <param name="Html">Raw HTML of the list page. Null or empty input queues nothing.</param>
 /// <returns>
 /// The shared <c>results</c> list. The parsing tasks append to it from worker threads,
 /// so it may still be filling when this method returns.
 /// </returns>
 public List<CnblogsResult> getResult(string Html)
 {
     if (string.IsNullOrEmpty(Html))
     {
         return results;
     }

     // Captures the markup of a single post item.
     Regex regexContent = new Regex(
         "<div class=\"post_item_body\">(?<content>.*?)<div class=\"clear\"></div>",
         RegexOptions.Singleline);

     // Matches() alone is enough; a separate IsMatch() would scan the page a second time.
     MatchCollection blog = regexContent.Matches(Html);
     if (blog.Count == 0)
     {
         return results;
     }

     List<ThreadStart> StartArray = new List<ThreadStart>();
     int i = 1;
     foreach (Match item in blog)
     {
         // Declared inside the loop so every task captures its own copy.
         chuancanshu ccs = new chuancanshu();
         ccs.i = i++;
         ccs.item = item;
         StartArray.Add(new ThreadStart(() =>
             {
                 Cnblogs(ccs);
             }));
     }

     MyThreadPool.SetMaxWorkThreadCount(5); // run at most 5 parsing tasks at the same time
     MyThreadPool.MyQueueUserWorkItem(StartArray); // dispatch the queued tasks
     return results;
 }
View Code
 // Extracts link, title, author and publish time from one post block.
 // Built once and shared: Regex instances are immutable and safe to use from several threads.
 private static readonly Regex CnblogsPropertyRegex = new Regex(
     "<h3><a.*?href=\"(?<href>.*?)\".*?>(?<Title>.*?)</a></h3>.*?<a .*? class=\"lightblue\".*?>(?<Author>.*?)</a>.*?发布于.*?(?<time>.*?)<span",
     RegexOptions.Singleline);

 /// <summary>
 /// Parses one post block, appends the outcome to the shared <c>results</c> list and
 /// notifies <c>getResults</c>, if set, with a snapshot of the list.
 /// Runs on worker threads, several at a time.
 /// </summary>
 /// <param name="obj">A boxed <c>chuancanshu</c> holding the post's match and its rank.</param>
 private void  Cnblogs(object obj)
 {
     chuancanshu ccs = (chuancanshu)obj;
     CnblogsResult result = new CnblogsResult();

     // One Match() call replaces the former IsMatch() + Match() pair.
     Match property = CnblogsPropertyRegex.Match(ccs.item.ToString());
     if (property.Success)
     {
         result.Title = property.Groups["Title"].Value;
         result.Author = property.Groups["Author"].Value;
         result.time = property.Groups["time"].Value;
         result.href = property.Groups["href"].Value;
         result.Rank = ccs.i;
     }

     // A result is recorded even when the pattern did not match, as before.
     List<CnblogsResult> snapshot;
     lock (results)
     {
         // List<T> is not safe for concurrent writers, so serialize the append and take
         // the copy the callback will see while no other worker can modify the list.
         results.Add(result);
         snapshot = new List<CnblogsResult>(results);
     }

     // Copy the delegate first so it cannot become null between the check and the call.
     // The callback runs outside the lock so a slow handler does not stall other workers.
     var handler = getResults;
     if (handler != null)
     {
         handler(snapshot);
     }
 }
View Code
/// <summary>
/// Argument bundle handed to each parsing task: one matched post block together with
/// its position in the page.
/// </summary>
struct chuancanshu
{
    /// <summary>Position of the post in the page; getResult numbers them starting at 1.</summary>
    public int i;

    /// <summary>Regex match covering the markup of a single post item.</summary>
    public Match item;
}
View Code

在这里呢,我把

        List<CnblogsResult> results = new List<CnblogsResult>();

这段声明提取出来,当作类的成员变量了(各个任务都会往同一个 results 里添加结果)。

最后,奉上本例子的源码:点这里下载

原文地址:https://www.cnblogs.com/suixingerxing/p/3236340.html