网页内容抓取工具(利用多线程实现)

http://www.cnblogs.com/hanguoji/archive/2007/02/27/657902.html

本工具一共涉及三个类和一个主窗体,分别为:
数据访问类 DBObject.cs、针对具体功能的数据库操作类 IRMNewsInteDB.cs、数据抓取类 SpiderDispose.cs,以及主窗体 MainForm.cs

数据访问类:DBObject.cs

using System;
using System.Data;
using System.Data.SqlClient;

namespace IRMSpiderTool.DBA
{
    
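    /// <summary>
    /// Abstract base class for the database access classes. It owns a
    /// SqlConnection and offers helpers that run stored procedures or SQL
    /// text and return the result as an int, a data reader, a DataSet,
    /// a DataTable or a DataRow.
    /// </summary>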

    public abstract class DBObject
    
{
        
/// <summary>
        
/// DBObject的连接对象
        
/// </summary>

        protected SqlConnection Connection;

        
/// <summary>
        
/// 私有变量,当前对象的连接字符串
        
/// </summary>

        private string _connectionString = string.Empty;
        
/// <summary>
        
/// 私有变量,执行数据库操作所产生的错误信息
        
/// </summary>

        private string _strError = string.Empty;

        
/// <summary>
        
/// 通过连接字符串创建数据库连接对象
        
/// </summary>
        
/// <param name="newConnectionString">连接字符串</param>

        protected void CreateConnection( string newConnectionString )
        
{
            _connectionString 
= newConnectionString;
            Connection 
= new SqlConnection(_connectionString);
        }


        
/// <summary>
        
/// 只读属性,返回当前对象的连接字符串
        
/// </summary>

        protected string ConnectionString
        
{
            
get
            
{
                
return _connectionString;
            }

        }


        
/// <summary>
        
/// 只读属性,返回错误与异常信息
        
/// </summary>

        protected string StrError
        
{
            
get
            
{
                
return _strError;
            }

        }

        
        
/// <summary>
        
/// 将错误信息赋值给变量用于外部调用
        
/// </summary>
        
/// <param name="strFunctionName">出错所调用的函数名称</param>
        
/// <param name="strProcName">出错所执行的存储过程或是SQL</param>
        
/// <param name="strError">错误异常信息</param>

        private void SetErrorMsg(string strFunctionName,string strProcName,string strErrMsg)
        
{
            _strError 
= "错误归属函数:" + strFunctionName + "\n错误标识:" + strProcName + "\n异常信息:" + strErrMsg;
        }


//        /// <summary>
//        /// 创建一个返回参数用于判断执行存储过程是否成功
//        /// </summary>
//        /// <param name="StoredProcName">存储过程名</param>
//        /// <param name="sqlParameters">参数数组</param>
//        /// <returns>新的Command对象</returns>
//        private SqlCommand BuildIntCommand( string storedProcName,IDataParameter[] sqlParameters )
//        {
//            SqlCommand sqlCommand = BuildQueryCommand( storedProcName, sqlParameters );
//
//            sqlCommand.Parameters.Add( new SqlParameter( "@iSuccess",
//                SqlDbType.Int,    4,    //    
//                ParameterDirection.Output,    false,    //    可以为空
//                0,    //    精度
//                0,    //    范围
//                string.Empty,    DataRowVersion.Default,    null));
//        
//            return sqlCommand;
//        }

        
/// <summary>
        
/// 创建用于存储过程执行的 Command
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <returns>新的Command对象</returns>

        private SqlCommand BuildQueryCommand( string storedProcName, IDataParameter[] sqlParameters )
        
{
            SqlCommand sqlCommand 
= new SqlCommand( storedProcName, Connection );
            sqlCommand.CommandType 
= CommandType.StoredProcedure;

            
foreach (SqlParameter parameter in sqlParameters)
            
{
                sqlCommand.Parameters.Add( parameter );
            }


            
return sqlCommand;
        }

        
private SqlCommand BuildQueryCommand( string storedProcName)
        
{
            SqlCommand sqlCommand 
= new SqlCommand( storedProcName, Connection );
            sqlCommand.CommandType 
= CommandType.StoredProcedure;
            
return sqlCommand;
        }

        
/// <summary>
        
/// 创建用于SQL 语句执行的 Command
        
/// </summary>
        
/// <param name="storedProcName">要执行的SQL语句</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <returns>新的Command对象</returns>

        private SqlCommand BuildSqlCommand( string strSql, IDataParameter[] sqlParameters )
        
{
            SqlCommand sqlCommand 
= new SqlCommand( strSql, Connection );

            
foreach (SqlParameter parameter in sqlParameters)
            
{
                sqlCommand.Parameters.Add( parameter );
            }


            
return sqlCommand;
        }


        
/// <summary>
        
/// 创建无参数用于SQL语句执行的 Command 对象
        
/// </summary>
        
/// <param name="storedProcName">要执行的SQL语句</param>
        
/// <returns>新的Command对象</returns>

        private SqlCommand BuildSqlCommand( string strSql )
        
{
            SqlCommand sqlCommand 
= new SqlCommand( strSql, Connection );
            
return sqlCommand;
        }



        
/// <summary>
        
/// 执行存储过程,返回该存储过程影响的行数
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单,注意:如果需要输出参数,应把输出参数索引定为最后一项</param>
        
/// <returns>执行是否成功,大于0成功,小于0失败</returns>

        protected int RunProcInt( string storedProcName, IDataParameter[] sqlParameters)
        
{
            
int iSuccess = 0;
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlCommand sqlCommand 
= BuildQueryCommand( storedProcName, sqlParameters );
                sqlCommand.ExecuteNonQuery();
                iSuccess 
= (int)sqlCommand.Parameters[sqlParameters.Length - 1].Value;
                sqlCommand.Dispose();
                
return iSuccess;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunProcInt",storedProcName,ex.Message);    //处理错误
                return -2;
            }

            
finally
            
{
                Connection.Close();
            }

        }



        
/// <summary>
        
/// 执行存储过程,为该存储过程所有输出参数赋值供使用
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单,注意:如果需要输出参数,应把输出参数索引定为最后一项</param>
        
/// <returns>返回执行后需要输出的参数的值</returns>

        protected void RunProcOutPara( string storedProcName, IDataParameter[] sqlParameters)
        
{
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlCommand sqlCommand 
= BuildQueryCommand( storedProcName, sqlParameters );
                sqlCommand.ExecuteNonQuery();
                sqlCommand.Dispose();
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunProcInt",storedProcName,ex.Message);    //处理错误
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行存储过程并返回一个DataReader
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <returns>返回数据读取器</returns>

        protected SqlDataReader RunProcDataReader( string storedProcName, IDataParameter[] sqlParameters )
        
{
            SqlDataReader returnReader;
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlCommand sqlCommand 
= BuildQueryCommand( storedProcName, sqlParameters );
                returnReader 
= sqlCommand.ExecuteReader();
                returnReader.Close();
                sqlCommand.Dispose();
                
return returnReader;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunProcDataReader",storedProcName,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行存储过程,并返回新的DS对象
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <param name="tableName">返回DS中的表名</param>
        
/// <returns>返回一个新的数据集</returns>

        protected DataSet RunProcDataSet( string storedProcName, IDataParameter[] sqlParameters, string tableName )
        
{
            
try
            
{
                DataSet dataSet 
= new DataSet();
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildQueryCommand( storedProcName, sqlParameters );
                sqlDA.Fill( dataSet, tableName );
                sqlDA.Dispose();
                
return dataSet;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunProcDataSet",storedProcName,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行存储过程,并返回新的DT对象
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单</param>

        protected DataTable RunProcDataTable( string storedProcName, IDataParameter[] sqlParameters)
        
{
            DataTable dataTable 
= new DataTable();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildQueryCommand( storedProcName, sqlParameters );
                sqlDA.Fill( dataTable );
                sqlDA.Dispose();
                
return dataTable;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunProcDataTable",storedProcName,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行存储过程,并返回新的DT对象
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单</param>

        protected DataTable RunProcDataTable( string storedProcName)
        
{
            DataTable dataTable 
= new DataTable();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildQueryCommand( storedProcName);
                sqlDA.Fill( dataTable );
                sqlDA.Dispose();
                
return dataTable;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunProcDataTable",storedProcName,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行存储过程,并返回新的DataRow对象
        
/// </summary>
        
/// <param name="storedProcName">存储过程名</param>
        
/// <param name="sqlParameters">参数清单</param>

        protected DataRow RunProcDataRow( string storedProcName, IDataParameter[] sqlParameters)
        
{
            DataTable dataTable 
= new DataTable();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildQueryCommand( storedProcName, sqlParameters );
                sqlDA.Fill( dataTable );
                sqlDA.Dispose();
                
return dataTable.Rows[0];
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunProcDataRow",storedProcName,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行简单语句的Sql操作(包括:查询,修改)
        
/// </summary>
        
/// <param name="strsql">SQL语句</param>
        
/// <param name="ary_par">SQL语句中的参数集合</param>
        
/// <param name="b_change">参数b_change为判断是更改数据表还是查询数据表,如果b_change为true则处理改变数据库的操作,否则则处理简单查询数据库的操作</param>
        
/// <returns></returns>

        protected int RunSqlInt(string strSql,IDataParameter[] sqlParameters)
        
{
            
int iSuccess = 0;
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlCommand sqlCommand 
= BuildSqlCommand( strSql, sqlParameters );
                iSuccess 
= sqlCommand.ExecuteNonQuery();
                sqlCommand.Dispose();
                
return iSuccess;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlInt",strSql,ex.Message);    //处理错误
                return -2;
            }

            
finally
            
{
                Connection.Close();
            }

        }

        
/// <summary>
        
/// 执行SQL语句返回记录DataReader
        
/// </summary>
        
/// <param name="strsql">SQL语句<</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <returns>返回一个新的数据集</returns>

        protected SqlDataReader RunSqlDataReader(string strSql,IDataParameter[] sqlParameters)
        
{
            SqlDataReader returnReader;
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlCommand sqlCommand 
= BuildSqlCommand( strSql, sqlParameters );
                returnReader 
= sqlCommand.ExecuteReader();
                returnReader.Close();
                sqlCommand.Dispose();
                
return returnReader;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlDataReader",strSql,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行SQL语句,并返回新的DS对象
        
/// </summary>
        
/// <param name="strSql">SQL语句</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <param name="tableName">返回DS中的表名</param>
        
/// <returns>返回一个新的数据集</returns>

        protected DataSet RunSqlDataSet( string strSql, IDataParameter[] sqlParameters, string tableName )
        
{
            DataSet dataSet 
= new DataSet();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildSqlCommand( strSql, sqlParameters );
                sqlDA.Fill( dataSet, tableName );
                sqlDA.Dispose();
                
return dataSet;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlDataSet",strSql,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行SQL语句,并返回新的DT对象
        
/// </summary>
        
/// <param name="strSql">SQL语句</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <returns>返回一个新的数据集</returns>

        protected DataTable RunSqlDataTable( string strSql, IDataParameter[] sqlParameters)
        
{
            DataTable dataTable 
= new DataTable();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildSqlCommand( strSql, sqlParameters );
                sqlDA.Fill( dataTable );
                sqlDA.Dispose();
                
return dataTable;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlDataTable",strSql,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行SQL语句,并返回新的DataRow对象
        
/// </summary>
        
/// <param name="strSql">SQL语句</param>
        
/// <param name="sqlParameters">参数清单</param>
        
/// <returns>返回一个新的数据集</returns>

        protected DataRow RunSqlDataRow( string strSql, IDataParameter[] sqlParameters)
        
{
            DataTable dataTable 
= new DataTable();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildSqlCommand( strSql, sqlParameters );
                sqlDA.Fill( dataTable );
                sqlDA.Dispose();
                
return dataTable.Rows[0];
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlDataRow",strSql,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }



        
/// <summary>
        
/// 执行无参数SQL语句返回记录DataReader
        
/// </summary>
        
/// <param name="strsql">SQL语句<</param>
        
/// <returns>返回一个新的数据集</returns>

        protected SqlDataReader RunSqlDataReader(string strSql)
        
{
            SqlDataReader returnReader;
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlCommand sqlCommand 
= BuildSqlCommand( strSql );
                returnReader 
= sqlCommand.ExecuteReader();
                returnReader.Close();
                sqlCommand.Dispose();
                
return returnReader;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlDataReader",strSql,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行无参数SQL语句,并返回新的DS对象
        
/// </summary>
        
/// <param name="strSql">SQL语句</param>
        
/// <param name="tableName">返回DS中的表名</param>
        
/// <returns>返回一个新的数据集</returns>

        protected DataSet RunSqlDataSet( string strSql, string tableName )
        
{
            DataSet dataSet 
= new DataSet();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildSqlCommand( strSql );
                sqlDA.Fill( dataSet, tableName );
                sqlDA.Dispose();
                
return dataSet;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlDataSet",strSql,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


        
/// <summary>
        
/// 执行无参数SQL语句,并返回新的DT对象
        
/// </summary>
        
/// <param name="strSql">SQL语句</param>
        
/// <returns>返回一个新的数据集</returns>

        protected DataTable RunSqlDataTable( string strSql )
        
{
            DataTable dataTable 
= new DataTable();
            
try
            
{
                
if(Connection.State == ConnectionState.Closed || Connection.State == ConnectionState.Broken)
                
{
                    Connection.Open();
                }

                SqlDataAdapter sqlDA 
= new SqlDataAdapter();
                sqlDA.SelectCommand 
= BuildSqlCommand( strSql );
                sqlDA.Fill( dataTable );
                sqlDA.Dispose();
                
return dataTable;
            }

            
catch(Exception ex)
            
{
                SetErrorMsg(
"RunSqlDataTable",strSql,ex.Message);    //处理错误
                return null;
            }

            
finally
            
{
                Connection.Close();
            }

        }


    }

}


针对具体功能的数据库操作类:IRMNewsInteDB.cs
(对应具体功能的数据库访问类,原文未给出源码)

数据抓取类:SpiderDispose.cs

using System;
using System.Collections;
using System.Threading;
using System.Data;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace IRMSpiderTool.SpiderDB
{
    
// Callback through which a SpiderDispose worker hands its error table (htError) back to the code that created it.
    public delegate void ExampleCallback(Hashtable htError);
    
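    /// <summary>
    /// Worker that downloads the page for one stock code, passes the page
    /// text to MainForm.FilterMethod and reports any errors through an
    /// ExampleCallback. ConvertDataRunThread is meant to be used as a
    /// thread entry point.
    /// </summary>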

    public class SpiderDispose
    
{
        
private string strStockCode = string.Empty;
        
public Hashtable htError;
        
private ExampleCallback callback;

        
/// <summary>
        
/// 功能描述:构造函数,用于向线程中传递参数,或用于回调函数的参数传递
        
/// </summary>
        
/// <param name="strCode">上市公司股票代码号</param>
        
/// <param name="callbackDelegate">错误信息回调函数</param>

        public SpiderDispose(string strCode,ExampleCallback callbackDelegate)
        
{
            strStockCode 
= strCode;
            callback 
= callbackDelegate;
        }


        
/// <summary>
        
/// 功能描述:执行抓取网页内容操作并调用主窗体的抽取函数进行页面内容处理
        
/// </summary>

        public void ConvertDataRunThread()
        
{
            htError 
= new Hashtable();
            
//获取抓取首地址
            string strSpiderURL = System.Configuration.ConfigurationSettings.AppSettings["SpiderURL"].ToString().Trim();
            
//页面内容
            string strPage = string.Empty;
            strSpiderURL 
= strSpiderURL + strStockCode;
            
//获取页面数据
            try
            
{
                HttpWebRequest request 
= (HttpWebRequest)WebRequest.Create(strSpiderURL);
                request.Timeout 
= 300000;
                HttpWebResponse response 
= (HttpWebResponse)request.GetResponse();
                System.IO.StreamReader srContent 
= new System.IO.StreamReader(response.GetResponseStream(),System.Text.Encoding.GetEncoding("gb2312"));
                
//获取抓取下来的页面内容
                strPage = srContent.ReadToEnd();
                response.Close();
                srContent.Close();
            }

            
catch(Exception ex)
            
{
                htError.Add(strStockCode,
"错误信息:股票代码["+strStockCode+"]页面不存在!"+ ex.Message +"\r\n出错时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n");
            }

            
            
if(strPage.IndexOf("股票代码["+strStockCode+"]未找到"> 0)
            
{
                htError.Add(strStockCode,
"错误信息:股票代码["+strStockCode+"]预警信息未找到!\r\n出错时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n");
            }

            
//整理抓取下来的页面内容
            strPage = strPage.Replace("\r\n","");
            strPage 
= strPage.Replace("\t","");
            MainForm mainForm 
= new MainForm();
            
//过滤并插入
            mainForm.FilterMethod(strStockCode,strPage);
            
if (callback != null)
            
{
                callback(htError);
            }


        }

    }

}



主窗体:MainForm.cs

using System;
using System.Drawing;
using System.Collections;
using System.Configuration;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using IRMSpiderTool.SpiderDB;
using IRMSpiderTool.DBA;
using System.Threading;

namespace IRMSpiderTool
{
    
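    /// <summary>
    /// Main window of the spider tool. It starts a crawl on a worker thread
    /// when the start button is clicked, repeats it from a timer, shows
    /// progress and messages, and contains the routine that extracts entries
    /// from a downloaded page and inserts them into the database.
    /// </summary>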

    public class MainForm : System.Windows.Forms.Form
    
{
        
private System.Windows.Forms.GroupBox gBPrevise;
        
private System.Windows.Forms.GroupBox gBError;
        
private System.Windows.Forms.TextBox txtMessage;
        
private System.Windows.Forms.ProgressBar proBarDispose;
        
private System.Windows.Forms.Label labProBar;
        
private System.Windows.Forms.Button btnStartSpider;
        
private System.Windows.Forms.Timer timerCyc;
        
private System.ComponentModel.IContainer components;
        
//错误信息
        private string strError = string.Empty;
        
//预警提示类别信息
        private static DataTable dtPreviseType;
        
//主程序运行线程
        private Thread MainThread;

        
public MainForm()
        
{
            
//
            
// Windows 窗体设计器支持所必需的
            
//
            InitializeComponent();

            
//
            
// TODO: 在 InitializeComponent 调用后添加任何构造函数代码
            
//
        }


        
/// <summary>
        
/// 清理所有正在使用的资源。
        
/// </summary>

        protected override void Dispose( bool disposing )
        
{
            
if( disposing )
            
{
                
if(components != null)
                
{
                    components.Dispose();
                }

            }

            
base.Dispose( disposing );
        }


        
Windows 窗体设计器生成的代码

        
private void StartRun()
        
{
            txtMessage.Text 
+= "******************开始执行 "+ DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() +"******************\r\n";
            IRMNewsInteDB newsDB 
= new IRMNewsInteDB();
            strError 
= string.Empty;
            DataTable dtType 
= newsDB.GetPreviseType(ref strError);
            DataTable dtCorp 
= newsDB.GetCorpInfo(ref strError);
            
if(strError.Length>0)
            
{
                txtMessage.Text 
+= "错误信息:" + strError + "!\r\n出错时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n";
                
return ;
            }

            
if(dtCorp.Rows.Count == 0)
            
{
                txtMessage.Text 
+= "提示信息:上市公司信息为空!\r\n提示时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n";
                
return ;
            }

            
//开始抓取数据并过滤,然后插入新闻综合库
            FilterDisposeInfo(dtCorp,dtType);
            txtMessage.Text 
+= "******************执行结束 "+DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString()+"******************\r\n";
        }

        
/// <summary>
        
/// 功能描述:错误信息回调函数,显示错误信息。
        
/// </summary>
        
/// <param name="htErrorInfo">存放错误信息的</param>

        public void ErrorCallback(Hashtable htErrorInfo)
        
{
            IDictionaryEnumerator myEnumerator 
= htErrorInfo.GetEnumerator();
            
while(myEnumerator.MoveNext())
            
{
                txtMessage.Text 
+= myEnumerator.Value.ToString() + "\r\n";
            }

            txtMessage.Focus();
            txtMessage.Select(txtMessage.TextLength,
0);
            txtMessage.ScrollToCaret();
        }

        
/// <summary>
        
/// 功能描述:开始抓取页面数据并过滤,然后插入新闻综合库
        
/// </summary>
        
/// <param name="dtCorp">CorpInfo表信息</param>
        
/// <param name="dtType">PreviseType表信息</param>

        public void FilterDisposeInfo(DataTable dtCorp,DataTable dtType)
        
{
            
//为静态预警类型表赋值
            dtPreviseType = dtType;
            DataSet dsCorp 
= new DataSet();
            
//过滤掉退市的上市公司股票代码
            DataRow[] drCorp = dtCorp.Select("State = 1");
            proBarDispose.Maximum 
= drCorp.Length;
            proBarDispose.Minimum 
= 0;
            proBarDispose.Value 
= 0;
            dsCorp.Merge(drCorp);
            
foreach (DataRow drCorpCode in dsCorp.Tables[0].Rows)
            
{
                
string strTmpCode = drCorpCode["StockCode"].ToString();
                SpiderDispose spider 
= new SpiderDispose(strTmpCode,new ExampleCallback(ErrorCallback));
                Thread threadSpider 
= new Thread(new ThreadStart(spider.ConvertDataRunThread));
                threadSpider.Start();
                proBarDispose.Value
++;
                Thread.Sleep(
2000);
            }

        }


        
/// <summary>
        
/// 功能描述:抽取出需要的数据,进行插入数据库处理
        
/// </summary>
        
/// <param name="strCode">股票代码</param>
        
/// <param name="strPage">抓取的页面内容</param>

        public void FilterMethod(string strCode,string strPage)
        
{
            IRMNewsInteDB newsDB 
= new IRMNewsInteDB();
            
int iTypeCount = dtPreviseType.Rows.Count;
            
//根据预警提示各类别名称进行分段抽取数据
            for(int i=0;i<iTypeCount;i++)
            
{
                
string strTmpContent = string.Empty;
                
int iTypeStart = strPage.IndexOf(dtPreviseType.Rows[i]["PreviseTypeName"].ToString());
                
if(i==iTypeCount-1)
                
{
                    
if(iTypeStart == -1)
                    
{
                        txtMessage.Text 
+= "错误信息:股票代码["+strCode+"]预警信息<"+dtPreviseType.Rows[i]["PreviseTypeName"].ToString()+">没有数据,请核实内容!\r\n出错时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n";
                        
continue;
                    }

                    strTmpContent 
= strPage.Substring(iTypeStart);
                }

                
else
                
{
                    
int iTypeEnd   = strPage.IndexOf(dtPreviseType.Rows[i+1]["PreviseTypeName"].ToString());
                    
if(iTypeStart == -1 || iTypeEnd==-1)
                    
{
                        txtMessage.Text 
+= "错误信息:股票代码["+strCode+"]预警信息<"+dtPreviseType.Rows[i]["PreviseTypeName"].ToString()+">没有数据,请核实内容!\r\n出错时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n";
                        
continue;
                    }

                    strTmpContent 
= strPage.Substring(iTypeStart,iTypeEnd-iTypeStart);
                }

                
if(strTmpContent == "")
                
{
                    txtMessage.Text 
+= "错误信息:股票代码["+strCode+"]预警信息<"+dtPreviseType.Rows[i]["PreviseTypeName"].ToString()+">没有数据,请核实内容!\r\n出错时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n";
                    
continue;
                }

                
string strRegex = string.Empty;
                
//获取某类别的预警信息
                strRegex = "<td width=\"15%\" class=\"common_text\"><li>(?<Date>.*?)</td>(\\s)*<td width=\"85%\" class=\"common_text\">(?<Content>(\\s)*(.*?)(\\s)*(.*?)(\\s)*(.*?)(\\s)*(.*?)(\\s)*)</td>";
                
//(\\s)*表示0或多个空格符、回车符等,*表示比配0或多个。(.*?)表示除回车符外的所有信息
                MatchCollection TitleMatchs = Regex.Matches(strTmpContent,strRegex , RegexOptions.IgnoreCase | RegexOptions.Multiline );
                
//清空抓取的预警信息中URL带有的所有Html标记
                strError = string.Empty;
                
int iSuccess = 0;
                
//去掉所有HTML
                string strRegexHtml = "<.+?>";
                
//循环正则表达式所获取的,满足表达式的内容集合
                foreach(Match NextMatch in TitleMatchs)
                
{
                    
string strTmpReContent = Regex.Replace(NextMatch.Groups["Content"].Value, strRegexHtml ,"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                    
//执行插入新闻综合库预警信息表数据
                    iSuccess = newsDB.Previse_Add(strCode,Convert.ToInt32(dtPreviseType.Rows[i]["PreviseTypeID"]),NextMatch.Groups["Date"].Value.Trim(),strTmpReContent.Trim(),ref strError);
                    
if(iSuccess<0)
                    
{
                        txtMessage.Text 
+= "错误信息:" + strError + "!\r\n出错时间:" + DateTime.Now.ToString("yy-MM-dd"+ " " + DateTime.Now.ToShortTimeString() + "\r\n";
                        
continue;
                    }

                }

            }

        }

        
private void btnStartSpider_Click(object sender, System.EventArgs e)
        
{
            
//启动后按钮应为无效状态
            btnStartSpider.Enabled = false;
            
//第一次启动主程序
            MainThread = new Thread(new ThreadStart(StartRun));
            MainThread.Start();
            
//设置工具间隔运行状态是否运行间隔运行
            timerCyc.Start();
        }


        
private void timerCyc_Tick(object sender, System.EventArgs e)
        
{
            
if(timerCyc.Enabled == true && MainThread.IsAlive == false)
            
{
                
if(txtMessage.Text.Length>30000)
                
{
                    txtMessage.Text 
= "";
                }

                
//设置Timer启动状态
                timerCyc.Enabled = false;
                
//主程序运行
                MainThread = new Thread(new ThreadStart(StartRun));
                MainThread.Start();
                
//指定程序循环运行时间间隔
                int iClockInterval = 0;
                iClockInterval 
= int.Parse(ConfigurationSettings.AppSettings["iClockInterval"].ToString());
                
//置回Tick事件间隔时间
                timerCyc.Interval = iClockInterval;
                timerCyc.Enabled 
= true;
            }

        }


        
private void MainForm_Load(object sender, System.EventArgs e)
        
{
            
//指定程序循环运行时间间隔
            int iClockInterval = 0;
            iClockInterval 
= int.Parse(ConfigurationSettings.AppSettings["iClockInterval"].ToString());
            timerCyc.Interval 
= iClockInterval;
        }


        
private void MainForm_Closing(object sender, System.ComponentModel.CancelEventArgs e)
        
{
            
//终止主线程
            if(MainThread.IsAlive)
            
{
                MainThread.Abort();
            }

        }

    }

}


原文地址:https://www.cnblogs.com/smallfa/p/892685.html