Url_Filter(Code)

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Data.OleDb;
using ADOX;
using System.Collections;
namespace Url_Filter
{
public partial class Form1 : Form
{
// "URL|TITLE" strings filled by readAccess() from the URLTable table.
ArrayList array_Access = new ArrayList();
// OLE DB (Jet 4.0) connection string; the data source is the relative file UrlRecard.mdb.
static string strConn = " Provider = Microsoft.Jet.OLEDB.4.0 ; Data Source = UrlRecard.mdb ";
// Single shared OleDb connection used by the insert/select helpers of this form.
static OleDbConnection myConn = new OleDbConnection(strConn);
// Downloaded script bodies; getJsRes() stores each entry as content + "★" + script URL.
ArrayList array_Js = new ArrayList();
// Set to true after a right-click loads text into richTextBox1; read by the mouse-move handler.
static bool richTextBoxVisible = false;
// Builds the form and gives MaximumSize and MinimumSize the same 1115x802 value.
public Form1()
{
    InitializeComponent();

    // Identical upper and lower bounds pin the window to one size.
    Size fixedSize = new Size(1115, 802);
    MaximumSize = fixedSize;
    MinimumSize = fixedSize;
}

// Click handler for label1; it currently has no behavior.
private void label1_Click(object sender, EventArgs e)
{

}

// Starts a crawl of the address typed into urltext.
// Clears the cached results and the list view, disables the button while the
// work runs, resets the Access table, then downloads and processes the page.
// sender / e: standard WinForms event arguments (also forwarded by screen_text_TextChanged).
private void btn_url_ok_Click(object sender, EventArgs e)
{
    array_Access.Clear();
    array_Js.Clear();
    listView1.Items.Clear();
    btn_url_ok.Enabled = false;

    try
    {
        // creatDatabase also empties URLTable, so no separate DELETE command is needed here.
        creatDatabase("UrlRecard");

        GetHtmlSource(urltext.Text, Encoding.UTF8);
    }
    finally
    {
        // Never leave the button stuck in the disabled state, even if the
        // database setup or the crawl throws.
        btn_url_ok.Enabled = true;
    }
}
// Downloads the page at url, decodes it with charset and hands the text to matchUrl.
// Returns the downloaded text; "" when the request fails with a WebException
// (a message box is shown and the OK button is re-enabled in that case).
// Any other exception is not rethrown: its message is used as the page text.
string GetHtmlSource(string url, Encoding charset)
{
    string _html = string.Empty;
    try
    {
        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(url);
        // The response owns the network connection, so dispose it together with the stream and reader.
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (WebException)
    {
        MessageBox.Show("未能解析的RUL");
        btn_url_ok.Enabled = true;
        return "";
    }
    catch (Exception ex)
    {
        _html = ex.Message;
    }

    matchUrl(_html);
    return _html;
}
// Downloads a linked page and lists the images and script files it references.
// Returns the downloaded text, or "" when the request fails with a WebException
// (in which case nothing is listed). Any other exception is not rethrown: its
// message is used as the page text.
string sencondGetHtmlSource(string url, Encoding charset)
{
    string _html = string.Empty;
    try
    {
        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(url);
        // The response owns the network connection, so dispose it together with the stream and reader.
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (WebException)
    {
        return "";
    }
    catch (Exception ex)
    {
        _html = ex.Message;
    }

    GetHtmlImageUrlList(_html, url);
    getHtmlJsFile(_html, url);
    return _html;
}
// Extracts the src value of every <img> tag in htmlText and adds one list-view
// row per non-empty value.
// htmlText: HTML to scan (null is treated as "no images").
// _url:     address of the page; used to resolve src values that start with '/'.
// Returns the raw src values, one array slot per <img> match (may contain empty strings).
public string[] GetHtmlImageUrlList(string htmlText, string _url)
{
    if (htmlText == null)
    {
        return new string[0];
    }

    // \s must stay escaped: as a bare 's' the character classes would treat the
    // letter s as whitespace and cut every URL at its first 's'.
    Regex regImg = new Regex(@"<img[^<>]*?src\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>", RegexOptions.IgnoreCase);
    MatchCollection matches = regImg.Matches(htmlText);
    string[] sUrlList = new string[matches.Count];

    Uri baseUri;
    bool hasBase = Uri.TryCreate(_url, UriKind.Absolute, out baseUri);

    int i = 0;
    foreach (Match match in matches)
    {
        string src = match.Groups["imgUrl"].Value;
        // Advance the index so each match gets its own slot in the result.
        sUrlList[i++] = src;
        if (src.Length == 0)
        {
            continue;
        }

        // Root-relative ("/x.png") and protocol-relative ("//host/x.png") values are
        // resolved against the page address; anything else is shown as found.
        string display = src;
        if (src[0] == '/')
        {
            Uri resolved;
            if (hasBase && Uri.TryCreate(baseUri, src, out resolved))
            {
                display = resolved.AbsoluteUri;
            }
        }

        // "Type" column: text after the last '.' of the part before any '|'.
        string firstPart = src.Split(new char[] { '|' }, StringSplitOptions.None)[0];
        string extension = firstPart.Substring(firstPart.LastIndexOf('.') + 1);

        ListViewItem lvi = new ListViewItem();
        lvi.ForeColor = Color.Red;
        lvi.Text = display;
        lvi.SubItems.Add("");
        lvi.SubItems.Add("");
        lvi.SubItems.Add(DateTime.Now.ToString());
        lvi.SubItems.Add(extension);
        this.listView1.Items.Add(lvi);
    }
    return sUrlList;
}
// Downloads the script at _js and appends content + "★" + _js to array_Js.
// On a WebException the body of the error response is stored instead; when the
// failure carries no response (Response is null) the exception message is stored.
// Any other exception also stores its message.
void getJsRes(string _js, Encoding charset)
{
    string _html = string.Empty;
    try
    {
        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(_js);
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        if (ex.Response != null)
        {
            using (WebResponse errorResponse = ex.Response)
            using (StreamReader sr = new StreamReader(errorResponse.GetResponseStream()))
            {
                _html = sr.ReadToEnd();
            }
        }
        else
        {
            // No response object to read from, so keep the error text.
            _html = ex.Message;
        }
    }
    catch (Exception ex)
    {
        _html = ex.Message;
    }

    array_Js.Add(_html + "★" + _js);
}
// Finds every <script ... src="..."> in _html, adds a list-view row for it and
// downloads the script through getJsRes.
// _html: page text to scan.
// _url:  address of the page; used to resolve src values that start with '/'.
void getHtmlJsFile(string _html, string _url)
{
    // The quotes around the capture group must be escaped inside the C# literal.
    string pattern = "<script[^>]*?src=\"([^>]*?)\"[^>]*?>";

    Uri baseUri;
    bool hasBase = Uri.TryCreate(_url, UriKind.Absolute, out baseUri);

    foreach (Match m in Regex.Matches(_html, pattern, RegexOptions.IgnoreCase))
    {
        string src = m.Groups[1].Value;

        string target = src;
        if (src.StartsWith("/", StringComparison.Ordinal))
        {
            Uri resolved;
            if (hasBase && Uri.TryCreate(baseUri, src, out resolved))
            {
                // Handles both "/path" and "//host/path" relative to the page address.
                target = resolved.AbsoluteUri;
            }
            else if (src.StartsWith("//", StringComparison.Ordinal))
            {
                // No usable base address: fall back to plain http for protocol-relative values.
                target = "http:" + src;
            }
        }

        // "Type" column: text after the last '.' of the src value.
        string jsFormat = src.Substring(src.LastIndexOf('.') + 1);

        ListViewItem lvi = new ListViewItem();
        lvi.ForeColor = Color.Red;
        lvi.Text = target;
        lvi.SubItems.Add("");
        lvi.SubItems.Add("");
        lvi.SubItems.Add(DateTime.Now.ToString());
        lvi.SubItems.Add(jsFormat);
        this.listView1.Items.Add(lvi);

        getJsRes(lvi.Text, Encoding.UTF8);
    }
}
// Extracts the anchors (href + link text) from _html, stores them in URLTable,
// reads the table back into array_Access, then lists each stored link and
// processes the linked page with sencondGetHtmlSource.
// Re-enables the OK button when done.
void matchUrl(string _html)
{
    // \s and \S need their backslashes; without them the pattern only matches
    // literal 's' / 'S' characters and never finds a real <a href="..."> tag.
    string pattern = @"<a\s*href=(""|')(?<href>[\s\S]*?)(""|').*?>\s*(?<name>[\s\S]*?)</a>";

    Uri baseUri;
    bool hasBase = Uri.TryCreate(urltext.Text, UriKind.Absolute, out baseUri);

    foreach (Match m in Regex.Matches(_html, pattern, RegexOptions.IgnoreCase))
    {
        string href = m.Groups["href"].Value;
        string name = m.Groups["name"].Value;

        if (href.StartsWith("/", StringComparison.Ordinal))
        {
            // Resolve against the entered address so "/x" maps to the site root
            // instead of being appended to the full page path.
            Uri resolved;
            if (hasBase && Uri.TryCreate(baseUri, href, out resolved))
            {
                href = resolved.AbsoluteUri;
            }
            else
            {
                href = urltext.Text + href;
            }
        }

        wirteAccess(href, name);
    }

    readAccess();

    foreach (String a in array_Access)
    {
        // Records are "URL|TITLE"; split only at the first '|' so a title that
        // itself contains '|' stays in one piece.
        string[] temp = a.Split(new char[] { '|' }, 2);

        ListViewItem lvi = new ListViewItem();
        lvi.ForeColor = Color.Red;
        lvi.Text = temp[0];
        lvi.SubItems.Add("已访问");
        lvi.SubItems.Add(temp[1]);
        lvi.SubItems.Add(DateTime.Now.ToString());
        lvi.SubItems.Add("html");
        this.listView1.Items.Add(lvi);

        sencondGetHtmlSource(temp[0], Encoding.UTF8);
    }

    btn_url_ok.Enabled = true;
}
// Prepares the Access database for a new crawl:
//   1. creates the .mdb file described by strConn if it does not exist yet,
//   2. creates URLTable (RecordID auto-increment, URL, TITLE) if it does not exist yet,
//   3. makes sure the shared OleDb connection myConn is open,
//   4. deletes all rows from URLTable.
// fileName: not used by the body; the data source comes from strConn. Kept for callers.
public static void creatDatabase(string fileName)
{
    ADOX.Catalog catalog = new ADOX.Catalog();
    try
    {
        catalog.Create(strConn);
    }
    catch (Exception ex)
    {
        // Expected whenever the database file already exists.
        Console.WriteLine(ex.Message);
    }

    // Connect through ADO to define the table.
    ADODB.Connection cn = new ADODB.Connection();
    cn.Open(strConn, null, null, -1);
    try
    {
        catalog.ActiveConnection = cn;

        ADOX.Table table = new ADOX.Table();
        table.Name = "URLTable";

        ADOX.Column column = new ADOX.Column();
        column.ParentCatalog = catalog;
        column.Type = ADOX.DataTypeEnum.adInteger;
        column.Name = "RecordID";
        column.DefinedSize = 9;
        column.Properties["AutoIncrement"].Value = true;
        table.Columns.Append(column, ADOX.DataTypeEnum.adInteger, 0);
        table.Columns.Append("URL", DataTypeEnum.adVarWChar, 255);
        table.Columns.Append("TITLE", DataTypeEnum.adVarWChar, 255);

        try
        {
            catalog.Tables.Append(table);
        }
        catch (Exception ex)
        {
            // Expected whenever URLTable already exists.
            Console.WriteLine(ex.Message);
        }

        table = null;
        catalog = null;
    }
    finally
    {
        // Release the schema connection before rows are inserted through myConn.
        cn.Close();
    }

    try
    {
        // Open() throws on an already-open connection, so only (re)open when needed;
        // Close() on a closed connection is a no-op and also resets a broken one.
        if (myConn.State != ConnectionState.Open)
        {
            myConn.Close();
            myConn.Open();
        }

        // Start every crawl with an empty table.
        using (OleDbCommand clear = new OleDbCommand("DELETE FROM URLTable", myConn))
        {
            clear.ExecuteNonQuery();
        }
    }
    catch (Exception ex)
    {
        // Best effort, as before: the crawl continues even if the table could not be cleared.
        Console.WriteLine(ex.ToString());
    }

    Application.DoEvents();
}
// Inserts one (URL, TITLE) row into URLTable through the shared connection.
// Failures are written to the console and not rethrown.
// _url / _title come from scraped HTML, so they are passed as parameters rather
// than concatenated into the SQL text (quotes in a title would otherwise break
// or alter the statement).
void wirteAccess(string _url, string _title)
{
    try
    {
        // OleDb uses positional '?' placeholders; parameters bind in the order they are added.
        using (OleDbCommand cmd = new OleDbCommand("insert into URLTable(URL,TITLE) values(?, ?)", myConn))
        {
            cmd.Parameters.AddWithValue("@url", (object)_url ?? DBNull.Value);
            cmd.Parameters.AddWithValue("@title", (object)_title ?? DBNull.Value);

            if (cmd.ExecuteNonQuery() > 0) // at least one row inserted
            {
                Console.WriteLine("~~~~~~~~");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }
}
// Reads every row of URLTable and appends "URL|TITLE" to array_Access,
// echoing each URL to the console. Failures are logged and not rethrown.
void readAccess()
{
    try
    {
        using (OleDbCommand odCommand = myConn.CreateCommand())
        {
            odCommand.CommandText = "select URL,TITLE from URLTable";

            // using closes the reader even when reading a row throws.
            using (OleDbDataReader odrReader = odCommand.ExecuteReader())
            {
                while (odrReader.Read())
                {
                    string recordUrl = odrReader["URL"].ToString();
                    array_Access.Add(recordUrl + "|" + odrReader["TITLE"].ToString());
                    Console.WriteLine(recordUrl);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the best-effort behavior, but leave a trace of what went wrong.
        Console.WriteLine(ex.ToString());
    }
}

// Filters the list view down to the downloaded scripts whose stored entry
// contains the text typed into screen_text.
// Does nothing when no address was entered, the list is empty, or the filter is empty.
private void btn_select_ok_Click(object sender, EventArgs e)
{
    if (urltext.Text == "" || this.listView1.Items.Count == 0)
    {
        return;
    }

    string condition = screen_text.Text;
    if (condition == "")
    {
        return;
    }

    listView1.Items.Clear();
    foreach (string s in array_Js)
    {
        if (!s.Contains(condition))
        {
            continue;
        }

        // Entries are content + "★" + url. Take the text after the LAST '★' so a
        // star inside the script content cannot be mistaken for the separator.
        string scriptUrl = s.Substring(s.LastIndexOf('★') + 1);

        ListViewItem lvi = new ListViewItem();
        lvi.ForeColor = Color.Red;
        lvi.Text = scriptUrl;
        lvi.SubItems.Add("");
        lvi.SubItems.Add("");
        lvi.SubItems.Add(DateTime.Now.ToString());
        lvi.SubItems.Add("js");
        this.listView1.Items.Add(lvi);

        Console.WriteLine(condition);
    }
}

// TextChanged handler for the urltext box; it currently has no behavior.
private void urltext_TextChanged(object sender, EventArgs e)
{

}

// Adds the seven left-aligned columns of listView1 when the form loads.
// The first column is 360 wide, all others 120.
private void Form1_Load(object sender, EventArgs e)
{
    string[] headers = { "地址", "状态", "标题", "日期", "类型", "外部链接", "内部链接" };

    // Suspend drawing while the columns are added; EndUpdate paints once at the end.
    listView1.BeginUpdate();
    for (int index = 0; index < headers.Length; index++)
    {
        int width = (index == 0) ? 360 : 120;
        listView1.Columns.Add(headers[index], width, HorizontalAlignment.Left);
    }
    listView1.EndUpdate();
}
// Downloads the resource at _js and returns its text decoded with charset.
// On a WebException the body of the error response is returned; when the failure
// carries no response (Response is null) the exception message is returned.
// Any other exception also yields its message.
string getRichTextJs(string _js, Encoding charset)
{
    string _html = string.Empty;
    try
    {
        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(_js);
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        if (ex.Response != null)
        {
            using (WebResponse errorResponse = ex.Response)
            using (StreamReader sr = new StreamReader(errorResponse.GetResponseStream()))
            {
                _html = sr.ReadToEnd();
            }
        }
        else
        {
            // No response object to read from, so return the error text.
            _html = ex.Message;
        }
    }
    catch (Exception ex)
    {
        _html = ex.Message;
    }
    return _html;
}
// SelectedIndexChanged handler for listView1.
// Selection changes trigger no action; opening a row or previewing its content
// is handled by listView1_MouseDown.
private void listView1_SelectedIndexChanged(object sender, EventArgs e)
{
}
// Removes every row from listView1; the column headers are left in place.
void removeListview()
{
    listView1.Items.Clear();
}
// Asks for confirmation before the form closes.
// "No" cancels the close; any other answer lets it proceed and closes the
// shared database connection.
private void frm_MainForm_FormClosing(object sender, FormClosingEventArgs e)
{
    DialogResult answer = MessageBox.Show("你确定要关闭此窗体么?", "关闭提示", MessageBoxButtons.YesNo, MessageBoxIcon.Information);

    if (answer == DialogResult.No)
    {
        e.Cancel = true;
        return;
    }

    e.Cancel = false;
    myConn.Close();
}

// When the filter box becomes empty, reruns the OK-button handler with the same
// event arguments; any non-empty text is ignored here.
private void screen_text_TextChanged(object sender, EventArgs e)
{
    if (screen_text.Text != "")
    {
        return;
    }

    btn_url_ok_Click(sender, e);
}

// TextChanged handler for richTextBox1; it currently has no behavior.
private void richTextBox1_TextChanged(object sender, EventArgs e)
{

}
// While a preview is pending (flag set and richTextBox1 has text), shows
// richTextBox1 and moves its Left/Top to the mouse event coordinates.
private void listView1_MouseMove(object sender, MouseEventArgs e)
{
    if (!richTextBoxVisible || richTextBox1.Text == "")
    {
        return;
    }

    richTextBox1.Visible = true;
    richTextBox1.Left = e.X;
    richTextBox1.Top = e.Y;
}

// Mouse handler for listView1.
// Right button: downloads the text behind each selected row into richTextBox1
//               and sets the preview flag.
// Left button:  passes each selected row's text to Process.Start and hides the preview box.
// Exceptions raised for an individual row are swallowed.
private void listView1_MouseDown(object sender, MouseEventArgs e)
{
    switch (e.Button)
    {
        case MouseButtons.Right:
            foreach (ListViewItem selected in listView1.SelectedItems)
            {
                try
                {
                    richTextBox1.Text = getRichTextJs(selected.Text, Encoding.UTF8);
                    richTextBoxVisible = true;
                    Console.WriteLine("RightButton");
                }
                catch { }
            }
            break;

        case MouseButtons.Left:
            foreach (ListViewItem selected in listView1.SelectedItems)
            {
                try
                {
                    System.Diagnostics.Process.Start(selected.Text);
                }
                catch { }
            }
            richTextBox1.Visible = false;
            break;
    }
}
// Any key pressed inside richTextBox1 dismisses the preview: the box is hidden,
// its text is cleared and the preview flag is reset.
private void richTextBox1_KeyDown(object sender, KeyEventArgs e)
{
    richTextBox1.Visible = false;
    richTextBox1.Text = "";
    richTextBoxVisible = false;
}
}
}

只有不断学习,才可进步。
原文地址:https://www.cnblogs.com/onlyforliu/p/5726568.html