Filter(20160815)

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Data.OleDb;
using ADOX;
using System.Collections;
namespace Url_Filter
{
public partial class Form1 : Form
{
// Scheme prefix prepended to addresses that do not already carry one.
static string http = "http://";
// Crawl mode flag; btn_url_ok_Click sets it to "html" when the entered address is classified as an HTML page.
static string type;
// Running counter advanced by disposeRepeat for every page row it adds; reset to 0 by btn_url_ok_Click.
static int index = 0;
// "href|name" entries collected from anchor tags by matchUrl.
List<String> array_Access = new List<string>();
// Image addresses collected by GetHtmlImageUrlList.
List<String> list_Res = new List<string>();
// Script addresses collected by getHtmlJsFile.
List<String> list_Js = new List<string>();
// Page addresses (with scheme) built by disposeRepeat from array_Access.
List<String> list_url = new List<string>();

// Connection string of the Access database file used by the (currently uncalled) persistence helpers.
static string strConn = " Provider = Microsoft.Jet.OLEDB.4.0 ; Data Source = UrlRecard.mdb ";
// Shared OLE DB connection used by ConnetData, wirteAccess and readAccess; closed in frm_MainForm_FormClosing.
static OleDbConnection myConn = new OleDbConnection(strConn);
ADOX.Catalog catalog = new ADOX.Catalog();// Collection that describes the schema catalog of the data source.
ADODB.Connection cn = new ADODB.Connection();// Database connection.

// Downloaded script bodies stored by getJsRes as body + "★" + script address.
List<String> array_js_html = new List<string>();
// Set by listView1_MouseClick; while true, listView1_MouseMove shows richTextBox1 next to the cursor.
static bool richTextBoxVisible = false;
// Scratch item whose Text is used by GetHtmlImageUrlList as a temporary string holder.
ListViewItem g_lvi = new ListViewItem();
// Incremented by disposeRepeat once per distinct anchor entry; never reset in this file.
static int urlCount = 0;
// The three arrays below are assigned by disposeRepeat; all_Js_html is the one searched by btn_select_ok_Click.
string[] new_array_html_js;
string[] all_Js_html;
string[] allJs;
/// <summary>
/// Builds the form, pins both its minimum and maximum size to 1114x859
/// and hides the filter result list (listView2).
/// </summary>
public Form1()
{
    InitializeComponent();

    // Same value for both limits; the maximum is assigned first, as before.
    Size fixedSize = new Size(1114, 859);
    MaximumSize = fixedSize;
    MinimumSize = fixedSize;

    listView2.Visible = false;
}

/// <summary>
/// Click handler for label1. It currently has no behavior.
/// </summary>
private void label1_Click(object sender, EventArgs e)
{
// Nothing to do; the wiring of this handler is not visible in this file.
}

/// <summary>
/// Starts a crawl of the address typed into <c>urltext</c>.
/// Clears all results of the previous run, strips a leading "http://" from the
/// trimmed input and then either runs the "html" pipeline (matchUrl,
/// sencondGetHtmlSource, disposeRepeat) or hands the address to GetHtmlSource.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_url_ok_Click(object sender, EventArgs e)
{
    // Reset everything collected by the previous run.
    array_Access.Clear();
    array_js_html.Clear();
    list_Js.Clear();
    list_Res.Clear();
    listView1.Items.Clear();
    list_url.Clear();
    screen_text.Text = "";
    btn_url_ok.Enabled = false;
    index = 0;

    try
    {
        // Work on the trimmed text only, and strip the scheme only when it is
        // really a prefix (an embedded "http://" further right must survive).
        string newUrl = urltext.Text.Trim();
        if (newUrl.StartsWith(http, StringComparison.OrdinalIgnoreCase))
        {
            newUrl = newUrl.Substring(http.Length);
        }

        if (distinguishHtml(newUrl))
        {
            type = "html";
            matchUrl("http://" + newUrl);
            sencondGetHtmlSource(newUrl, Encoding.UTF8);
            disposeRepeat();
        }
        else
        {
            // The mode flag is static; clear it so a previous "html" run
            // does not leak into this one.
            type = null;
            GetHtmlSource(newUrl, Encoding.UTF8);
        }
    }
    finally
    {
        // Every regular path ends with the button enabled; make sure an
        // unexpected exception cannot leave it disabled.
        btn_url_ok.Enabled = true;
    }
}
/// <summary>
/// Downloads <c>"http://" + url</c>, then extracts anchors, images and scripts
/// from the response and fills the result list via disposeRepeat.
/// </summary>
/// <param name="url">Address without the "http://" prefix.</param>
/// <param name="charset">Encoding used to decode the response body.</param>
/// <returns>
/// The downloaded text; an empty string when the request fails with a
/// <see cref="WebException"/>; the exception message for any other failure.
/// </returns>
string GetHtmlSource(string url, Encoding charset)
{
    string _html;
    try
    {
        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(http + url);
        // Dispose the response as well as the stream so the connection is released.
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (WebException)
    {
        MessageBox.Show("未能解析的URL");
        btn_url_ok.Enabled = true;
        return "";
    }
    catch (Exception ex)
    {
        // Kept from the original: the message is processed like page text so that
        // disposeRepeat below still runs and restores the UI.
        _html = ex.Message;
    }

    matchUrl(_html);
    GetHtmlImageUrlList(_html, url);
    getHtmlJsFile(_html, url);
    disposeRepeat();
    return _html;
}
/// <summary>
/// Downloads a page and collects its image and script references
/// (GetHtmlImageUrlList / getHtmlJsFile). Unlike GetHtmlSource it neither
/// extracts anchors nor refreshes the list view.
/// </summary>
/// <param name="url">Page address, with or without an http/https scheme.</param>
/// <param name="charset">Encoding used to decode the response body.</param>
/// <returns>
/// The downloaded text; an empty string when the request fails with a
/// <see cref="WebException"/>; the exception message for any other failure.
/// </returns>
string sencondGetHtmlSource(string url, Encoding charset)
{
    string _html;
    try
    {
        // Callers pass both bare hosts and full addresses. Adding the scheme only
        // when it is missing avoids requests to "http://http://...".
        string address = url ?? string.Empty;
        bool hasScheme = address.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || address.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
        if (!hasScheme)
        {
            address = http + address;
        }

        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(address);
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (WebException)
    {
        return "";
    }
    catch (Exception ex)
    {
        _html = ex.Message;
    }

    GetHtmlImageUrlList(_html, url);
    getHtmlJsFile(_html, url);
    return _html;
}
/// <summary>
/// De-duplicates everything collected so far and fills listView1:
/// pages (blue) from array_Access, images (green) from list_Res and scripts
/// (red) from list_Js. Every page is additionally downloaded through
/// sencondGetHtmlSource so that its images and scripts are collected before
/// those two groups are listed. Finally the de-duplicated script bodies are
/// stored in all_Js_html for the filter button.
/// The window is minimized while this runs and restored afterwards.
/// </summary>
void disposeRepeat()
{
    this.WindowState = FormWindowState.Minimized;
    try
    {
        string[] all_html = DelRepeatData(array_Access.ToArray());
        Console.WriteLine(array_Access.Count.ToString());
        Console.WriteLine(all_html.Length.ToString());

        // Remember the first title seen for each address. Looking titles up by
        // address (instead of by position) keeps the title column correct when
        // the same address occurs with different link texts.
        Dictionary<string, string> titleByAddress = new Dictionary<string, string>();
        foreach (string entry in all_html)
        {
            // Entries are "href|name"; split only at the first separator.
            string[] parts = entry.Split(new char[] { '|' }, 2);
            string address = parts[0];
            if (!address.StartsWith("http", StringComparison.Ordinal))
            {
                address = http + address;
            }
            if (parts.Length > 1 && !titleByAddress.ContainsKey(address))
            {
                titleByAddress.Add(address, parts[1]);
            }
            list_url.Add(address);
            urlCount++;
        }

        foreach (string s in DelRepeatData(list_url.ToArray()))
        {
            ListViewItem liv = new ListViewItem();
            liv.ForeColor = Color.Blue;
            liv.Text = s.StartsWith("http", StringComparison.Ordinal) ? s : http + s;
            liv.SubItems.Add("html");

            // Always add the title cell so the following columns never shift.
            string title;
            if (!titleByAddress.TryGetValue(liv.Text, out title))
            {
                title = " ";
            }
            liv.SubItems.Add(title);
            liv.SubItems.Add(DateTime.Now.ToString());
            liv.SubItems.Add("已访问");
            this.listView1.Items.Add(liv);

            // Collect the images and scripts of the linked page.
            sencondGetHtmlSource(liv.Text, Encoding.UTF8);
            index++;
        }

        foreach (string resource in DelRepeatData(list_Res.ToArray()))
        {
            AddResourceRow(resource, Color.Green);
        }

        allJs = DelRepeatData(list_Js.ToArray());
        foreach (string js in allJs)
        {
            AddResourceRow(js, Color.Red);
        }

        this.listView1.ForeColor = Color.Black;
        listView1.GridLines = true;

        new_array_html_js = array_js_html.ToArray();
        all_Js_html = DelRepeatData(new_array_html_js);
    }
    finally
    {
        // Restore the UI even if one of the steps above throws.
        this.WindowState = FormWindowState.Normal;
        btn_url_ok.Enabled = true;
    }
}

/// <summary>
/// Adds one image/script row to listView1: address (with scheme), the text
/// after the last '.' as type, a blank title and the current time.
/// </summary>
private void AddResourceRow(string address, Color color)
{
    ListViewItem row = new ListViewItem();
    row.ForeColor = color;
    row.Text = address.StartsWith("http", StringComparison.Ordinal) ? address : http + address;

    string[] dotParts = address.Split('.');
    row.SubItems.Add(dotParts[dotParts.Length - 1]);
    row.SubItems.Add(" ");
    row.SubItems.Add(DateTime.Now.ToString());
    this.listView1.Items.Add(row);
}
/// <summary>
/// Returns the distinct values of <paramref name="a"/>, keeping each value at
/// the position of its first occurrence.
/// </summary>
/// <param name="a">Values to de-duplicate.</param>
/// <returns>A new array without repeated values.</returns>
static string[] DelRepeatData(string[] a)
{
    // HashSet.Add returns false for values that were already seen.
    HashSet<string> seen = new HashSet<string>();
    return a.Where(value => seen.Add(value)).ToArray();
}
/// <summary>
/// Finds the <c>src</c> of every <c>&lt;img&gt;</c> tag in
/// <paramref name="htmlText"/>, resolves each non-empty one against the page
/// address and appends the result to list_Res.
/// </summary>
/// <param name="htmlText">Markup to scan.</param>
/// <param name="_url">Address of the page, with or without a scheme.</param>
/// <returns>The raw <c>src</c> values, one element per matched tag.</returns>
public string[] GetHtmlImageUrlList(string htmlText, string _url)
{
    // "\s" restored: without the backslashes the capture stopped at the first
    // letter 's' of every address.
    Regex regImg = new Regex(@"<img[^<>]*?src\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>", RegexOptions.IgnoreCase);
    MatchCollection matches = regImg.Matches(htmlText ?? string.Empty);

    // Base address for resolving relative references; null when _url is unusable.
    string pageAddress = (_url ?? string.Empty).Trim();
    bool hasScheme = pageAddress.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || pageAddress.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!hasScheme)
    {
        pageAddress = http + pageAddress;
    }
    Uri pageUri;
    if (!Uri.TryCreate(pageAddress, UriKind.Absolute, out pageUri))
    {
        pageUri = null;
    }

    string[] sUrlList = new string[matches.Count];
    for (int i = 0; i < matches.Count; i++)
    {
        // One slot per match (the original never advanced the index).
        string src = matches[i].Groups["imgUrl"].Value;
        sUrlList[i] = src;
        if (src.Length != 0)
        {
            list_Res.Add(ResolveImageUrl(pageUri, src));
        }
    }
    return sUrlList;
}

/// <summary>
/// Turns an image reference into an absolute address where possible;
/// returns the reference unchanged when it cannot be resolved.
/// </summary>
private static string ResolveImageUrl(Uri pageUri, string src)
{
    Uri resolved;
    if (pageUri != null && Uri.TryCreate(pageUri, src, out resolved))
    {
        return resolved.AbsoluteUri;
    }
    // No usable base address: a protocol-relative reference only needs a scheme.
    if (src.StartsWith("//", StringComparison.Ordinal))
    {
        return "http:" + src;
    }
    return src;
}
/// <summary>
/// Downloads one script and stores it in array_js_html as
/// <c>body + "★" + address</c>. Nothing is stored when the download fails.
/// </summary>
/// <param name="_js">Script address as produced by getHtmlJsFile.</param>
/// <param name="charset">Encoding used to decode the response body.</param>
void getJsRes(string _js, Encoding charset)
{
    string _html;
    try
    {
        string address = _js;
        if (type == "html")
        {
            // In "html" mode host-relative addresses arrive without a scheme,
            // absolute ones with it; only the former need the prefix.
            bool hasScheme = address.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || address.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            if (!hasScheme)
            {
                address = http + address;
            }
        }

        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(address);
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Best effort: a script that cannot be fetched is simply not indexed
        // (covers WebException as well as malformed addresses).
        return;
    }

    array_js_html.Add(_html + "★" + _js);
}
/// <summary>
/// Finds every <c>&lt;script src="..."&gt;</c> in <paramref name="_html"/>,
/// expands the reference relative to <paramref name="_url"/>, records it in
/// list_Js and downloads it through getJsRes.
/// </summary>
/// <param name="_html">Markup to scan.</param>
/// <param name="_url">Address of the page the markup came from.</param>
void getHtmlJsFile(string _html, string _url)
{
    // The quotes must be escaped; the unescaped form does not compile.
    string pattern = "<script[^>]*?src=\"([^>]*?)\"[^>]*?>";

    // Non-empty '/'-separated parts of the page address, e.g. "http:", "host", ...
    string[] Domain = (_url ?? string.Empty).Split(new string[] { "/" }, StringSplitOptions.RemoveEmptyEntries);

    foreach (Match m in Regex.Matches(_html, pattern, RegexOptions.IgnoreCase))
    {
        string src = m.Groups[1].Value;
        string address;

        if (src.StartsWith("//", StringComparison.Ordinal))
        {
            // Protocol-relative reference.
            address = "http:" + src;
        }
        else if (src.StartsWith("/", StringComparison.Ordinal))
        {
            // Host-relative reference. Length checks replace the former
            // exception-driven fallback, which could itself throw.
            if (Domain.Length == 0)
            {
                address = src;
            }
            else if (type == "html")
            {
                address = Domain[0] + src;
            }
            else if (Domain.Length > 1)
            {
                address = Domain[0] + "//" + Domain[1] + src;
            }
            else
            {
                address = Domain[0] + "//" + src;
            }
        }
        else
        {
            address = src;
        }

        list_Js.Add(address);
        getJsRes(address, Encoding.UTF8);
    }
}
/// <summary>
/// Extracts <c>&lt;a href="..."&gt;name&lt;/a&gt;</c> links and appends them to
/// array_Access as <c>"href|name"</c>. In "html" mode the argument is treated
/// as an address and downloaded first; otherwise it is treated as markup.
/// </summary>
/// <param name="_html">Page address ("html" mode) or page markup.</param>
void matchUrl(string _html)
{
    if (type == "html")
    {
        try
        {
            HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(_html);
            using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
            using (Stream _stream = _response.GetResponseStream())
            using (StreamReader _reader = new StreamReader(_stream, Encoding.UTF8))
            {
                _html = _reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Best effort as before, but leave a trace. The unchanged argument
            // is scanned below and normally yields no links.
            Console.WriteLine(ex.ToString());
        }
    }

    // "scheme://host[:port]" of the address in the text box; empty if unusable.
    string siteRoot = GetSiteRoot(urltext.Text);

    // Backslashes restored ("\s", "\S"); without them no real anchor matched.
    string pattern = @"<a\s+href=(""|')(?<href>[\s\S]*?)(""|').*?>\s*(?<name>[\s\S]*?)</a>";
    foreach (Match m in Regex.Matches(_html, pattern, RegexOptions.IgnoreCase))
    {
        string href = m.Groups["href"].Value;
        string name = m.Groups["name"].Value;

        if (href.StartsWith("//", StringComparison.Ordinal))
        {
            array_Access.Add("http:" + href + "|" + name);
        }
        else if (href.StartsWith("/", StringComparison.Ordinal))
        {
            // Host-relative links are dropped when no site root is known.
            if (siteRoot.Length != 0)
            {
                array_Access.Add(siteRoot + href + "|" + name);
            }
        }
        else
        {
            array_Access.Add(href + "|" + name);
        }
    }
}

/// <summary>
/// Returns "scheme://host[:port]" for an address typed with or without a
/// scheme, or an empty string when the address cannot be parsed.
/// </summary>
private static string GetSiteRoot(string address)
{
    string candidate = (address ?? string.Empty).Trim();
    if (candidate.Length == 0)
    {
        return string.Empty;
    }

    bool hasScheme = candidate.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || candidate.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!hasScheme)
    {
        candidate = "http://" + candidate;
    }

    Uri parsed;
    return Uri.TryCreate(candidate, UriKind.Absolute, out parsed)
        ? parsed.GetLeftPart(UriPartial.Authority)
        : string.Empty;
}
/// <summary>
/// Prepares the Access database: empties URLTable through the shared OLE DB
/// connection, creates the database file if it does not exist yet and
/// attaches the ADOX catalog to an open ADODB connection.
/// </summary>
void ConnetData()
{
    // Empty the table before the file is used.
    try
    {
        // Open() throws on an already open connection, which used to skip the DELETE.
        if (myConn.State != ConnectionState.Open)
        {
            myConn.Open();
        }
        using (OleDbCommand clear = new OleDbCommand("DELETE FROM URLTable", myConn))
        {
            clear.ExecuteNonQuery();
        }
    }
    catch (Exception ex)
    {
        // Best effort: the file or the table may not exist yet.
        Console.WriteLine(ex.ToString());
    }

    // Note kept from the original: a password-protected database would need
    // ";Jet OLEDB:Database Password=<pwd>;Jet OLEDB:Engine Type=5" appended
    // to the connection string.
    try
    {
        catalog.Create(strConn);
    }
    catch (Exception ex)
    {
        // Best effort as before; presumably this fails when the file already exists.
        Console.WriteLine(ex.ToString());
    }

    // Connect to the database and bind the catalog to it.
    cn.Open(strConn, null, null, -1);
    catalog.ActiveConnection = cn;
}
/// <summary>
/// Defines the table "URLTable" (RecordID auto-increment integer, URL and
/// TITLE as 255-character text columns) and appends it to the ADOX catalog.
/// A failure to append the table is ignored.
/// </summary>
/// <param name="fileName">Not used by the method body.</param>
public void creatDatabase(string fileName)
{
// Create the new table.
ADOX.Table table = new ADOX.Table();// Table definition that is appended to the catalog below.
table.Name = "URLTable";
ADOX.Column column = new ADOX.Column();
// NOTE(review): the catalog is assigned before the provider-specific "AutoIncrement" property is set — presumably required; confirm.
column.ParentCatalog = catalog;
column.Type = ADOX.DataTypeEnum.adInteger;
column.Name = "RecordID";
column.DefinedSize = 9;
column.Properties["AutoIncrement"].Value = true;
table.Columns.Append(column, ADOX.DataTypeEnum.adInteger, 0);
// Primary key (disabled):
//table.Keys.Append("", ADOX.KeyTypeEnum.adKeyPrimary, "ID", "", "");// throws an exception; the cause is not understood yet
table.Columns.Append("URL", DataTypeEnum.adVarWChar, 255);

table.Columns.Append("TITLE", DataTypeEnum.adVarWChar, 255);
try
{
catalog.Tables.Append(table);
}
catch
{
// Ignored without reporting.
// MessageBox.Show(ex.Message);
}
// Original note: the connection must be closed here, otherwise adding data fails later.
// The statements that would release the objects are disabled:
//table = null;
//catalog = null;
Application.DoEvents();
}
/// <summary>
/// Inserts one (URL, TITLE) row into URLTable through the shared connection.
/// Failures are written to the console and otherwise ignored.
/// </summary>
/// <param name="_url">Value for the URL column.</param>
/// <param name="_title">Value for the TITLE column.</param>
void wirteAccess(string _url, string _title)
{
    try
    {
        // Parameters instead of string concatenation: page titles and addresses
        // are untrusted text and may contain quotes.
        // OLE DB binds parameters by position, not by name.
        const string sql = "insert into URLTable(URL,TITLE) values(?, ?)";
        using (OleDbCommand cmd = new OleDbCommand(sql, myConn))
        {
            cmd.Parameters.AddWithValue("@url", _url ?? string.Empty);
            cmd.Parameters.AddWithValue("@title", _title ?? string.Empty);

            // A positive row count means the insert succeeded.
            if (cmd.ExecuteNonQuery() > 0)
            {
                Console.WriteLine("~~~~~~~~");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }
}
/// <summary>
/// Reads every row of URLTable and writes its URL column to the console.
/// Failures are written to the console and otherwise ignored.
/// </summary>
void readAccess()
{
    try
    {
        // "using" releases command and reader even when reading fails half-way.
        using (OleDbCommand odCommand = myConn.CreateCommand())
        {
            odCommand.CommandText = "select URL,TITLE from URLTable";
            using (OleDbDataReader odrReader = odCommand.ExecuteReader())
            {
                while (odrReader.Read())
                {
                    Console.WriteLine(odrReader["URL"].ToString());
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort as before, but no longer silent.
        Console.WriteLine(ex.ToString());
    }
}

/// <summary>
/// Filters the downloaded scripts by the text in <c>screen_text</c>.
/// Every script whose stored entry contains the text is listed in listView2
/// (which replaces listView1 on screen) and its body is appended to richTextBox1.
/// Shows a message when there is no address or no crawl result to filter.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_select_ok_Click(object sender, EventArgs e)
{
    this.listView2.Clear();
    this.WindowState = FormWindowState.Minimized;
    btn_select_ok.Enabled = false;
    try
    {
        if (urltext.Text != "" && this.listView1.Items.Count != 0)
        {
            string condition = screen_text.Text;
            if (condition != "")
            {
                this.listView1.Hide();
                this.listView2.Show();

                // Suspend painting once for the whole refill.
                this.listView2.BeginUpdate();
                try
                {
                    // Clear() above removed the columns; add them once, not per match.
                    this.listView2.Columns.Add("地址", 360, HorizontalAlignment.Left);
                    this.listView2.Columns.Add("类型", 120, HorizontalAlignment.Left);
                    this.listView2.Columns.Add("标题", 120, HorizontalAlignment.Left);
                    this.listView2.Columns.Add("日期", 120, HorizontalAlignment.Left);
                    this.listView2.Columns.Add("状态", 120, HorizontalAlignment.Left);
                    this.listView2.Columns.Add("外部链接", 120, HorizontalAlignment.Left);
                    this.listView2.Columns.Add("内部链接", 120, HorizontalAlignment.Left);

                    StringBuilder matchedBodies = new StringBuilder();
                    foreach (string s in all_Js_html ?? new string[0])
                    {
                        if (!s.Contains(condition))
                        {
                            continue;
                        }

                        // Entries are body + "★" + address. Split at the LAST
                        // separator because the body may contain the character.
                        int separator = s.LastIndexOf('★');
                        string body = separator >= 0 ? s.Substring(0, separator) : s;
                        string address = separator >= 0 ? s.Substring(separator + 1) : "";

                        ListViewItem lvi = new ListViewItem();
                        lvi.ForeColor = Color.Blue;
                        lvi.Text = address;
                        lvi.SubItems.Add("js");
                        lvi.SubItems.Add("");
                        lvi.SubItems.Add(DateTime.Now.ToString());
                        lvi.SubItems.Add("");
                        this.listView2.Items.Add(lvi);

                        matchedBodies.Append(body);
                        Console.WriteLine(condition);
                    }

                    if (matchedBodies.Length != 0)
                    {
                        richTextBox1.Text += matchedBodies.ToString();
                    }
                }
                finally
                {
                    this.listView2.EndUpdate();
                }
            }
        }
        else
        {
            this.screen_text.Text = "";
            MessageBox.Show("数据源不允许为空!");
        }
    }
    finally
    {
        // Restore the UI even when filtering fails.
        this.WindowState = FormWindowState.Normal;
        btn_select_ok.Enabled = true;
    }
}

/// <summary>
/// Keeps the crawl button enabled exactly while the address box is non-empty.
/// </summary>
private void urltext_TextChanged(object sender, EventArgs e)
{
    bool hasAddress = !(urltext.Text == "");
    btn_url_ok.Enabled = hasAddress;
}

/// <summary>
/// Creates the seven left-aligned columns of listView1. The first (address)
/// column is 360 wide, every other column 120.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    string[] headers = { "地址", "类型", "标题", "日期", "状态", "外部链接", "内部链接" };

    // Painting is suspended between BeginUpdate and EndUpdate.
    listView1.BeginUpdate();
    for (int column = 0; column < headers.Length; column++)
    {
        int width = column == 0 ? 360 : 120;
        listView1.Columns.Add(headers[column], width, HorizontalAlignment.Left);
    }
    listView1.EndUpdate();
}
/// <summary>
/// Downloads the text behind <paramref name="_js"/> for display in the preview box.
/// </summary>
/// <param name="_js">Absolute address to download.</param>
/// <param name="charset">Encoding used to decode a successful response.</param>
/// <returns>
/// The response body; for a failed request the body of the error response
/// when the server sent one, otherwise the exception message.
/// </returns>
string getRichTextJs(string _js, Encoding charset)
{
    string _html;
    try
    {
        HttpWebRequest _request = (HttpWebRequest)WebRequest.Create(_js);
        using (HttpWebResponse _response = (HttpWebResponse)_request.GetResponse())
        using (Stream _stream = _response.GetResponseStream())
        using (StreamReader _reader = new StreamReader(_stream, charset))
        {
            _html = _reader.ReadToEnd();
        }
    }
    catch (WebException ex)
    {
        // Response is null when no reply was received at all
        // (e.g. name resolution or connection failure).
        if (ex.Response == null)
        {
            _html = ex.Message;
        }
        else
        {
            using (WebResponse errorResponse = ex.Response)
            using (StreamReader sr = new StreamReader(errorResponse.GetResponseStream()))
            {
                _html = sr.ReadToEnd();
            }
        }
    }
    catch (Exception ex)
    {
        _html = ex.Message;
    }
    return _html;
}
/// <summary>
/// Selection-changed handler of listView1. It walks the selected items but
/// every action inside the loop is commented out, so it has no effect.
/// </summary>
private void listView1_SelectedIndexChanged(object sender, EventArgs e)
{
foreach (ListViewItem lvi in listView1.SelectedItems) // iterate over the selected items
{
//listView1.Items.RemoveAt(lvi.Index); // remove by index
//listView1.Items.Remove(lvi); // remove by item
try
{
//listView1.Items[listView1.SelectedIndices[0]].ForeColor = Color.White; // recolor the selected item
}
catch { }
}
}
/// <summary>
/// Removes all rows from listView1; the columns are left untouched.
/// </summary>
void removeListview()
{
    ListView.ListViewItemCollection rows = listView1.Items;
    rows.Clear();
}
/// <summary>
/// Asks for confirmation before the form closes. "No" cancels the close;
/// any other answer lets it proceed and closes the shared database connection.
/// </summary>
private void frm_MainForm_FormClosing(object sender, FormClosingEventArgs e)
{
    DialogResult answer = MessageBox.Show(
        "你确定要关闭此窗体么?",
        "关闭提示",
        MessageBoxButtons.YesNo,
        MessageBoxIcon.Information);

    bool keepOpen = answer == DialogResult.No;
    e.Cancel = keepOpen;
    if (!keepOpen)
    {
        myConn.Close();
    }
}

/// <summary>
/// When the filter box becomes empty: hides and empties the filter result
/// list and shows the full result list again. Does nothing otherwise.
/// </summary>
private void screen_text_TextChanged(object sender, EventArgs e)
{
    if (screen_text.Text != "")
    {
        return;
    }

    listView2.Visible = false;
    listView2.Items.Clear();
    listView1.Visible = true;
}

/// <summary>
/// Text-changed handler of richTextBox1. It currently has no behavior.
/// </summary>
private void richTextBox1_TextChanged(object sender, EventArgs e)
{
}
/// <summary>
/// While the preview is switched on and has content, shows richTextBox1 and
/// positions it at an offset of (+20, +70) from the mouse coordinates.
/// </summary>
private void listView1_MouseMove(object sender, MouseEventArgs e)
{
    if (!richTextBoxVisible)
    {
        return;
    }
    if (richTextBox1.Text == "")
    {
        return;
    }

    richTextBox1.Visible = true;
    richTextBox1.Left = e.X + 20;
    richTextBox1.Top = e.Y + 70;
}

/// <summary>
/// Mouse-down handler of listView1. It currently has no behavior.
/// </summary>
private void listView1_MouseDown(object sender, MouseEventArgs e)
{

}
/// <summary>
/// Any key pressed inside the preview box dismisses it: the box is hidden,
/// emptied and the preview flag is switched off.
/// </summary>
private void richTextBox1_KeyDown(object sender, KeyEventArgs e)
{
    RichTextBox preview = richTextBox1;
    preview.Visible = false;
    preview.Text = "";

    richTextBoxVisible = false;
}

/// <summary>
/// Passes the address of every selected row of listView1 to
/// <c>Process.Start</c> and hides the preview box. Start failures are ignored.
/// </summary>
private void listView1_DoubleClick(object sender, EventArgs e)
{
    foreach (ListViewItem selected in listView1.SelectedItems)
    {
        try
        {
            // The preview flag is switched off before the launch attempt.
            richTextBoxVisible = false;
            string address = selected.Text;
            System.Diagnostics.Process.Start(address);
        }
        catch
        {
            // Launch failures are deliberately ignored.
        }
    }

    richTextBox1.Visible = false;
}

/// <summary>
/// Downloads the content behind every selected row of listView1, puts it into
/// the preview box and switches the preview flag on. Failures are ignored.
/// </summary>
private void listView1_MouseClick(object sender, MouseEventArgs e)
{
    foreach (ListViewItem selected in listView1.SelectedItems)
    {
        try
        {
            richTextBox1.Text = getRichTextJs(selected.Text, Encoding.UTF8);
            richTextBoxVisible = true;
        }
        catch
        {
            // A failed download leaves the preview as it was.
        }
    }
}

/// <summary>
/// Enter (focus) handler of listView1. It currently has no behavior.
/// </summary>
private void listView1_Enter(object sender, EventArgs e)
{

}

/// <summary>
/// Selection-changed handler of listView2. It currently has no behavior.
/// </summary>
private void listView2_SelectedIndexChanged(object sender, EventArgs e)
{

}

/// <summary>
/// Lets the Enter key in the address box act like a click on the crawl button.
/// </summary>
/// <param name="sender">Event source, forwarded to the click handler.</param>
/// <param name="e">Key data; only the carriage-return character is handled.</param>
private void urltext_KeyPress(object sender, KeyPressEventArgs e)
{
    // Same guard as screen_text_KeyPress: Enter must not do what the disabled
    // button would refuse (empty address, or a crawl already in progress).
    if (!this.btn_url_ok.Enabled)
    {
        return;
    }

    if (e.KeyChar == (char)13)
    {
        btn_url_ok_Click(sender, e);
    }
}
/// <summary>
/// Lets the Enter key in the filter box act like a click on the filter button,
/// but only while that button is enabled.
/// </summary>
private void screen_text_KeyPress(object sender, KeyPressEventArgs e)
{
    const char enterKey = '\r'; // character code 13

    bool shouldFilter = btn_select_ok.Enabled && e.KeyChar == enterKey;
    if (shouldFilter)
    {
        btn_select_ok_Click(sender, e);
    }
}
/// <summary>
/// Synchronises the filter button with the filter box. An empty box disables
/// the button and switches back to the full result list; a non-empty box
/// enables the button provided listView1 has rows.
/// </summary>
private void screen_text_TextChanged_1(object sender, EventArgs e)
{
    bool filterIsEmpty = screen_text.Text == "";

    if (filterIsEmpty)
    {
        btn_select_ok.Enabled = false;
        listView1.Visible = true;
        listView2.Visible = false;
    }
    else if (listView1.Items.Count != 0)
    {
        btn_select_ok.Enabled = true;
    }
}

/// <summary>
/// Passes the address of every selected row of listView2 to
/// <c>Process.Start</c>. Start failures are ignored.
/// </summary>
private void listView2_MouseDoubleClick(object sender, MouseEventArgs e)
{
    foreach (ListViewItem selected in listView2.SelectedItems)
    {
        try
        {
            // The preview flag is switched off before the launch attempt.
            richTextBoxVisible = false;
            string address = selected.Text;
            System.Diagnostics.Process.Start(address);
        }
        catch
        {
            // Launch failures are deliberately ignored.
        }
    }
}
/// <summary>
/// Tells whether the text after the last '.' of <paramref name="_type"/>
/// starts with "html". Without any '.', the whole text is checked.
/// </summary>
/// <param name="_type">Address to classify.</param>
/// <returns><c>true</c> when the trailing segment starts with "html".</returns>
bool distinguishHtml(string _type)
{
    // LastIndexOf returns -1 without a dot, so the substring starts at 0.
    int lastDot = _type.LastIndexOf('.');
    string tail = _type.Substring(lastDot + 1);
    return tail.StartsWith("html");
}
}
}

只有不断学习,才可进步。
原文地址:https://www.cnblogs.com/onlyforliu/p/5742314.html