(来自QQ群"MSDN 微软开发者网络",作者:DEVIN )
代码
1 using System;
2 using System.Text;
3 using System.Text.RegularExpressions;
4
5 namespace Onfly.Common.Utility
6 {
/// <summary>
/// Static helpers that strip scripts, script-bearing attributes, embedded
/// elements, HTML tags and configured bad words from user-supplied text.
/// </summary>
/// <remarks>
/// These are best-effort, regex-based filters. They reduce the amount of
/// active content in a string but are not a replacement for output encoding.
/// </remarks>
public class Filter
{
    /// <summary>
    /// Words to mask, separated by '|'. Each entry is used as a regular
    /// expression. Kept as a public field for backward compatibility;
    /// new code should use <see cref="KeyWord"/>.
    /// </summary>
    public static String keyWord = "";

    /// <summary>
    /// Words to mask, separated by '|'. Each entry is used as a regular
    /// expression by <see cref="FilterBadWords"/>.
    /// </summary>
    public static String KeyWord
    {
        get { return keyWord; }
        set { keyWord = value; }
    }

    // Extensions that FilterSrc treats as script / server-side resources.
    private const String ScriptFileExtension = @"\.(?:js|vbs|aspx?|php|jsp)";

    // Options used when masking bad words (same set the filter has always used).
    private const RegexOptions BadWordOptions =
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline;

    // HTML comments and <script> elements. A script element ends at the first
    // closing tag; an opening tag without any closing tag is removed on its own.
    // There is deliberately no nested quantifier here: the previous
    // "(.*?comment?)*" form backtracked exponentially on an unclosed <script>.
    private static readonly Regex ScriptOrCommentRegex = new Regex(
        @"<!--.*?--[ \n\r]*>|<[ \n\r]*script[^>]*>(?:.*?<[ \n\r]*/script[^>]*>)?",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // One opening tag that has attributes. Every event-handler attribute
    // (on...) and every href whose value starts with javascript:/vbscript:
    // is recorded as a capture of the group named "attribute".
    // Options: i = ignore case, n = explicit capture, x = ignore pattern whitespace.
    private static readonly Regex ScriptAttributeTagRegex = new Regex(@"(?inx)
        <(\w+)\s+
        (
            (?'attribute'
                (?<![\w\-]) on[a-z]+ \s*=\s*
                ( ""[^""]*"" | '[^']*' | [^\s>]+ )
            )
            |
            (?'attribute'
                (?<![\w\-]) href \s*=\s*
                (
                    "" \s* (java|vb)script \s* : [^""]* ""
                    | ' \s* (java|vb)script \s* : [^']* '
                    | (java|vb)script \s* : [^\s>]*
                )
            )
            |
            [^>]
        )*
        >");

    // A complete href attribute whose value starts with "<something>script:".
    private static readonly Regex ScriptHrefRegex = new Regex(
        @"\shref\s*=\s*(?:""\s*\w*script\s*:[^""]*""|'\s*\w*script\s*:[^']*'|\w*script\s*:[^\s>]*)",
        RegexOptions.IgnoreCase);

    // A complete src attribute whose value ends in one of the script extensions.
    private static readonly Regex ScriptSrcRegex = new Regex(
        @"\ssrc\s*=\s*(?:""[^""]*" + ScriptFileExtension + @"""|'[^']*" + ScriptFileExtension
        + @"'|[^\s""'>]*" + ScriptFileExtension + @"(?=[\s/>]|$))",
        RegexOptions.IgnoreCase);

    // Any tag-like span: '<' up to the next '>'.
    private static readonly Regex AnyTagRegex = new Regex(@"<[^>]*>");

    /// <summary>
    /// Removes HTML comments and script elements, then strips event-handler
    /// attributes and script-scheme href attributes from the remaining tags.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterScript(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        return StripScriptAttributesFromTags(ScriptOrCommentRegex.Replace(content, String.Empty));
    }

    /// <summary>
    /// Strips script-bearing attributes (onclick, onerror, href="javascript:..." and
    /// similar) from every opening tag in the text.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The text with those attributes removed.</returns>
    private static String StripScriptAttributesFromTags(String content)
    {
        return ScriptAttributeTagRegex.Replace(content, new MatchEvaluator(StripAttributesHandler));
    }

    /// <summary>
    /// Match evaluator for a single tag: rebuilds the tag without any of the
    /// captured script attributes.
    /// </summary>
    /// <param name="m">A match of the tag pattern.</param>
    /// <returns>The tag text with every captured attribute removed.</returns>
    private static String StripAttributesHandler(Match m)
    {
        Group attributes = m.Groups["attribute"];
        if (!attributes.Success)
        {
            return m.Value;
        }

        // Group.Value only exposes the last capture, so walk all captures.
        // Removing from the last one backwards keeps earlier offsets valid.
        StringBuilder tag = new StringBuilder(m.Value);
        for (int i = attributes.Captures.Count - 1; i >= 0; i--)
        {
            Capture attribute = attributes.Captures[i];
            tag.Remove(attribute.Index - m.Index, attribute.Length);
        }
        return tag.ToString();
    }

    /// <summary>
    /// Runs <see cref="FilterScript"/> and then removes href attributes whose
    /// value starts with a script scheme (javascript:, vbscript:, ...).
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterAHrefScript(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        // The whole attribute is removed, bounded by its own value, so text
        // after the link can never be swallowed.
        return ScriptHrefRegex.Replace(FilterScript(content), String.Empty);
    }

    /// <summary>
    /// Runs <see cref="FilterScript"/> and then removes src attributes that
    /// point at script or server-side files (js, vbs, asp, aspx, php, jsp).
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterSrc(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        return ScriptSrcRegex.Replace(FilterScript(content), String.Empty);
    }

    /// <summary>
    /// Runs <see cref="FilterScript"/> and then removes every remaining tag,
    /// keeping the text between tags.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterHtml(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        return AnyTagRegex.Replace(FilterScript(content), String.Empty);
    }

    /// <summary>
    /// Removes object elements together with their content.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterObject(String content)
    {
        return RemoveElement(content, "object");
    }

    /// <summary>
    /// Removes iframe elements together with their content.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterIframe(String content)
    {
        return RemoveElement(content, "iframe");
    }

    /// <summary>
    /// Removes frameset elements together with their content.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterFrameset(String content)
    {
        return RemoveElement(content, "frameset");
    }

    /// <summary>
    /// Removes every element with the given tag name: the opening tag, the
    /// content up to the nearest closing tag and that closing tag. An opening
    /// tag that is never closed is removed on its own.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <param name="tagName">Plain tag name such as "iframe" (matched case-insensitively).</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    private static String RemoveElement(String content, String tagName)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        // The lazy quantifier stops at the nearest closing tag, so text between
        // two separate elements is preserved.
        String pattern = String.Format(@"<{0}\b[^>]*>(?:[\s\S]*?</{0}\b[^>]*>)?", tagName);
        return Regex.Replace(content, pattern, String.Empty, RegexOptions.IgnoreCase);
    }

    /// <summary>
    /// Masks every occurrence of the configured bad words with '*' characters,
    /// one per matched character.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>
    /// The masked text. The input is returned unchanged when it is null or
    /// empty, or when no key words are configured.
    /// </returns>
    /// <exception cref="ArgumentException">A configured key word is not a valid regular expression.</exception>
    public static String FilterBadWords(String content)
    {
        if (String.IsNullOrEmpty(content) || String.IsNullOrEmpty(keyWord))
        {
            return content;
        }

        foreach (String entry in keyWord.Split('|'))
        {
            String word = entry.Trim();
            if (word.Length == 0)
            {
                // An empty pattern matches at every position and masks nothing.
                continue;
            }
            content = Regex.Replace(content, word, new MatchEvaluator(MaskMatch), BadWordOptions);
        }
        return content;
    }

    /// <summary>
    /// Match evaluator that produces a run of '*' as long as the match itself.
    /// </summary>
    /// <param name="m">A bad-word match.</param>
    /// <returns>A string of '*' with the same length as the match.</returns>
    private static String MaskMatch(Match m)
    {
        return new String('*', m.Length);
    }

    /// <summary>
    /// Applies every filter in this class and returns plain, masked text.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterAll(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }

        // Element-level filters run first so that the content of script, object,
        // iframe and frameset elements is dropped together with their tags.
        // Generic tag stripping has to come last, otherwise those filters would
        // find nothing left to match.
        content = FilterScript(content);
        content = FilterObject(content);
        content = FilterIframe(content);
        content = FilterFrameset(content);
        content = FilterAHrefScript(content);
        content = FilterSrc(content);
        content = FilterHtml(content);
        content = FilterBadWords(content);
        return content;
    }
}
201 }
下载 /Files/llbofchina/codes/Filter.cs.txt
2 using System.Text;
3 using System.Text.RegularExpressions;
4
5 namespace Onfly.Common.Utility
6 {
/// <summary>
/// Static helpers that strip scripts, script-bearing attributes, embedded
/// elements, HTML tags and configured bad words from user-supplied text.
/// </summary>
/// <remarks>
/// These are best-effort, regex-based filters. They reduce the amount of
/// active content in a string but are not a replacement for output encoding.
/// </remarks>
public class Filter
{
    /// <summary>
    /// Words to mask, separated by '|'. Each entry is used as a regular
    /// expression. Kept as a public field for backward compatibility;
    /// new code should use <see cref="KeyWord"/>.
    /// </summary>
    public static String keyWord = "";

    /// <summary>
    /// Words to mask, separated by '|'. Each entry is used as a regular
    /// expression by <see cref="FilterBadWords"/>.
    /// </summary>
    public static String KeyWord
    {
        get { return keyWord; }
        set { keyWord = value; }
    }

    // Extensions that FilterSrc treats as script / server-side resources.
    private const String ScriptFileExtension = @"\.(?:js|vbs|aspx?|php|jsp)";

    // Options used when masking bad words (same set the filter has always used).
    private const RegexOptions BadWordOptions =
        RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Multiline;

    // HTML comments and <script> elements. A script element ends at the first
    // closing tag; an opening tag without any closing tag is removed on its own.
    // There is deliberately no nested quantifier here: the previous
    // "(.*?comment?)*" form backtracked exponentially on an unclosed <script>.
    private static readonly Regex ScriptOrCommentRegex = new Regex(
        @"<!--.*?--[ \n\r]*>|<[ \n\r]*script[^>]*>(?:.*?<[ \n\r]*/script[^>]*>)?",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // One opening tag that has attributes. Every event-handler attribute
    // (on...) and every href whose value starts with javascript:/vbscript:
    // is recorded as a capture of the group named "attribute".
    // Options: i = ignore case, n = explicit capture, x = ignore pattern whitespace.
    private static readonly Regex ScriptAttributeTagRegex = new Regex(@"(?inx)
        <(\w+)\s+
        (
            (?'attribute'
                (?<![\w\-]) on[a-z]+ \s*=\s*
                ( ""[^""]*"" | '[^']*' | [^\s>]+ )
            )
            |
            (?'attribute'
                (?<![\w\-]) href \s*=\s*
                (
                    "" \s* (java|vb)script \s* : [^""]* ""
                    | ' \s* (java|vb)script \s* : [^']* '
                    | (java|vb)script \s* : [^\s>]*
                )
            )
            |
            [^>]
        )*
        >");

    // A complete href attribute whose value starts with "<something>script:".
    private static readonly Regex ScriptHrefRegex = new Regex(
        @"\shref\s*=\s*(?:""\s*\w*script\s*:[^""]*""|'\s*\w*script\s*:[^']*'|\w*script\s*:[^\s>]*)",
        RegexOptions.IgnoreCase);

    // A complete src attribute whose value ends in one of the script extensions.
    private static readonly Regex ScriptSrcRegex = new Regex(
        @"\ssrc\s*=\s*(?:""[^""]*" + ScriptFileExtension + @"""|'[^']*" + ScriptFileExtension
        + @"'|[^\s""'>]*" + ScriptFileExtension + @"(?=[\s/>]|$))",
        RegexOptions.IgnoreCase);

    // Any tag-like span: '<' up to the next '>'.
    private static readonly Regex AnyTagRegex = new Regex(@"<[^>]*>");

    /// <summary>
    /// Removes HTML comments and script elements, then strips event-handler
    /// attributes and script-scheme href attributes from the remaining tags.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterScript(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        return StripScriptAttributesFromTags(ScriptOrCommentRegex.Replace(content, String.Empty));
    }

    /// <summary>
    /// Strips script-bearing attributes (onclick, onerror, href="javascript:..." and
    /// similar) from every opening tag in the text.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The text with those attributes removed.</returns>
    private static String StripScriptAttributesFromTags(String content)
    {
        return ScriptAttributeTagRegex.Replace(content, new MatchEvaluator(StripAttributesHandler));
    }

    /// <summary>
    /// Match evaluator for a single tag: rebuilds the tag without any of the
    /// captured script attributes.
    /// </summary>
    /// <param name="m">A match of the tag pattern.</param>
    /// <returns>The tag text with every captured attribute removed.</returns>
    private static String StripAttributesHandler(Match m)
    {
        Group attributes = m.Groups["attribute"];
        if (!attributes.Success)
        {
            return m.Value;
        }

        // Group.Value only exposes the last capture, so walk all captures.
        // Removing from the last one backwards keeps earlier offsets valid.
        StringBuilder tag = new StringBuilder(m.Value);
        for (int i = attributes.Captures.Count - 1; i >= 0; i--)
        {
            Capture attribute = attributes.Captures[i];
            tag.Remove(attribute.Index - m.Index, attribute.Length);
        }
        return tag.ToString();
    }

    /// <summary>
    /// Runs <see cref="FilterScript"/> and then removes href attributes whose
    /// value starts with a script scheme (javascript:, vbscript:, ...).
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterAHrefScript(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        // The whole attribute is removed, bounded by its own value, so text
        // after the link can never be swallowed.
        return ScriptHrefRegex.Replace(FilterScript(content), String.Empty);
    }

    /// <summary>
    /// Runs <see cref="FilterScript"/> and then removes src attributes that
    /// point at script or server-side files (js, vbs, asp, aspx, php, jsp).
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterSrc(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        return ScriptSrcRegex.Replace(FilterScript(content), String.Empty);
    }

    /// <summary>
    /// Runs <see cref="FilterScript"/> and then removes every remaining tag,
    /// keeping the text between tags.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterHtml(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        return AnyTagRegex.Replace(FilterScript(content), String.Empty);
    }

    /// <summary>
    /// Removes object elements together with their content.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterObject(String content)
    {
        return RemoveElement(content, "object");
    }

    /// <summary>
    /// Removes iframe elements together with their content.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterIframe(String content)
    {
        return RemoveElement(content, "iframe");
    }

    /// <summary>
    /// Removes frameset elements together with their content.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterFrameset(String content)
    {
        return RemoveElement(content, "frameset");
    }

    /// <summary>
    /// Removes every element with the given tag name: the opening tag, the
    /// content up to the nearest closing tag and that closing tag. An opening
    /// tag that is never closed is removed on its own.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <param name="tagName">Plain tag name such as "iframe" (matched case-insensitively).</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    private static String RemoveElement(String content, String tagName)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }
        // The lazy quantifier stops at the nearest closing tag, so text between
        // two separate elements is preserved.
        String pattern = String.Format(@"<{0}\b[^>]*>(?:[\s\S]*?</{0}\b[^>]*>)?", tagName);
        return Regex.Replace(content, pattern, String.Empty, RegexOptions.IgnoreCase);
    }

    /// <summary>
    /// Masks every occurrence of the configured bad words with '*' characters,
    /// one per matched character.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>
    /// The masked text. The input is returned unchanged when it is null or
    /// empty, or when no key words are configured.
    /// </returns>
    /// <exception cref="ArgumentException">A configured key word is not a valid regular expression.</exception>
    public static String FilterBadWords(String content)
    {
        if (String.IsNullOrEmpty(content) || String.IsNullOrEmpty(keyWord))
        {
            return content;
        }

        foreach (String entry in keyWord.Split('|'))
        {
            String word = entry.Trim();
            if (word.Length == 0)
            {
                // An empty pattern matches at every position and masks nothing.
                continue;
            }
            content = Regex.Replace(content, word, new MatchEvaluator(MaskMatch), BadWordOptions);
        }
        return content;
    }

    /// <summary>
    /// Match evaluator that produces a run of '*' as long as the match itself.
    /// </summary>
    /// <param name="m">A bad-word match.</param>
    /// <returns>A string of '*' with the same length as the match.</returns>
    private static String MaskMatch(Match m)
    {
        return new String('*', m.Length);
    }

    /// <summary>
    /// Applies every filter in this class and returns plain, masked text.
    /// </summary>
    /// <param name="content">Text to filter.</param>
    /// <returns>The filtered text; null or empty input is returned unchanged.</returns>
    public static String FilterAll(String content)
    {
        if (String.IsNullOrEmpty(content))
        {
            return content;
        }

        // Element-level filters run first so that the content of script, object,
        // iframe and frameset elements is dropped together with their tags.
        // Generic tag stripping has to come last, otherwise those filters would
        // find nothing left to match.
        content = FilterScript(content);
        content = FilterObject(content);
        content = FilterIframe(content);
        content = FilterFrameset(content);
        content = FilterAHrefScript(content);
        content = FilterSrc(content);
        content = FilterHtml(content);
        content = FilterBadWords(content);
        return content;
    }
}
201 }
下载 /Files/llbofchina/codes/Filter.cs.txt