.NET去掉HTML标记

 using System.Net;
 using System.Text.RegularExpressions;
 2 
 /// <summary>
 /// Strips HTML markup from a string and returns the remaining text, HTML-encoded.
 /// </summary>
 /// <remarks>
 /// Processing order: script blocks, tags, line breaks followed by whitespace,
 /// comment remnants, a fixed set of named/numeric entities, any remaining numeric
 /// entities, stray angle brackets and CRLF pairs. Note that angle brackets produced
 /// by decoding <c>&amp;lt;</c>/<c>&amp;gt;</c> are removed by the final clean-up step.
 /// </remarks>
 /// <param name="Htmlstring">Source text that may contain HTML markup.</param>
 /// <returns>
 /// The text with markup removed, HTML-encoded and trimmed;
 /// an empty string when <paramref name="Htmlstring"/> is null or empty.
 /// </returns>
 public static string DeleteHTML(string Htmlstring)
 {
     if (string.IsNullOrEmpty(Htmlstring))
     {
         return string.Empty;
     }

     const RegexOptions options = RegexOptions.IgnoreCase;

     // Remove script blocks. Singleline lets '.' cross line breaks so that
     // multi-line script bodies are removed together with their tags.
     Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", options | RegexOptions.Singleline);

     // Remove HTML tags (this also consumes comments that contain no '>').
     Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", options);

     // Remove a line break together with the whitespace that follows it.
     Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", options);

     // Remove leftovers of comments that the tag pattern could not consume.
     Htmlstring = Regex.Replace(Htmlstring, @"-->", "", options);
     Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", options);

     // Decode the supported character entities.
     Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", options);
     Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", options);

     // Drop every other numeric entity.
     Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", options);

     // Strings are immutable: the result of Replace must be assigned back.
     Htmlstring = Htmlstring.Replace("<", "");
     Htmlstring = Htmlstring.Replace(">", "");
     Htmlstring = Htmlstring.Replace("\r\n", "");

     // WebUtility.HtmlEncode does not depend on an ambient HttpContext, so the
     // method also works outside of an ASP.NET request.
     return WebUtility.HtmlEncode(Htmlstring).Trim();
 }