过滤HTML代码

  /// <summary>
  /// Strips HTML markup from a string and decodes a small set of common entities.
  /// </summary>
  /// <remarks>
  /// The rules run in table order, each one on the output of the previous one:
  /// script blocks, comments and tags are removed, a line break followed by
  /// whitespace is dropped, the listed entities are decoded and every other numeric
  /// entity is discarded. Finally all remaining angle brackets and CRLF pairs are
  /// removed, so a decoded &amp;lt; or &amp;gt; does not survive in the result.
  /// </remarks>
  /// <param name="string_include_html">Text that may contain HTML; may be null.</param>
  /// <returns>The text without markup, or an empty string for null or empty input.</returns>
  public static string FilterHtml(string string_include_html)
  {
      if (string.IsNullOrEmpty(string_include_html))
      {
          return "";
      }

      // { pattern, replacement } pairs, applied top to bottom.
      string[,] rules =
      {
          // [\s\S] rather than '.' so script bodies and comments that span lines are removed as well.
          { @"<script[^>]*?>[\s\S]*?</script>", "" },
          { @"<!--[\s\S]*?-->", "" },
          // One tag. Quoted attribute values may contain '>'. The alternatives start with
          // different characters, which avoids nested-quantifier backtracking on malformed input.
          { @"<(?:[^>""']|""[^""]*""|'[^']*')*>", "" },
          // A line break followed by any further whitespace.
          { @"([\r\n])[\s]+", "" },
          // NOTE: &amp; is decoded before the other entities, so "&amp;lt;" is decoded twice.
          { @"&(quot|#34);", "\"" },
          { @"&(amp|#38);", "&" },
          { @"&(lt|#60);", "<" },
          { @"&(gt|#62);", ">" },
          { @"&(nbsp|#160);", " " },
          { @"&(iexcl|#161);", "\u00A1" },
          { @"&(cent|#162);", "\u00A2" },
          { @"&(pound|#163);", "\u00A3" },
          { @"&(copy|#169);", "\u00A9" },
          // Any other numeric entity is dropped.
          { @"&#(\d+);", "" },
          // Leftovers of unbalanced comments: a stray terminator becomes a line break,
          // and an unterminated opener is removed up to the end of its line.
          { @"-->", "\r\n" },
          { @"<!--.*\n", "" }
      };

      System.Text.RegularExpressions.RegexOptions options =
          System.Text.RegularExpressions.RegexOptions.IgnoreCase;

      // Each rule must see the result of the previous one; feeding the untouched
      // input to every rule would keep only the effect of the last rule.
      string string_no_html = string_include_html;
      for (int i = 0; i < rules.GetLength(0); i++)
      {
          string_no_html = System.Text.RegularExpressions.Regex.Replace(
              string_no_html, rules[i, 0], rules[i, 1], options);
      }

      // string.Replace returns a new string, so the result has to be kept.
      return string_no_html
          .Replace("<", "")
          .Replace(">", "")
          .Replace("\r\n", "");
  }

 以上代码来自网络,但个人认为还不够完善,故有以下自己写的版本:

        /// <summary>
        /// Removes HTML tags from a string, turning br tags into line breaks.
        /// </summary>
        /// <param name="string_include_html">The string to convert; may be null.</param>
        /// <returns>
        /// The string without script blocks and tags, with each non-breaking-space entity
        /// replaced by a space; an empty string for null or empty input.
        /// </returns>
        public static string GetStringNoHtml(string string_include_html)
        {
            if (string.IsNullOrEmpty(string_include_html))
            {
                return "";
            }

            System.Text.RegularExpressions.RegexOptions options =
                System.Text.RegularExpressions.RegexOptions.IgnoreCase;

            // Convert line-break tags before the generic tag removal below would drop them.
            // The pattern also covers the self-closing and mixed-case spellings.
            // NOTE(review): CRLF is assumed as the line-break text - confirm against callers.
            string with_line_breaks = System.Text.RegularExpressions.Regex.Replace(
                string_include_html, @"<br\s*/?>", "\r\n", options);

            // [\s\S] rather than '.' so a script block spanning several lines is removed
            // together with its content instead of leaving the script source behind.
            string string_no_html = System.Text.RegularExpressions.Regex.Replace(
                with_line_breaks,
                @"(<script[^>]*?>[\s\S]*?</script>)|(<(.[^>]*)>)",
                "",
                options);

            return string_no_html.Replace("&nbsp;", " ");
        }
 
 
 
 
 
讓眾人的薪枝構起這團熱情的火焰
原文地址:https://www.cnblogs.com/valeb/p/3637143.html