HTML标签过滤方案

取决于不同的解决机制:

解决方案1:

在数据库中存入经过HTML转义(编码)后的标签,显示时按原样输出。

在存入数据库之前,先对输入内容进行编码:Server.HtmlEncode(txtboxName.Text);

解决方案2:

在数据库中不存入HTML标签的有关信息,过滤HTML标签,只显示文字。

存入数据库之前调用过滤函数即可:checkStr(txtboxName.Text)、StripHTML(txtboxName.Text) 或 NoHTML(txtboxName.Text)。(以下提供三个过滤函数,任选一个;使用前需添加命名空间:using System.Text.RegularExpressions;)

以下代码均调试通过:

        /// <summary>

        /// HTML过滤方法一

        /// </summary>

        /// <param name="html"></param>

        /// <returns></returns>

        public string checkStr(string html)

        {

            System.Text.RegularExpressions.Regex regex1 = new System.Text.RegularExpressions.Regex(@"<script[\s\S]+</script *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex2 = new System.Text.RegularExpressions.Regex(@" href *= *[\s\S]*script *:", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex3 = new System.Text.RegularExpressions.Regex(@" on[\s\S]*=", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex4 = new System.Text.RegularExpressions.Regex(@"<iframe[\s\S]+</iframe *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex5 = new System.Text.RegularExpressions.Regex(@"<frameset[\s\S]+</frameset *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex6 = new System.Text.RegularExpressions.Regex(@"\<img[^\>]+\>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex7 = new System.Text.RegularExpressions.Regex(@"</p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex8 = new System.Text.RegularExpressions.Regex(@"<p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            System.Text.RegularExpressions.Regex regex9 = new System.Text.RegularExpressions.Regex(@"<[^>]*>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            html = regex1.Replace(html, ""); //过滤<script></script>标记

            html = regex2.Replace(html, ""); //过滤href=javascript: (<A>) 属性

            html = regex3.Replace(html, " _disibledevent="); //过滤其它控件的on...事件

            html = regex4.Replace(html, ""); //过滤iframe

            html = regex5.Replace(html, ""); //过滤frameset

            html = regex6.Replace(html, ""); //过滤frameset

            html = regex7.Replace(html, ""); //过滤frameset

            html = regex8.Replace(html, ""); //过滤frameset

            html = regex9.Replace(html, "");

            html = html.Replace(" ", "");

            html = html.Replace("</strong>", "");

            html = html.Replace("<strong>", "");

            return html;

        }

 

#region Strip HTML markup

        /// <summary>
        /// Method 2: removes HTML markup from a string by applying an ordered list of
        /// regular-expression replacements, decoding a handful of common character
        /// entities along the way.
        /// </summary>
        /// <param name="strHtml">Source text containing HTML. <c>null</c> or empty input yields an empty string.</param>
        /// <returns>The text with tags, angle brackets and CR/LF pairs removed.</returns>
        public static string StripHTML(string strHtml)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return string.Empty;
            }

            // Each row is { pattern, replacement }. Rows are applied in order, case-insensitively.
            string[,] rules =
            {
                // [\s\S] instead of '.', so that script bodies spanning several lines are removed too.
                { @"<script[^>]*?>[\s\S]*?</script\s*>", "" },
                { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
                { @"([\r\n])[\s]+", "" },
                { @"&(quot|#34);", "\"" },
                { @"&(amp|#38);", "&" },
                { @"&(lt|#60);", "<" },
                { @"&(gt|#62);", ">" },
                { @"&(nbsp|#160);", " " },
                { @"&(iexcl|#161);", "\xa1" },
                { @"&(cent|#162);", "\xa2" },
                { @"&(pound|#163);", "\xa3" },
                { @"&(copy|#169);", "\xa9" },
                { @"&#(\d+);", "" },
                // A comment terminator becomes a line break so that the next rule, which
                // deletes from "<!--" to the end of the line, consumes the whole comment.
                { @"-->", "\r\n" },
                { @"<!--.*\n", "" }
            };

            string strOutput = strHtml;
            for (int i = 0; i < rules.GetLength(0); i++)
            {
                strOutput = System.Text.RegularExpressions.Regex.Replace(
                    strOutput,
                    rules[i, 0],
                    rules[i, 1],
                    System.Text.RegularExpressions.RegexOptions.IgnoreCase);
            }

            // string.Replace returns a new string; the result has to be assigned. Without this,
            // angle brackets produced by decoding &lt; / &gt; above would be returned as live markup.
            strOutput = strOutput.Replace("<", "");
            strOutput = strOutput.Replace(">", "");
            strOutput = strOutput.Replace("\r\n", "");

            return strOutput;
        }

#endregion

 

 

 

 

 

///   <summary>

        ///   方法三:去除HTML标记

        ///   </summary>

        ///   <param   name="NoHTML">包括HTML的源码  </param>

        ///   <returns>已经去除后的文字</returns>

        public static string NoHTML(string Htmlstring)

        {

            //删除脚本

            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",

              RegexOptions.IgnoreCase);

            //删除HTML

            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "   ",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9",

              RegexOptions.IgnoreCase);

            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",

              RegexOptions.IgnoreCase);

            Htmlstring.Replace("<", "");

            Htmlstring.Replace(">", "");

            Htmlstring.Replace("\r\n", "");

            Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();

            return Htmlstring;

        }

 

原文地址:https://www.cnblogs.com/shineqiujuan/p/1335683.html