屏蔽HTML中的script代码段

参考文章:
1)匹配嵌套的构造(较复杂)
2)解读C#正则表达式
3)[正则表达式] 可以解析HTML/XHTML页面的所有元素和结构的Regular Expression!



/// <summary>
/// Demonstrates disabling the <c>&lt;script&gt;</c> blocks of an HTML fragment by
/// wrapping every match of a script-block regular expression in an HTML comment.
/// </summary>
/// <remarks>
/// The pattern fragments are compile-time constants so that the composite
/// patterns can be built from them (instance field initializers may not
/// reference other instance fields) and so that the static members can use them.
/// Only <c>scriptBlock</c> is used by the demo; the remaining fragments are kept
/// as building blocks for matching other HTML/XML constructs.
/// </remarks>
class Class1
{
    // --- Elementary fragments -------------------------------------------

    // Tag name: word characters, '-' and ':'.
    private const string tag = @"(?:[\w-:]+)";

    // Attribute name with an optional value (unquoted, double-quoted or single-quoted).
    private const string attribute = @"(?:[\w-:]+)(?:(\s)*=(\s)*(?:[^\s\>\<]*|\""[^\""]*\""|\'[^\']*\'))?";

    // Name following "<!" in a directive.
    private const string name = @"(?:[\w-:]+)";

    // Directive argument: a bare token or a quoted string.
    private const string argument = @"(?:[\w-:]+|\""[\s\S]*?\""|\'[\s\S]*?\')";

    // --- Composite patterns ---------------------------------------------

    // Opening (or self-closing) tag with any number of attributes.
    private const string beginningTag = @"(?:\<" + tag + @"(?:\s+" + attribute + @")*\s*(?:/)?\>)";

    // Closing tag.
    private const string endingTag = @"(?:\</" + tag + @"\>)";

    // <!-- ... --> comment.
    private const string xmlComment = @"(?:\<!--[\s\S]*?--\>)";

    // <!name arg arg ...> directive.
    private const string xmlDirective = @"(?:\<!" + name + @"(?:\s+" + argument + @")*\s*\>)";

    // <![CDATA[ ... ]]> section.
    private const string xmlCData = @"(?:\<!\[CDATA\[(?:[\s\S]*?)\]\]\>)";

    // <Style ...> ... </Style> block (lazy body match).
    private const string styleBlock = @"(?:(?:\<(?:Style)(?:\s+" + attribute + @")*\s*(?:/)?\>)(?:[\s\S]*?)(?:\</(?:Style)\>))";

    // <script ...> ... </script> block (lazy body match up to the first closing tag).
    private const string scriptBlock = @"(?:(?:\<(?:script)(?:\s+" + attribute + @")*\s*(?:/)?\>)(?:[\s\S]*?)(?:\</(?:script)\>))";

    // Literal text between tags; runs of spaces are captured in the "blank" group.
    private const string xmlLiteral = @"(?:(?:(?<blank>[ ]+)|[^ \<\>])+)";

    // Built once and reused. IgnoreCase lets the pattern match tags such as </ScripT>.
    private static readonly System.Text.RegularExpressions.Regex scriptBlockRegex =
        new System.Text.RegularExpressions.Regex(
            scriptBlock,
            System.Text.RegularExpressions.RegexOptions.IgnoreCase |
            System.Text.RegularExpressions.RegexOptions.Compiled);

    /// <summary>
    /// Match evaluator that wraps the matched text in an HTML comment.
    /// </summary>
    /// <param name="m">The match whose text should be commented out.</param>
    /// <returns>The matched text surrounded by <c>&lt;!--</c> and <c>--&gt;</c>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="m"/> is null.</exception>
    public static string CapText(System.Text.RegularExpressions.Match m)
    {
        if (m == null)
        {
            throw new System.ArgumentNullException("m");
        }

        // NOTE: if the matched text itself contains "-->", the surrounding
        // comment ends early; this sample does not guard against that.
        return "<!--" + m.Value + "-->";
    }

    /// <summary>
    /// Comments out every script block found in <paramref name="html"/>.
    /// </summary>
    /// <param name="html">The HTML text to process.</param>
    /// <returns>
    /// <paramref name="html"/> with each script block wrapped in an HTML comment;
    /// text without script blocks is returned unchanged.
    /// </returns>
    public static string DisableScripts(string html)
    {
        return scriptBlockRegex.Replace(
            html,
            new System.Text.RegularExpressions.MatchEvaluator(CapText));
    }

    /// <summary>
    /// Runs the demo on a sample string containing two script blocks and
    /// prints the result to the console.
    /// </summary>
    static void Main()
    {
        string text = "<script > jaiowjefw </script>sdfsdf<script>fdf</ScripT> .";

        string result = DisableScripts(text);
        System.Console.WriteLine(result);
    }
}
原文地址:https://www.cnblogs.com/huqingyu/p/178920.html