C#字符串处理之清除Html&XML标签

/// <summary>
/// Removes HTML/XML tags from a string and returns the remaining plain text.
/// Approach: match tags with a regular expression and replace them with nothing,
/// then decode a few common character entities and normalize whitespace.
/// </summary>
/// <param name="input">The markup to clean. May be <c>null</c> or empty.</param>
/// <returns>
/// The text with tags removed, numeric character references (<c>&amp;#NNN;</c>) dropped,
/// the entities <c>quot</c>, <c>lt</c>, <c>gt</c>, <c>nbsp</c> and <c>amp</c> decoded,
/// leading/trailing whitespace trimmed and inner whitespace runs collapsed to one space.
/// Returns an empty string when <paramref name="input"/> is <c>null</c> or empty.
/// </returns>
/// <remarks>
/// Tag matching is purely textual (<c>&lt;</c> up to the next <c>&gt;</c>); it is not an
/// HTML parser. The result can contain <c>&lt;</c> and <c>&gt;</c> characters produced by
/// entity decoding, so encode it again before embedding it in markup.
/// </remarks>
public static string ContentReplace(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // Strip anything that looks like a tag.
    input = Regex.Replace(input, @"<[^>]*>", "");

    // Remove a line break together with the whitespace that follows it.
    input = Regex.Replace(input, @"[\r\n]\s+", "");

    // Drop decimal numeric character references such as "&#169;".
    input = Regex.Replace(input, @"&#\d+;", "");

    // Decode named entities. "&amp;" must be handled last; decoding it first would turn
    // an escaped entity like "&amp;lt;" into "&lt;" and then wrongly into "<".
    input = Regex.Replace(input, @"&quot;", "\"", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&lt;", "<", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&gt;", ">", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&nbsp;", " ", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&amp;", "&", RegexOptions.IgnoreCase);

    // Handle answer numbering.
    // NOTE(review): "$" is an end-of-input anchor, so this pattern can never match as
    // written. It may have been meant as a literal "\$、" - confirm before changing it.
    input = Regex.Replace(input, @"$、", "", RegexOptions.IgnoreCase);

    // The earlier version called input.Replace("<", ""), input.Replace(">", "") and
    // input.Replace(" ", "") without using the return values. Strings are immutable, so
    // those calls had no effect; they are omitted here. Assigning their results instead
    // would erase the "<" / ">" characters decoded above.

    // Trim both ends and collapse inner whitespace runs to a single space.
    return Regex.Replace(input.Trim(), @"\s+", " ");
}

原文地址:https://www.cnblogs.com/wecc/p/11360634.html