使用正则表达式替换并提取字符串

 去除 td、th 标签,只保留标签内的内容

  /// <summary>
  /// Removes <c>&lt;td&gt;</c> and <c>&lt;th&gt;</c> elements from an HTML fragment,
  /// replacing each matched cell with its inner text.
  /// </summary>
  /// <remarks>
  /// Cell content is matched with <c>%?[^%]+?</c>, so a cell whose text contains a
  /// <c>%</c> anywhere other than the first character is left untouched.
  /// Empty cells are passed to <c>TextFilter2</c>, which is defined outside this excerpt.
  /// </remarks>
  /// <param name="CommandText">The HTML fragment to clean.</param>
  /// <returns>The fragment after all four replacement passes.</returns>
  public string TextFilter(string CommandText)
    {
        const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        // Line breaks after the opening tag are written as \r*\n* escapes so the pattern
        // does not depend on the line endings the source file happens to be saved with.
        // The content group is lazy so each cell is matched on its own; a greedy match
        // would run from the first opening tag to the last closing tag and keep the
        // tags in between.
        var keepCellText = new MatchEvaluator(TextFilter);

        // Pass 1: plain <td> cells.
        CommandText = Regex.Replace(CommandText, @"(?<part1>(<td>\r*\n*))(?<part2>(%?[^%]+?))(?<part3>(</td>))", keepCellText, options);

        // Pass 2: <td> cells that carry attributes.
        CommandText = Regex.Replace(CommandText, @"(?<part1>(<td[^>]*>\r*\n*))(?<part2>(%?[^%]+?))(?<part3>(</td>))", keepCellText, options);

        // Pass 3: <th> cells, with or without attributes.
        CommandText = Regex.Replace(CommandText, @"(?<part1>(<th[^>]*>\r*\n*))(?<part2>(%?[^%]+?))(?<part3>(</th>))", keepCellText, options);

        // Pass 4: <td> cells with nothing between the tags.
        CommandText = Regex.Replace(CommandText, @"(?<part1>(<td[^>]*>\r*\n*))(?<part2>(</td>))", new MatchEvaluator(TextFilter2), options);

        return CommandText;
    }
    /// <summary>
    /// Match evaluator that reduces a matched cell to its text.
    /// </summary>
    /// <param name="match">A match exposing a named group <c>part2</c>.</param>
    /// <returns>
    /// The value of <c>part2</c> with every <c>&amp;nbsp;</c> entity removed and
    /// surrounding whitespace trimmed.
    /// </returns>
    public string TextFilter(Match match)
    {
        // Only the content group contributes to the result; the tag groups are discarded.
        Group content = match.Groups["part2"];
        string withoutEntities = content.Value.Replace("&nbsp;", "");
        return withoutEntities.Trim();
    }

提取span里面的内容,如未包含span则返回原始字符串

  /// <summary>
  /// Extracts the text inside a <c>&lt;span&gt;</c> element.
  /// </summary>
  /// <remarks>
  /// The span text is matched with <c>%?[^&gt;]+</c>, so it cannot contain a
  /// <c>&gt;</c> character (for example a nested tag).
  /// </remarks>
  /// <param name="CommandText">The text to search.</param>
  /// <returns>
  /// The span's text when the pattern matches; otherwise <paramref name="CommandText"/> unchanged.
  /// </returns>
  public string SpanText(string CommandText)
    {
        // Line breaks after the opening tag are written as \r*\n* escapes: raw line breaks
        // inside the verbatim string would tie the pattern to the source file's line endings
        // and would not skip a CRLF that follows the tag.
        Regex reg = new Regex(@"(?<part1>(.*<span[^>]*>\r*\n*))(?<part2>(%?[^>]+))(?<part3>(</span>))", RegexOptions.IgnoreCase | RegexOptions.Multiline);
        Match match = reg.Match(CommandText);

        // Success states the "no span found" case directly instead of inferring it
        // from the size of the group collection.
        if (!match.Success)
        {
            return CommandText;
        }

        return match.Groups["part2"].Value;
    }
原文地址:https://www.cnblogs.com/CoreXin/p/5848875.html