C# 内容格式刷:HTML 转成 TXT

/// <summary>
        /// Converts an HTML fragment to plain text by deleting script blocks,
        /// comments, tags, line-break indentation and a fixed set of character
        /// entities.
        /// </summary>
        /// <remarks>
        /// Entities are deleted, not decoded (an encoded ampersand simply
        /// disappears). This is a best-effort text extractor, not an HTML
        /// sanitizer; do not rely on it to make untrusted markup safe.
        /// </remarks>
        /// <param name="strHtml">The HTML text to strip. May be null or empty.</param>
        /// <returns>
        /// The stripped text, or an empty string when <paramref name="strHtml"/>
        /// is null or empty.
        /// </returns>
        public static string HtmlToTxt(string strHtml)
        {
            // Regex.Replace throws on a null input; treat "nothing in" as "nothing out".
            if (string.IsNullOrEmpty(strHtml))
            {
                return string.Empty;
            }

            // Applied in order. Scripts and comments go first so that markup
            // nested inside them is dropped together with its content.
            string[] aryReg =
            {
                // <script ...> ... </script>, including the script body.
                @"<script[^>]*?>.*?</script>",

                // Complete comments, possibly spanning several lines.
                @"<!--.*?-->",

                // Opening/closing tags and declarations such as <!DOCTYPE ...>.
                // Quoted attribute values may contain '>' characters. The atomic
                // groups (?>...) keep matching linear on unclosed tags; the
                // nested-quantifier form this replaces could backtrack exponentially.
                @"<(?:/\s*)?!?(?>[\w:]+)(?>(?:""[^""]*""|'[^']*'|[^'""<>])*)>",

                // Leftover comment terminator.
                @"-->",

                // Unterminated comment: drop it up to and including the end of its line.
                @"<!--[^\n]*\n",

                // A line break together with the whitespace that follows it.
                @"[\r\n]\s+",

                // Named entities handled by this method, plus any decimal numeric entity.
                @"&(?:quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);"
            };

            // Singleline lets '.' cross line breaks (multi-line scripts/comments).
            const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            string strOutput = strHtml;
            foreach (string pattern in aryReg)
            {
                // The static overload reuses the framework's regex cache instead
                // of constructing a new Regex for every pattern on every call.
                strOutput = Regex.Replace(strOutput, pattern, string.Empty, options);
            }

            // string.Replace returns a new string; the result must be kept.
            // NOTE(review): the last literal was a single space in the pasted
            // source, apparently a mangled "\r\n" (the same mangling hit the
            // regex patterns). Removing every space would glue words together,
            // so CRLF is assumed here - confirm against the intended behavior.
            return strOutput
                .Replace("<", string.Empty)
                .Replace(">", string.Empty)
                .Replace("\r\n", string.Empty);
        }

原文地址:https://www.cnblogs.com/ken-admin/p/6405762.html