C#正则表达式将html代码中的所有img标签提取

/// <summary>
        /// Extracts the value of the <c>src</c> attribute from every <c>&lt;img&gt;</c> tag in an HTML fragment.
        /// </summary>
        /// <param name="sHtmlText">The HTML markup to scan. May be <c>null</c> or empty.</param>
        /// <returns>
        /// One entry per matched <c>&lt;img&gt;</c> tag, in document order (an entry is the
        /// empty string when the tag's <c>src</c> value is empty). When
        /// <paramref name="sHtmlText"/> is <c>null</c> or empty, a one-element array
        /// containing the empty string is returned; this historical behavior is kept
        /// so that existing callers are not broken.
        /// </returns>
        public static string[] GetHtmlImageUrlList(string sHtmlText)
        {
            if (string.IsNullOrEmpty(sHtmlText))
            {
                // Kept for backward compatibility: callers receive a single empty entry, not an empty array.
                return new[] { "" };
            }

            // Find every <img ... src=...> tag in the input.
            MatchCollection matches = ImgSrcRegex.Matches(sHtmlText);
            string[] sUrlList = new string[matches.Count];

            // Copy the captured URL of each match into the result, preserving order.
            int i = 0;
            foreach (Match match in matches)
            {
                sUrlList[i++] = match.Groups["imgUrl"].Value;
            }

            return sUrlList;
        }

        // Matches an <img> tag and captures its src value in the "imgUrl" group.
        //   <img\b[^<>]*?\bsrc   - tag name, then lazily skip other attributes up to "src"
        //   \s*=\s*[""']?\s*     - "=", optional whitespace, optional opening quote
        //   (?<imgUrl>[^\s""'<>]*) - the URL: everything up to whitespace, a quote, or an angle bracket
        //   [^<>]*?/?\s*>        - remaining attributes and the (optionally self-closing) tag end
        // The whitespace escapes must be written as \s; with the backslash missing the
        // class would match the letter 's' and truncate URLs such as "https://...".
        private static readonly Regex ImgSrcRegex = new Regex(
            @"<img\b[^<>]*?\bsrc\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>",
            RegexOptions.IgnoreCase);

  

原文地址:https://www.cnblogs.com/soulsjie/p/11858519.html