一般在做爬虫或者CMS的时候,经常需要提取href链接或者是src地址。此时可以使用正则表达式轻松完成。

方法1:提取a标签的href和链接文本
<PRE class=" c-sharp;">Regex reg = new Regex(@"(?is)<a[^>]*?href=(['""]?)(?<url>[^'""\s>]+)\1[^>]*>(?<text>(?:(?!</?a).)*)</a>");
MatchCollection mc = reg.Matches(yourStr);
foreach (Match m in mc)
{
    richTextBox2.Text += m.Groups["url"].Value + " ";//得到href值
    richTextBox2.Text += m.Groups["text"].Value + " ";//得到<a></a>中间的内容
}
</PRE>

方法2:
<PRE class=" c-sharp;">Regex r;
Match m;
r = new Regex("href\\s*=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))",
    RegexOptions.IgnoreCase|RegexOptions.Compiled);
for (m = r.Match(inputString); m.Success; m = m.NextMatch())
{
    Console.WriteLine("Found href " + m.Groups[1] + " at " + m.Groups[1].Index);
}
</PRE>

方法3:提取img src
<PRE class=" c-sharp;">Regex reg = new Regex(@"(?i)<img[^>]*?\ssrc\s*=\s*(['""]?)(?<src>[^'""\s>]+)\1[^>]*>");
MatchCollection mc = reg.Matches(yourStr);
foreach (Match m in mc)
{
    Console.Write(m.Groups["src"].Value + " ");
}
</PRE>

方法4:提取img src
<PRE class=" c-sharp;">
/// <summary>
/// 获取Img的路径
/// </summary>
/// <param name="htmlText">Html字符串文本</param>
/// <returns>以数组形式返回图片路径</returns>
public static string[] GetHtmlImageUrlList(string htmlText)
{
    Regex regImg = new Regex(@"<img[^<>]*?src\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>", RegexOptions.IgnoreCase);
    //新建一个matches的MatchCollection对象 保存 匹配对象个数(img标签)
    MatchCollection matches = regImg.Matches(htmlText);
    int i = 0;
    string[] sUrlList = new string[matches.Count];
    //遍历所有的img标签对象
    foreach (Match match in matches)
    {
        //获取所有Img的路径src,并保存到数组中
        sUrlList[i++] = match.Groups["imgUrl"].Value;
    }
    return sUrlList;
}
</PRE>