自动识别HTML代码里的图片链接,并下载到服务器的指定目录(开源)

做新闻文章时,我们或多或少会转载别人的成果,那么怎样把其中的图片也下载到自己的服务器上呢?我这里有一段代码,敬请指教!

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;

namespace zhang.Common
{
    
/// <summary>
/// Finds remote image links inside an HTML fragment, downloads the images
/// to a directory on the server and rewrites the HTML to point at the local copies.
/// </summary>
public class HanlerFiles
{
    // Matches one complete <img ...> tag; [^>]* also covers tags that span several lines.
    private static readonly Regex ImgTagRegex =
        new Regex(@"<img\b[^>]*>", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    // Matches an absolute http/https URL ending in a known image extension.
    // The character class stops at whitespace, quotes and angle brackets so the
    // match cannot run past the attribute value; the dot before the extension is escaped.
    private static readonly Regex ImgUrlRegex =
        new Regex(@"https?://[^\s""'<>]+\.(?:jpg|gif|bmp|png)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Returns one entry per &lt;img&gt; tag found in <paramref name="htmlStr"/>.
    /// Each entry is the image URL of that tag, or an empty string when the tag
    /// contains no recognised absolute image URL.
    /// </summary>
    /// <param name="htmlStr">HTML text to scan.</param>
    /// <returns>Array with the same length as the number of &lt;img&gt; tags.</returns>
    private string[] GetImgTag(string htmlStr)
    {
        // Run the regex once and reuse the collection for both sizing and iteration.
        MatchCollection tags = ImgTagRegex.Matches(htmlStr);
        string[] urls = new string[tags.Count];
        for (int i = 0; i < tags.Count; i++)
        {
            urls[i] = GetImgUrl(tags[i].Value);
        }

        return urls;
    }

    /// <summary>
    /// Extracts the image URL from a single &lt;img&gt; tag.
    /// </summary>
    /// <param name="imgTagStr">Text of one &lt;img&gt; tag.</param>
    /// <returns>The last matching URL in the tag, or an empty string when there is none.</returns>
    private string GetImgUrl(string imgTagStr)
    {
        string url = "";
        foreach (Match m in ImgUrlRegex.Matches(imgTagStr))
        {
            url = m.Value;
        }

        return url;
    }

    // Replaces characters that are not allowed in file names with '_'.
    private static string SanitizeFileName(string name)
    {
        foreach (char c in Path.GetInvalidFileNameChars())
        {
            name = name.Replace(c, '_');
        }

        return name;
    }

    /// <summary>
    /// Detects image files referenced by the HTML content and downloads them
    /// to the given server directory. Every successfully downloaded URL is
    /// replaced in <paramref name="strHTML"/> by its local path.
    /// </summary>
    /// <param name="strHTML">HTML to process; updated in place with the local image paths.</param>
    /// <param name="path">Virtual path of the target directory (mapped with Server.MapPath).</param>
    /// <returns>The number of &lt;img&gt; tags found in the HTML (not the number of successful downloads).</returns>
    public int SaveUrlPics(ref string strHTML, string path)
    {
        if (string.IsNullOrEmpty(strHTML))
        {
            return 0;
        }

        string[] imgurlAry = GetImgTag(strHTML);
        if (imgurlAry.Length == 0)
        {
            return 0;
        }

        // The rewritten URL needs exactly one separator between directory and file name.
        string urlPrefix = path ?? string.Empty;
        if (urlPrefix.Length > 0 && !urlPrefix.EndsWith("/", StringComparison.Ordinal))
        {
            urlPrefix += "/";
        }

        string targetDir;
        try
        {
            targetDir = HttpContext.Current.Server.MapPath(path);
        }
        catch (Exception ex)
        {
            // Best effort, as before: without a target directory nothing can be saved.
            System.Diagnostics.Trace.TraceError("SaveUrlPics: cannot map path '{0}': {1}", path, ex.Message);
            return imgurlAry.Length;
        }

        // Fixed, culture-independent stamp: the default DateTime format may contain
        // characters such as '/' that are not valid in file names.
        string stamp = DateTime.Now.ToString("yyyyMMddHHmmssfff", System.Globalization.CultureInfo.InvariantCulture);

        using (WebClient wc = new WebClient())
        {
            for (int i = 0; i < imgurlAry.Length; i++)
            {
                string url = imgurlAry[i];

                // Skip tags without a URL (Replace("") would throw) and URLs that were
                // already handled: Replace below rewrites every occurrence at once.
                if (string.IsNullOrEmpty(url) || Array.IndexOf(imgurlAry, url) < i)
                {
                    continue;
                }

                // The index keeps names unique when two URLs share the same file name.
                string fileName = stamp + "_" + i + "_"
                    + SanitizeFileName(url.Substring(url.LastIndexOf('/') + 1));

                try
                {
                    wc.DownloadFile(url, Path.Combine(targetDir, fileName));

                    // Only rewrite the HTML once the local copy really exists.
                    strHTML = strHTML.Replace(url, urlPrefix + fileName);
                }
                catch (Exception ex)
                {
                    // Best effort: a single failing image must not stop the remaining
                    // downloads, but the failure is recorded instead of silently dropped.
                    System.Diagnostics.Trace.TraceWarning("SaveUrlPics: failed to download '{0}': {1}", url, ex.Message);
                }
            }
        }

        return imgurlAry.Length;
    }
}

}


至强工作室 www.haotaoci.com
原文地址:https://www.cnblogs.com/zhang/p/830308.html