asp自动解析网页中的图片地址,并将其保存到本地服务器

程序实现功能:自动将远程页面的文件中的图片下载到本地。 

程序代码
<% 
'将本文保存为 save2local.asp 
'测试:save2local.asp?url=http://ent.sina.com.cn/s/m/2003-11-11/1411231388.html 
'本文根据 chinahuman 的《用asp自动解析网页中的图片地址,并将其保存到本地服务器》改编和优化 
'自动创建目录,自动将原文件名更名,文件格式的限制以及其他功能的一些优化 
'自动保存网页文件中 http://.... 格式的图片到本地 
'转载请注明出处:http://www.jaron.cn http://www.csdn.net/develop 
'参数设置开始 
url = request("url") 
localaddr = server.MapPath("images_remote/") '保存到本地的目录 
localdir = "images_remote/" 'http 访问的相对路径 
AllowFileExt = "jpg|bmp|png|gif" '支持的文件名格式 
'参数设置完毕 



if createdir(localaddr) = false then 
response.Write "创建目录失败,请检查目录权限" 
response.End 
end if 
response.Write Convert2LocalAddr(url,localaddr,localdir) 



function Convert2LocalAddr(url,localaddr,localdir) 
'参数说明 
'url 页面地址 
'localaddr 保存本地的物理地址 
'localdir 相对路径 
strContent = getHTTPPage(url) 
Set objRegExp = New Regexp 
objRegExp.IgnoreCase = True 
objRegExp.Global = True 
objRegExp.Pattern = "<img.+?>" 
Set Matches =objRegExp.Execute(strContent) 
For Each Match in Matches 
RetStr = RetStr & GetRemoteImages(Match.value) 
Next 
ImagesArray=split(RetStr,"||") 
RemoteImage="" 
LocalImage="" 
for i=1 to ubound(ImagesArray) 
if ImagesArray(i)<>"" and instr(RemoteImage,ImagesArray(i))<1 then 
fname=baseurl&cstr(i&mid(ImagesArray(i),instrrev(ImagesArray(i),"."))) 
ImagesFileName = ImagesArray(i) 
AllowFileExtArray = split(AllowFileExt,"|") 
isGetFile = false 
for tmp = 0 to ubound(AllowFileExtArray) 
if lcase(GetFileExt(ImagesFileName)) = ALlowFileExtArray(tmp) then 
isGetFile=True 
end if 
next 
if isGetFile = true then 
newfilename = GenerateRandomFileName(fname) 
call Save2Local(ImagesFileName,localaddr & "/" & newfilename) 
RemoteImage=RemoteImage&"||"& ImagesFileName 
LocalImage=LocalImage&"||" & localdir & newfilename 
end if 
end if 
next 
arrnew=split(LocalImage,"||") 
arrall=split(RemoteImage,"||") 
for i=1 to ubound(arrnew) 
strContent=replace(strContent,arrall(i),arrnew(i)) 
next 
Convert2LocalAddr = strContent 
end function 



function GetRemoteImages(str) 
Set objRegExp1 = New Regexp 
objRegExp1.IgnoreCase = True 
objRegExp1.Global = True 
objRegExp1.Pattern = "http://.+? " 
set mm=objRegExp1.Execute(str) 
For Each Match1 in mm 
tmpaddr = left(Match1.value,len(Match1.value)-1) 
GetRemoteImages=GetRemoteImages&"||" & replace(replace(tmpaddr,"""",""),"'","") 
next 
end function 



function getHTTPPage(url) 
on error resume next 
dim http 
set http=Server.createobject("Msxml2.XMLHTTP") 
Http.open "GET",url,false 
Http.send() 
if Http.readystate<>4 then exit function 
getHTTPPage=bytes2BSTR(Http.responseBody) 
set http=nothing 
if err.number<>0 then err.Clear 
end function 



Function bytes2BSTR(vIn) 
dim strReturn 
dim i,ThisCharCode,NextCharCode 
strReturn = "" 
For i = 1 To LenB(vIn) 
ThisCharCode = AscB(MidB(vIn,i,1)) 
If ThisCharCode < &H80 Then 
strReturn = strReturn & Chr(ThisCharCode) 
Else 
NextCharCode = AscB(MidB(vIn,i+1,1)) 
strReturn = strReturn & Chr(CLng(ThisCharCode) * &H100 + CInt(NextCharCode)) 
i = i + 1 
End If 
Next 
bytes2BSTR = strReturn 
End Function 



function getHTTPimg(url) 
on error resume next 
dim http 
set http=server.createobject("MSXML2.XMLHTTP") 
Http.open "GET",url,false 
Http.send() 
if Http.readystate<>4 then exit function 
getHTTPimg=Http.responseBody 
set http=nothing 
if err.number<>0 then err.Clear 
end function 



function Save2Local(from,tofile) 
dim geturl,objStream,imgs 
geturl=trim(from) 
imgs=gethttpimg(geturl) 
Set objStream = Server.CreateObject("ADODB.Stream") 
objStream.Type =1 
objStream.Open 
objstream.write imgs 
objstream.SaveToFile tofile,2 
objstream.Close() 
set objstream=nothing 
end function 



function geturlencodel(byval url)'中文文件名转换 
Dim i,code 
geturlencodel="" 
if trim(Url)="" then exit function 
for i=1 to len(Url) 
code=Asc(mid(Url,i,1)) 
if code<0 Then code = code + 65536 
If code>255 Then 
geturlencodel=geturlencodel&"%"&Left(Hex(Code),2)&"%"&Right(Hex(Code),2) 
else 
geturlencodel=geturlencodel&mid(Url,i,1) 
end if 
next 
end function 



Function GenerateRandomFileName(ByVal szFilename) '根据原文件名,自动以日期YYYY-MM-DD-RANDOM格式生成新文件名 
Randomize 
ranNum = Int(90000 * Rnd) + 10000 
If Month(Now) < 10 Then c_month = "0" & Month(Now) Else c_month = Month(Now) 
If Day(Now) < 10 Then c_day = "0" & Day(Now) Else c_day = Day(Now) 
If Hour(Now) < 10 Then c_hour = "0" & Hour(Now) Else c_hour = Hour(Now) 
If Minute(Now) < 10 Then c_minute = "0" & Minute(Now) Else c_minute = Minute(Now) 
If Second(Now) < 10 Then c_second = "0" & Second(Now) Else c_second = Minute(Now) 
fileExt_a = Split(szFilename, ".") 
FileExt = LCase(fileExt_a(UBound(fileExt_a))) 
GenerateRandomFileName = Year(Now) & c_month & c_day & c_hour & c_minute & c_second & "_" & ranNum & "." & FileExt 
End Function 



Function CreateDIR(ByVal LocalPath) '建立目录的程序,如果有多级目录,则一级一级的创建 
On Error Resume Next 
LocalPath = Replace(LocalPath, "", "/") 
Set FileObject = server.CreateObject("Scripting.FileSystemObject") 
patharr = Split(LocalPath, "/") 
path_level = UBound(patharr) 
For I = 0 To path_level 
If I = 0 Then pathtmp = patharr(0) & "/" Else pathtmp = pathtmp & patharr(I) & "/" 
cpath = Left(pathtmp, Len(pathtmp) - 1) 
If Not FileObject.FolderExists(cpath) Then FileObject.CreateFolder cpath 
Next 
Set FileObject = Nothing 
If Err.Number <> 0 Then 
CreateDIR = False 
Err.Clear 
Else 
CreateDIR = True 
End If 
End Function 



function GetfileExt(byval filename) 
fileExt_a=split(filename,".") 
GetfileExt=lcase(fileExt_a(ubound(fileExt_a))) 
end function 
%> 
有一点要注意的是,这里只是分析了以 http开头的图片的地址。否则不下载图片,可以转换一下地址就行了,再提供几个小函数,分析地址用的: 
程序代码
Function findurl(thisurl) 
if thisurl <> "" Then 
findurl="" 
strlen=InStr(8,thisurl,"/") 
If strlen=0 Then strlen =1 
findurl=Mid(thisurl,strlen,28) 
end if 
end Function 



function findurlpath(thisurl) 
thisurl = replace(thisurl,"//","@@") 
ary_tmp = split(thisurl,"/") 
for tmp = 0 to ubound(ary_tmp) 
if tmp < ubound(ary_tmp) then 
urltmp = urltmp & "/" & ary_tmp(tmp) 
end if 
next 
urltmp = right(urltmp,len(urltmp)-1) 
findurlpath = replace(urltmp,"@@","//") & "/" 
end function 



function findurlhost(thisurl) 
thisurl = replace(thisurl,"//","@@") 
ary_tmp = split(thisurl,"/") 
urltmp = ary_tmp(0) 
findurlhost = replace(urltmp,"@@","//") & "/" 
findurlhost = left(findurlhost,len(findurlhost)-1) 
end function 

  

原文地址:https://www.cnblogs.com/uuxanet/p/3284940.html