No-Host-Given

import urllib
import urllib2
import re
import time
 
# Working directory: the input text file is read from here and the
# downloaded images are written here as <index>.jpg.
path="G:/123/"
# Name of the text file (inside `path`) that is scanned for image URLs.
path_file="1.txt"
 
def geturllist(text):
    """Return every ``https:`` ... ``hd.jpg`` image URL found in *text*.

    Args:
        text: The text to scan, either UTF-8 encoded bytes or an
            already-decoded string.

    Returns:
        A list of the matched URL strings in order of appearance
        (duplicates are kept); empty when nothing matches.

    Raises:
        UnicodeDecodeError: If *text* is bytes that are not valid UTF-8.
    """
    # Only decode raw bytes; calling .decode() on text that is already
    # decoded fails (or does an implicit ASCII round-trip on Python 2).
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    # Lazy \S*? stops at the first "hd.jpg", so adjacent URLs are not glued
    # into one match, and a match can never run across whitespace/newlines.
    # The dot is escaped so that only a literal ".jpg" suffix qualifies.
    return re.findall(r'https:\S*?hd\.jpg', text)
 
# Read the saved text and pull every matching image URL out of it.
# The ``with`` block closes the file on exit, so no explicit close() is needed.
with open(path + path_file, "r") as f:
    textall = f.read()
urllist = geturllist(textall)

# Remove duplicate values from the list, keeping first-seen order so the
# numbered output files come out the same from run to run.
urllistonly = []
seen = set()
for url in urllist:
    if url not in seen:
        seen.add(url)
        urllistonly.append(url)

for i, pic in enumerate(urllistonly):
    # The URLs in the text are backslash-escaped; rewrite each one into a
    # plain http:// address with forward slashes. Passing the raw form to
    # urlretrieve fails with "no host given".
    pic = pic.replace('002', '')
    pic = pic.replace('https:\\', 'http://')
    pic = pic.replace('\\', '/')
    # Save each image into the working directory as 0.jpg, 1.jpg, ...
    urllib.urlretrieve(pic, path + str(i) + ".jpg")
print('End!')
 
 
注意
https:\pic2.com50vc9ddaa4c92da.jpg
这个地址是无效的(反斜杠转义没有还原,缺少 `//主机名/` 部分),所以会报错:no host given。
 
http://pic2.com/50/vc9ddaa4c92da.jpg
把地址改成上面这样(以 http:// 开头,并用正斜杠分隔主机名和路径)就不报错了。
 
 
原文地址:https://www.cnblogs.com/myshuzhimei/p/11776815.html