单线程爬取图片

这是之前写的一个简单的爬取妹子图的爬虫,下面是源代码:


 1 # -*- coding: utf-8 -*-
 2 
 3 import requests,time,urllib.request,os
 4 from multiprocessing import Process
 5 from lxml import etree
 6 
 7 #os.chdir("meizhu")切换工作目录
 8 print (os.getcwd())#查看当前工作目录
 9 
10 headers = {"User-Agent" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}
11 
12 
def use_proxy(proxy_addr="proxy_addr"):
    """Build a urllib opener that sends HTTP requests through a proxy.

    The previous version built the opener and then discarded it, so calling
    the function had no observable effect. The opener is now returned so the
    caller can use it directly (``opener.open(url)``) or make it the process
    default with ``urllib.request.install_opener(opener)``.

    Args:
        proxy_addr: Proxy address, e.g. ``"127.0.0.1:8080"``. The default is
            the placeholder string that used to be hard-coded; pass a real
            address to get a usable opener.

    Returns:
        The ``urllib.request.OpenerDirector`` configured with the proxy.
    """
    proxy = urllib.request.ProxyHandler({"http": proxy_addr})
    return urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
16 
def respon(imgurl):
    """Download every image referenced on one gallery index page.

    Fetches ``imgurl`` using the module-level ``headers``, collects the
    ``src`` of each ``<img>`` under ``div.pic``, saves each image into the
    current working directory as ``<name>.jpg``, then prints a completion
    message.

    Args:
        imgurl: URL of the index page to scrape.

    Raises:
        urllib.error.URLError: If the page or one of the images cannot be
            fetched.
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    req = urllib.request.Request(imgurl, headers=headers)
    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read instead of being leaked.
    with urllib.request.urlopen(req) as page:
        response = page.read().decode('utf-8')

    selector = etree.HTML(response)
    imgs = selector.xpath('//div[@class="pic"]/ul/li/a/img/@src')

    for imgname in imgs:
        img_src = str(imgname)
        parts = img_src.split('/')
        # The file name was originally taken from the sixth "/"-separated
        # field (index 5). Keep that when present, and fall back to the last
        # field so a shorter URL no longer raises IndexError.
        segment = parts[5] if len(parts) > 5 else parts[-1]
        imgnames = segment.split('.')[0] + ".jpg"
        # The download must run once per image, i.e. inside the loop.
        urllib.request.urlretrieve(img_src, filename=imgnames)

    print("爬取妹子图完成!!!!哈哈哈")
31 
if __name__ == "__main__":
    # Scrape index pages 1 through 99 one after another (single-threaded).
    # The loop and its body must be nested under the guard; without the
    # indentation the script does not parse.
    for i in range(1, 100):
        imgurl = 'http://www.mmjpg.com/home/' + str(i)
        respon(imgurl)
原文地址:https://www.cnblogs.com/Huangsh2017Come-on/p/7294256.html