Python 图片小爬虫

import contextlib
import os
import re
import urllib


 5 def rename(name): 
 6     name = name + '.jpg'
 7     return name  
 8 
 9 def getHtml(url):
10     page = urllib.urlopen(url)
11     html = page.read()
12     return html
13 
def getImg(html, save_dir=r"E:\pic"):
    """Download every ``.jpg`` image referenced in *html* into *save_dir*.

    Image URLs are taken from ``src="....jpg" pic_ext`` attributes. Files
    are written as ``1.jpg``, ``2.jpg``, ... in the order the URLs appear,
    and each file name is printed as it is processed.

    Args:
        html: Page source, as text or as the raw bytes returned by
            ``getHtml``.
        save_dir: Existing directory that receives the images. Defaults to
            the original hard-coded ``E:\\pic``.

    Raises:
        OSError: If ``save_dir`` cannot be entered (e.g. it does not exist).
        Errors from ``urlopen`` / ``open`` are propagated unchanged.

    Note:
        The process working directory is changed to *save_dir* and is left
        there on return, as in the original implementation.
    """
    # ``urllib.urlopen`` only exists on Python 2; on Python 3 the same
    # function lives in ``urllib.request``.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # On Python 3 the page arrives as bytes, which a text pattern cannot
    # search. Only the ASCII image URLs matter, so undecodable bytes are
    # replaced instead of raising. (On Python 2 ``bytes is str``: no-op.)
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    # The dot before ``jpg`` is escaped so that only a literal ".jpg"
    # suffix matches (the unescaped form also accepted e.g. "xjpg").
    img_pattern = re.compile(r'src="(.+?\.jpg)" pic_ext')
    imglist = img_pattern.findall(html)

    os.chdir(save_dir)
    for index, imgurl in enumerate(imglist, 1):
        # Read the whole image first so a failed download does not leave an
        # empty file behind.
        with contextlib.closing(urlopen(imgurl)) as img:
            data = img.read()

        name = rename(str(index))
        print(name)

        with open(name, 'wb') as f:
            f.write(data)




# Fetch the Tieba thread and save every image it references.
html = getHtml("http://tieba.baidu.com/p/3553148164")
getImg(html)
# Parenthesised so the statement is valid on Python 3 as well as Python 2;
# the printed text is unchanged.
print('pic save!')

爬取的网页是  http://tieba.baidu.com/p/3553148164

图片保存在 E 盘的 pic 文件夹下(即 E:\pic);该文件夹需要事先创建,否则 os.chdir 会报错。


爬取结果如下:


原文地址:https://www.cnblogs.com/vpoet/p/4659603.html