小爬虫：爬取一个贴吧网页中的图片（Python 2 版本）

#!/usr/bin/python
import re
import urllib

def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    Uses the Python 2 ``urllib.urlopen`` API, like the rest of this script.

    :param url: address of the page to download.
    :return: the page source exactly as returned by ``read()``.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the connection, even if read() raises, instead of
        # relying on garbage collection to close the handle.
        page.close()

def getImg(html):
    """Download every ``.jpg`` image referenced in ``html``.

    An image is any ``src="...jpg"`` attribute that is immediately followed
    by `` width``. Matches are saved under the relative names ``0.jpg``,
    ``1.jpg``, ... in the order they appear in the page.

    :param html: page source to scan.
    :return: None
    """
    # [^"] keeps the capture inside a single attribute value, and the escaped
    # dot demands a literal ".jpg" suffix rather than any character + "jpg".
    imgre = re.compile(r'src="([^"]*?\.jpg)" width')
    for x, imgurl in enumerate(imgre.findall(html)):
        urllib.urlretrieve(imgurl, '%s.jpg' % x)

# Script entry point: download the page source, then save the matching images.
html = getHtml(url="http://image.baidu.com/")
getImg(html=html)

  

 
Python 3.4 版本：仿照别人的脚本修改后运行成功
 
#!/usr/bin/python
# -*- coding: utf-8 -*-

import re
import urllib.request as urllib2

def getHtml(url):
    """Fetch ``url`` and return the undecoded response body.

    :param url: address of the page to download.
    :return: the body exactly as returned by ``read()`` (not decoded here).
    """
    # The context manager closes the response as soon as the body has been
    # read, instead of leaving it open until garbage collection.
    with urllib2.urlopen(url) as page:
        return page.read()

def getImage(html):
    """Download every ``http://imgsrc...jpg`` image referenced in a page.

    An image is any ``src`` attribute whose value starts with
    ``http://imgsrc``, ends with ``.jpg`` and is immediately followed by
    `` size``. Matches are saved in page order using the hard-coded ``E:``
    path template below, numbered from 0.

    :param html: page source as UTF-8 encoded bytes.
    :return: None
    :raises UnicodeDecodeError: if ``html`` is not valid UTF-8.
    """
    # [^"] keeps each capture inside a single attribute value, and the escaped
    # dot demands a literal ".jpg" suffix rather than any character + "jpg".
    imgre = re.compile(r'src="(http://imgsrc[^"]*?\.jpg)" size')
    html = html.decode('utf-8')
    for x, imgurl in enumerate(imgre.findall(html)):
        # Raw string: same resulting path, without the invalid "\%" escape.
        urllib2.urlretrieve(imgurl, r'E:\%s.jpg' % x)
# Script entry point: download the thread page, then save the matching images.
html = getHtml(url="http://tieba.baidu.com/p/4866459683")
getImage(html=html)

  

 

<wiz_tmp_tag id="wiz-table-range-border" contenteditable="false" style="display: none;">

原文地址:https://www.cnblogs.com/raisok/p/11989656.html