Python练习六十九：urllib爬取练习

爬取图片：从指定链接的页面中提取所有图片地址，将图片下载到本地的 pictures 文件夹，并统计一共成功下载了多少张图片。

代码:

def fetch_pictures(url, dest_dir='pictures', timeout=30):
    """Download every lazy-loaded image referenced by a page and count them.

    The page at ``url`` is fetched, every ``<img class="lazy" src="...">``
    tag is located with a regular expression, and each referenced image is
    saved as ``<index>.jpg`` inside ``dest_dir``. Progress lines and the
    final count are printed to stdout.

    Args:
        url: Address of the page to scan. Relative and protocol-relative
            ``src`` values are resolved against this address.
        dest_dir: Directory that receives the images. It is created when
            missing and reused when it already exists. The process working
            directory is left untouched.
        timeout: Timeout in seconds applied to every network request.

    Returns:
        The number of images that were downloaded successfully.

    Raises:
        urllib.error.URLError: If the page itself cannot be fetched.
            Failures of individual images are reported and skipped.
    """
    # Imported locally so the function is self-contained: the surrounding
    # snippet does not show any module-level imports.
    import http.client
    import os
    import re
    import shutil
    import urllib.parse
    import urllib.request

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        # errors='replace' keeps one stray non-UTF-8 byte from aborting the run.
        html = response.read().decode('utf-8', errors='replace')

    r = re.compile('<img class="lazy" src="(.*?)"')    # adjust this pattern to match another page layout

    # Resolve "//host/x.jpg" and "/x.jpg" style links against the page URL;
    # absolute links pass through urljoin unchanged.
    picture_url_list = [urllib.parse.urljoin(url, src) for src in r.findall(html)]
    print(picture_url_list)

    # exist_ok lets the function be re-run without failing on the directory.
    os.makedirs(dest_dir, exist_ok=True)
    count = 0
    for i, picture_url in enumerate(picture_url_list):
        picture_path = os.path.join(dest_dir, str(i) + '.jpg')
        try:
            # Reuse the browser-like headers for the image request as well.
            picture_req = urllib.request.Request(picture_url, headers=headers)
            # urlopen runs first, so no empty file is left behind when the
            # request itself is rejected.
            with urllib.request.urlopen(picture_req, timeout=timeout) as picture, \
                    open(picture_path, 'wb') as out_file:
                shutil.copyfileobj(picture, out_file)
        except (OSError, ValueError, http.client.HTTPException):
            # OSError covers URLError/HTTPError/timeouts, ValueError covers
            # malformed URLs; KeyboardInterrupt is deliberately not caught.
            print("Fail to download " + picture_url)
        else:
            print("Success to download " + picture_url)
            count += 1
    print(count)
    return count

if __name__ == "__main__":
    # Script entry point: scan the 699pic home page for lazy-loaded images.
    fetch_pictures(url="http://699pic.com")
原文地址:https://www.cnblogs.com/pinpin/p/10648624.html