Web crawler: download images from a Baidu Image search result page by extracting the "objURL" links embedded in the page with a regex.

from urllib.request import urlopen, urlretrieve
import re

# Baidu Image search result page (keyword: 范冰冰, URL-encoded in the query string).
#url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1548299141933_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E7%BE%8E%E5%A5%B3'  # alternative keyword: 美女
url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1548300267853_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E8%8C%83%E5%86%B0%E5%86%B0'

# Fetch the result page and pull every original-image URL out of the
# embedded JSON ("objURL":"...") with a non-greedy regex.
html = urlopen(url)
obj = html.read().decode('utf-8')
urls = re.findall(r'"objURL":"(.*?)"', obj)

# Download the images one by one, numbering the local files.
index = 1
for url in urls:
    if index < 50:  # stop after 49 successful downloads
        try:
            print('Downloading image %d' % index)
            urlretrieve(url, '范冰冰' + str(index) + '.jpg')
            index += 1
        except Exception:
            print('Failed to download image %d' % index)
    else:
        print('Download finished')
        break
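
In practice Baidu may reject requests that do not carry a browser-like User-Agent, and urlretrieve cannot send custom headers. Below is a minimal sketch of the same flow using urllib.request.Request with a header; the header value, the fetch helper, and the output file naming are assumptions for illustration, not part of the original post.

from urllib.request import Request, urlopen
import re

# Assumed browser-like header; any recent User-Agent string should do.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

def fetch(target_url):
    """Fetch a URL with the custom header and return the raw bytes."""
    return urlopen(Request(target_url, headers=HEADERS)).read()

# Same Baidu search URL as above.
search_url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1548300267853_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E8%8C%83%E5%86%B0%E5%86%B0'

page = fetch(search_url).decode('utf-8')
img_urls = re.findall(r'"objURL":"(.*?)"', page)

for i, img_url in enumerate(img_urls[:49], start=1):
    try:
        # urlretrieve cannot attach headers, so write the fetched bytes directly.
        with open('范冰冰%d.jpg' % i, 'wb') as f:
            f.write(fetch(img_url))
        print('Downloaded image %d' % i)
    except Exception:
        print('Failed to download image %d' % i)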
Original post: https://www.cnblogs.com/xiaomai-rhce/p/10314359.html