Scraping Web Images with a Crawler (with Pagination)

Source code without pagination:

import requests
import re

url = 'https://www.qiushibaike.com/imgrank/'
# Send a browser-like User-Agent so the site does not reject the request
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
res = requests.get(url, headers=headers).text
# print(res)
# Extract the src attribute of every illustration image on the page
urls = re.findall('<img src="(.*?)" alt=".*?" class="illustration" width="100%" height="auto">', res)
print(urls)
for img_url in urls:
    # Use the last path segment as the local filename
    filename = img_url.split('/')[-1]
    # The scraped src is protocol-relative (starts with //), so prepend the scheme
    full_url = 'https:' + img_url
    response = requests.get(full_url, headers=headers)
    with open(filename, 'wb') as f:
        f.write(response.content)
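The img tags on that page use protocol-relative src values (starting with //), which is why the loop prepends 'https:'. As a quick sanity check of the regex against a made-up sample tag (the image URL below is illustrative, not real page content):

import re

# Hypothetical sample of the markup the regex targets
sample = '<img src="//pic.qiushibaike.com/system/pictures/12345/example.jpg" alt="demo" class="illustration" width="100%" height="auto">'
print(re.findall('<img src="(.*?)" alt=".*?" class="illustration" width="100%" height="auto">', sample))
# ['//pic.qiushibaike.com/system/pictures/12345/example.jpg']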


For pagination, we need a generic URL template:
url = 'https://www.qiushibaike.com/imgrank/page/%d/'
for page in range(1, 4):
    new_url = url % page  # format(url % page) is redundant; % already returns a string
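Each pass through the loop substitutes the page number into the %d placeholder:

for page in range(1, 4):
    print(url % page)
# https://www.qiushibaike.com/imgrank/page/1/
# https://www.qiushibaike.com/imgrank/page/2/
# https://www.qiushibaike.com/imgrank/page/3/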

import requests
import re

url = 'https://www.qiushibaike.com/imgrank/page/%d/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
for page in range(1, 4):
    # Fill the %d placeholder to get the URL of this page
    new_url = url % page
    res = requests.get(new_url, headers=headers).text
    # print(res)
    # Parse and download inside the loop, so every page is processed,
    # not just the last one
    urls = re.findall('<img src="(.*?)" alt=".*?" class="illustration" width="100%" height="auto">', res)
    print(urls)
    for img_url in urls:
        filename = img_url.split('/')[-1]
        # Prepend the scheme to the protocol-relative src
        full_url = 'https:' + img_url
        response = requests.get(full_url, headers=headers)
        with open(filename, 'wb') as f:
            f.write(response.content)
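A possible refinement, not from the original post: collect the downloads into a dedicated folder, reuse one connection via requests.Session, and skip failed requests instead of crashing. The folder name imgs and the 10-second timeout are arbitrary choices for this sketch:

import os
import re
import requests

url = 'https://www.qiushibaike.com/imgrank/page/%d/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}

os.makedirs('imgs', exist_ok=True)  # 'imgs' is an arbitrary folder name
session = requests.Session()        # reuse one connection for all requests
session.headers.update(headers)

for page in range(1, 4):
    try:
        res = session.get(url % page, timeout=10)
        res.raise_for_status()
    except requests.RequestException as e:
        print(f'skipping page {page}: {e}')
        continue
    for img_url in re.findall('<img src="(.*?)" alt=".*?" class="illustration" width="100%" height="auto">', res.text):
        filename = os.path.join('imgs', img_url.split('/')[-1])
        try:
            img = session.get('https:' + img_url, timeout=10)
            img.raise_for_status()
        except requests.RequestException as e:
            print(f'skipping {img_url}: {e}')
            continue
        with open(filename, 'wb') as f:
            f.write(img.content)

A Session keeps the same headers and the underlying TCP connection across all requests, which helps when downloading many small images from one host.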
Original article: https://www.cnblogs.com/serendipity-my/p/13670039.html