# coding=utf-8
import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent string; passed as the `headers` argument of the
# requests.get calls in this script.
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
headers = {'user-agent': _USER_AGENT}
# Function that downloads an image once its download link has been obtained.
# See this post of mine for an explanation: https://www.cnblogs.com/kaibindirver/p/9950498.html
def download_file(url, timeout=30):
    """Download ``url`` into the current working directory.

    The local file name is the last ``/``-separated segment of ``url``.
    The response body is streamed to disk in 1 KiB chunks, so the whole
    file is never held in memory at once.

    Args:
        url: Address of the file to fetch.
        timeout: Seconds to wait on the server, forwarded to
            ``requests.get``. Without a timeout a stalled server would
            block the script indefinitely.

    Returns:
        The name of the local file that was written.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    print('Downding %s' % url)
    local_filename = url.split('/')[-1]
    # Using the response as a context manager guarantees the streamed
    # connection is released even if writing the file fails.
    with requests.get(url, stream=True, headers=headers, timeout=timeout) as r:
        # Check the status before opening the file so that an error page is
        # never saved to disk under an image file name.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    # No per-chunk flush: the file object buffers writes and
                    # flushes when the `with` block closes it.
                    f.write(chunk)
    return local_filename
url = 'http://jandan.net/drawings'
# Fetch the listing page, then hand its HTML text to BeautifulSoup using the
# 'html.parser' backend (the usual boilerplate for parsing a web page).
_listing_html = requests.get(url, headers=headers).text
soup = BeautifulSoup(_listing_html, 'html.parser')
# The src link must meet the requirements below.
def valid_img(src):
    """Return True if ``src`` is a JPG link on the img.jandan.net host.

    Intended as an attribute filter for ``find_all``. BeautifulSoup calls
    such a filter with ``None`` for tags that do not carry the attribute, so
    missing or empty values are rejected instead of raising AttributeError.

    Args:
        src: Value of an ``<img>`` tag's ``src`` attribute, or ``None``.

    Returns:
        ``True`` when ``src`` ends with ``'jpg'`` and contains
        ``'img.jandan.net'``; ``False`` otherwise.
    """
    if not src:
        return False
    return src.endswith('jpg') and 'img.jandan.net' in src
# Get the download links and download each image.
for img in soup.find_all('img', src=valid_img):
    src = img['src']
    # Links that do not already start with 'http' get an 'http:' prefix
    # (presumably scheme-less '//host/path' links -- confirm against the page).
    src = src if src.startswith('http') else 'http:' + src
    download_file(src)