# python爬虫 (Python web crawler)

import os
from concurrent import futures

import requests
from lxml import etree

# Request headers sent with every HTTP call; the User-Agent is a desktop
# Chrome 78 string so requests do not go out with the default
# python-requests agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
}

def Htmlresp(url, timeout=10):
    """Fetch a listing page and return the image URLs found on it.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Without it a stalled connection blocks forever.

    Returns:
        A list with the ``data-original`` attribute value of every ``<img>``
        located under ``<div class="Left_bar">``; empty when nothing matches.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    html = etree.HTML(resp.text)
    # NOTE(review): etree.HTML appears to return None for some empty or
    # unparseable bodies -- guard so callers always receive a list.
    if html is None:
        return []
    imgUrls = html.xpath('.//div[@class="Left_bar"]//img/@data-original')
    return imgUrls

def DowlsImg(url, timeout=10):
    """Download one image and save it in the local ``img`` directory.

    The file name is the last ``/``-separated segment of ``url``.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Without it a stalled connection blocks forever.

    Returns:
        None. Progress is reported on stdout.
    """
    filename = url.split('/')[-1]
    resp = requests.get(url, headers=headers, timeout=timeout)
    if not resp.ok:
        # Do not store an HTTP error body on disk as if it were an image.
        print("{filename}下载失败 (HTTP {status})".format(
            filename=filename, status=resp.status_code))
        return
    # The target directory may not exist on a first run; exist_ok makes
    # this safe when several worker threads reach it concurrently.
    os.makedirs("img", exist_ok=True)
    with open(os.path.join("img", filename), 'wb') as file:
        file.write(resp.content)
    print("{filename}下载完成".format(filename=filename))


# Crawl listing pages 1-5 and download every image in a shared thread pool.
# One executor is used for the whole run; the ``with`` block waits for all
# queued downloads and shuts the worker threads down on exit.
with futures.ThreadPoolExecutor(max_workers=40) as ex:
    pending = {}
    for i in range(5):
        url = "http://www.win4000.com/meinvtag4_{0}.html".format(i + 1)
        imgUls = Htmlresp(url)
        print("开始下载{0}".format(url))
        for img in imgUls:
            # Swap the thumbnail size marker in the URL for the larger one.
            target = img.replace("250_350", "850_950")
            # Pass the callable and its argument separately: writing
            # submit(DowlsImg(target)) would run the download right here in
            # the main thread and hand its None result to the pool.
            pending[ex.submit(DowlsImg, target)] = target

    # Exceptions raised in workers are stored on the future; surface them
    # instead of letting them disappear silently.
    for done in futures.as_completed(pending):
        err = done.exception()
        if err is not None:
            print("{0}下载失败: {1}".format(pending[done], err))


# 原文地址 (original source): https://www.cnblogs.com/kjtt/p/10894917.html