"""Download the pictures listed on sc.chinaz.com "huadetupian" gallery pages.

The script asks for a first and last page number, fetches every gallery page
in that range, extracts the image links from the page markup and stores each
image in a local directory.
"""
import os
import time
import urllib.error
import urllib.parse
import urllib.request
from urllib import request

from lxml import etree

# Browser-like User-Agent sent with every request (gallery pages and images).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
}

# Directory the images are written to unless the caller chooses another one.
DEFAULT_IMAGE_DIR = r'G:/untitled/zhiwu'

# Base used to resolve image links when the caller does not supply a page URL.
SITE_ROOT = 'http://sc.chinaz.com/'

# Seconds to wait on a network operation before giving up instead of hanging.
REQUEST_TIMEOUT = 30


def handle_request(url, page):
    """Build the request object for one gallery page.

    Args:
        url: URL template containing a single ``{}`` placeholder.
        page: 1-based page number.

    Returns:
        A ``urllib.request.Request`` for the page, carrying ``HEADERS``.
    """
    # The first page has no suffix; page N (N > 1) is addressed as "_N".
    suffix = '' if page == 1 else '_' + str(page)
    return urllib.request.Request(url=url.format(suffix), headers=HEADERS)


def download_img(image_src, dirpath=DEFAULT_IMAGE_DIR):
    """Download one image and save it under ``dirpath``.

    Args:
        image_src: Absolute URL of the image.
        dirpath: Target directory; created (including parents) if missing.

    Raises:
        ValueError: If ``image_src`` is not an absolute URL.
        OSError: If the download or the file write fails
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    os.makedirs(dirpath, exist_ok=True)

    # Name the file after the URL path only, so a query string or fragment
    # never ends up in the filename.
    filename = os.path.basename(urllib.parse.urlsplit(image_src).path)
    filepath = os.path.join(dirpath, filename)

    image_request = urllib.request.Request(url=image_src, headers=HEADERS)
    # Fetch the whole payload before opening the output file, so a failed
    # download does not leave an empty file behind.
    with urllib.request.urlopen(image_request, timeout=REQUEST_TIMEOUT) as response:
        payload = response.read()

    with open(filepath, 'wb') as fp:
        fp.write(payload)


def parse_content(content, base_url=SITE_ROOT):
    """Extract the image links from a gallery page and download each image.

    Args:
        content: HTML text of the gallery page.
        base_url: URL the page was fetched from; used to turn relative or
            protocol-relative ``src`` values into absolute URLs.
    """
    tree = etree.HTML(content)
    image_list = tree.xpath('//div[@id="container"]/div/div/a/img/@src')
    for raw_src in image_list:
        # download_img needs a full URL to fetch; it derives the local
        # filename from that URL itself.
        image_url = urllib.parse.urljoin(base_url, raw_src)
        try:
            download_img(image_url)
        except (OSError, ValueError) as err:
            # One broken link should not abort the remaining downloads.
            print('failed to download {}: {}'.format(image_url, err))


def main():
    """Prompt for a page range and download the images of every page in it."""
    url = 'http://sc.chinaz.com/tupian/huadetupian{}.html'
    start_page = int(input('请输入起始页码:'))
    end_page = int(input('请输入结束页码:'))
    for page in range(start_page, end_page + 1):
        page_request = handle_request(url, page)
        with urllib.request.urlopen(page_request, timeout=REQUEST_TIMEOUT) as response:
            content = response.read().decode()
        parse_content(content, base_url=page_request.full_url)
        # Pause between pages to avoid hammering the server.
        time.sleep(1)


if __name__ == '__main__':
    main()