最后几行代码应该有个 bug,问题出在最后一两步,不知道该怎么处理,求大神指点。
import re
from multiprocessing.dummy import Pool

import requests
from lxml import etree

# Browser-like request headers. The header name has to be spelled
# "User-Agent"; with the previous "User-Agen" key the browser string was
# sent under an unrelated header name and never used as the user agent.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4098.3 Safari/537.36'}
url = 'https://www.pearvideo.com/category_5'

# Fetch the category page and select one <li> element per listed video.
page_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
page_list = tree.xpath('//*[@id="listvideoListUl"]/li')

# Regex that extracts the direct video address from a detail page's source.
ex = 'srcUrl="(.*?)",vdoUrl'

# One {'name': ..., 'video_player': ...} dict per downloadable video.
url_list = []
for li in page_list:
    hrefs = li.xpath('./div/a/@href')
    titles = li.xpath('./div/a/div[2]/text()')
    if not hrefs or not titles:
        # List entry without a link or a title: nothing to download.
        continue

    lis = 'https://www.pearvideo.com/' + hrefs[0]
    video_name = titles[0] + '.mp4'

    lis_page_text = requests.get(url=lis, headers=headers).text
    matches = re.findall(ex, lis_page_text, re.S)
    if not matches:
        # The detail page did not contain a srcUrl entry; skip this video
        # instead of failing later with an empty URL.
        print('no video url found on ' + lis)
        continue

    # re.findall returns a list; the download needs the URL string itself.
    video_url = matches[0]
    dic = {
        'name': video_name,
        'video_player': video_url,
    }
    url_list.append(dic)


def Get_page_data(dic):
    """Download one video and write it to a file in the working directory.

    Args:
        dic: Mapping with the keys ``'name'`` (target file name) and
            ``'video_player'`` (direct URL of the video file).
    """
    # Look the values up by their string keys. The original indexed the
    # dict with the module-level variables video_url / video_name, which
    # are not keys of the dict and made every worker call fail.
    urls = dic['video_player']
    page_content = requests.get(url=urls, headers=headers).content
    with open(dic['name'], 'wb') as fp:
        fp.write(page_content)


# Download with four worker threads; map() blocks until every download has
# finished, and the with-block releases the pool afterwards.
with Pool(4) as pool:
    pool.map(Get_page_data, url_list)