10 实例4:用多线程对视频的爬取

 1 """使用多线程爬取梨视频视频数据"""
 2 """https://www.cnblogs.com/zivli/p/11614103.html"""
 3 
 4 
 5 import requests
 6 import re
 7 from lxml import etree
 8 from multiprocessing.dummy import Pool
 9 
# Seconds to wait on each HTTP request; without a timeout, requests can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10

# Category listing page whose video entries are collected below.
url = 'https://www.pearvideo.com/category_5'
listing_response = requests.get(url=url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty result list.
listing_response.raise_for_status()
page_text = listing_response.text

tree = etree.HTML(page_text)
# 1. Select the <li> entries of the video list; each entry carries the link
#    to a video detail page.
li_list = tree.xpath('//ul[@id="listvideoListUl"]/li')
# Captures the value assigned to srcUrl in the detail page source.
# Compiled once here because the same pattern is applied to every detail page.
src_url_pattern = re.compile(r'srcUrl="(.*?)",vdoUrl')
# Collected download jobs: one {'name': ..., 'url': ...} dict per video.
url_list = []
for item in li_list:
    hrefs = item.xpath('./div/a/@href')
    titles = item.xpath('./div/a/div[2]/text()')
    if not hrefs or not titles:
        # An entry without a link or a title cannot be turned into a job;
        # indexing [0] on the empty result would abort the whole run.
        continue
    # 2. Build the detail page URL of this video.
    detail_url = "https://www.pearvideo.com/" + hrefs[0]
    name = titles[0] + '.mp4'
    # 3. Request the detail page.
    detail_page = requests.get(url=detail_url, timeout=REQUEST_TIMEOUT).text
    # 4. Extract the real video address from the detail page source.
    matches = src_url_pattern.findall(detail_page)
    if not matches:
        # Report and skip pages where the address is not present, rather
        # than crashing with IndexError.
        print(name, "未找到视频地址,已跳过。。。")
        continue
    url_list.append({
        'name': name,
        'url': matches[0],
    })
31 
32 
def get_video_data(d, timeout=30):
    """Download one video and write it to a local file in binary mode.

    :param d: mapping with the keys 'name' (output file name) and 'url'
        (address the video is fetched from).
    :param timeout: seconds requests waits for the server to connect or to
        send data before giving up.
    :return: None
    :raises requests.HTTPError: if the server answers with an error status.
    """
    # Announce the download before the request is made, so the message
    # really precedes the transfer.
    print(d['name'], "正在下载。。。")
    # stream=True keeps the body out of memory; it is copied to disk chunk
    # by chunk instead of being loaded as one bytes object.
    with requests.get(url=d['url'], stream=True, timeout=timeout) as response:
        # Do not save an HTTP error page under a .mp4 name.
        response.raise_for_status()
        with open(d['name'], 'wb') as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    print(d['name'], "下载成功。。。")
45 
46 
# Run the downloads concurrently on a pool of 4 workers. Pool comes from
# multiprocessing.dummy, so the workers are threads, not processes.
pool = Pool(processes=4)
# map() blocks until get_video_data has been called for every collected job.
pool.map(get_video_data, url_list)
# Stop accepting new work, then wait for the workers to exit.
pool.close()
pool.join()
原文地址:https://www.cnblogs.com/sruzzg/p/13041965.html