并发编程——线程池演示

线程池演示

from concurrent.futures import ThreadPoolExecutor
import time

# Shared executor for the demo; it is capped at 100 worker threads.
pool = ThreadPoolExecutor(max_workers=100)


def task(line, delay=10):
    """Print ``line`` and then block the calling thread for ``delay`` seconds.

    The sleep stands in for slow work in the thread-pool demo.

    Args:
        line: Value to print.
        delay: Seconds to sleep after printing. Defaults to 10, the value
            that was previously hard-coded, so existing callers are
            unaffected.
    """
    print(line)
    time.sleep(delay)


if __name__ == '__main__':
    # Queue 1000 jobs on the shared pool, one per integer in 0..999.
    for job_number in range(1000):
        pool.submit(task, job_number)


通过同步(串行)方式爬取某个网站的小视频
import requests
import re
# import os
# import uuid
#
#
# # 1.发送请求,获取响应数据
# def get_page(url):
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response
#
#
# # 2.解析并提取主页id号
# def parse_page(response):
#     '''
#     https://www.pearvideo.com/video_1630253
#     https://www.pearvideo.com/video_1630042
#     '''
#     # 将所有电影的详情页id号,匹配获取,并放到列表中
#     id_list = re.findall('href="video_(.*?)"', response.text, re.S)
#     # print(len(id_list))
#     id_list = list(set(id_list))
#     # print(len(id_list))
#     return id_list
#
#
# def parse_detail(response):
#     '''
#     srcUrl="https://video.pearvideo.com/mp4/adshort/20191206/cont-1630253-14671892_adpkg-ad_hd.mp4"
#     srcUrl="(.*?)"
#     '''
#     mp4_url = re.findall('srcUrl="(.*?)"', response.text, re.S)
#     # print(mp4_url, 111111)
#     if mp4_url:
#         return mp4_url[0]
#
#
# # 3.保存数据
# def save_movie(movie_url):
#     response = get_page(movie_url)
#
#     movie_dir = r'D:项目路径python13期day30梨视频'
#     movie_path = os.path.join(
#         movie_dir, str(uuid.uuid4()) + '.mp4'
#     )
#     # print(movie_path)
#     with open(movie_path, 'wb') as f:
#         for line in response.iter_content():
#             f.write(line)
#
#
# if __name__ == '__main__':
#     response = get_page('https://www.pearvideo.com/')
#
#     # 解析提取所有电影详情页id号
#     id_list = parse_page(response)
#     # print(id_list)
#
#     # 循环拼接详情页链接
#     for id_num in id_list:
#         url = f'https://www.pearvideo.com/video_{id_num}'
#         # print(url)
#
#         # 往详情页发送请求,
#         detail_response = get_page(url)
#         # print(detail_response.text)
#
#         # # 解析电影详情页,并提取视频的存放的地址
#         mp4_url = parse_detail(detail_response)
#         print(mp4_url)
#
#         # # 发送请求获取视频真实数据
#         # movie_response = get_page(mp4_url)
#
#         # response.content
#         save_movie(mp4_url)



# 异步爬取梨视频
import requests
import re
import os
import uuid

from concurrent.futures import ThreadPoolExecutor
# Executor shared by the crawler code below; capped at 100 worker threads.
pool = ThreadPoolExecutor(max_workers=100)


# Step 1: send the request and hand back the response.
def get_page(url):
    """Fetch ``url`` with an HTTP GET.

    Prints a progress line first. Returns the ``requests`` response object
    when its status code is 200; any other status yields ``None``.
    """
    print(f'发送get请求: {url}')
    reply = requests.get(url)
    return reply if reply.status_code == 200 else None


# Step 2: parse the home page and extract the video ids.
def parse_page(response):
    """Return the unique video ids linked from the home page.

    Detail-page links look like ``href="video_1630253"``; the text after
    ``video_`` is captured as the id.

    Args:
        response: Object exposing the page HTML as ``.text``, or ``None``
            (``get_page`` returns ``None`` for a non-200 status).

    Returns:
        A list of id strings with duplicates removed, in the order they
        first appear in the page. Empty when ``response`` is ``None`` or
        no link matches.
    """
    if response is None:
        return []

    ids = re.findall('href="video_(.*?)"', response.text, re.S)
    # dict.fromkeys de-duplicates while keeping first-seen order; going
    # through a set would return the ids in an arbitrary order that can
    # differ from run to run.
    return list(dict.fromkeys(ids))


# Parse a detail page and queue the download of the video it references.
def parse_detail(res):
    """Done-callback: find the video URL in a detail page and queue its download.

    The page is searched for ``srcUrl="..."``; the first match is submitted
    to the shared ``pool`` as a ``save_movie`` job.

    Args:
        res: Completed future whose ``result()`` is the return value of
            ``get_page``: a response exposing ``.text``, or ``None`` when
            the request did not return status 200.

    Raises:
        Whatever exception ``res.result()`` re-raises if the fetch itself
        failed.
    """
    res2 = res.result()
    print(res2)
    if res2 is None:
        # The fetch returned no page (non-200 status); there is nothing to
        # parse, and touching ``.text`` would raise AttributeError.
        return

    movie_url = re.findall('srcUrl="(.*?)"', res2.text, re.S)
    print(movie_url)
    if movie_url:
        pool.submit(save_movie, movie_url[0])


# Step 3: save the data.
def save_movie(movie_url):
    """Download the video at ``movie_url`` into the movie directory.

    The file is named with a random UUID4 plus the ``.mp4`` suffix. A
    response whose status is not 200 is skipped (matching ``get_page``)
    instead of being written to disk as if it were a video.

    Args:
        movie_url: Direct URL of the video file.
    """
    # NOTE(review): this path contains no separators -- it looks like the
    # backslashes were lost at some point; confirm the intended location.
    movie_dir = r'D:项目路径python13期day30梨视频'
    # Create the target directory on first use so open() below cannot fail
    # just because it does not exist yet.
    os.makedirs(movie_dir, exist_ok=True)
    movie_path = os.path.join(movie_dir, str(uuid.uuid4()) + '.mp4')

    # stream=True avoids loading the whole video into memory; the with-block
    # releases the connection when the download ends or fails.
    with requests.get(movie_url, stream=True) as response:
        if response.status_code != 200:
            return
        with open(movie_path, 'wb') as f:
            # iter_content() defaults to 1-byte chunks; use 64 KiB so the
            # file is not written one byte at a time.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)


if __name__ == '__main__':
    import datetime

    # Fetch the home page and walk over the video ids found in it.
    home_page = get_page('https://www.pearvideo.com/')
    for video_id in parse_page(home_page):
        # Detail page of one video.
        detail_url = f'https://www.pearvideo.com/video_{video_id}'

        # Submit the detail-page fetch to the pool without waiting for it.
        # add_done_callback(parse_detail): once the get_page job finishes,
        # its future is handed to parse_detail, and calling result() on that
        # future yields get_page's return value.
        fetch_job = pool.submit(get_page, detail_url)
        fetch_job.add_done_callback(parse_detail)

    print(datetime.datetime.now())
    # NOTE(review): the original carried the note "21:54 ---> 18:45" here,
    # presumably timings recorded by the author; its meaning is not evident
    # from the code.
我把月亮戳到天上 天就是我的 我把脚踩入地里 地就是我的 我亲吻你 你就是我的
原文地址:https://www.cnblogs.com/zhulipeng-1998/p/12863900.html