并发编程——线程池演示

线程池演示

from concurrent.futures import ThreadPoolExecutor
import time

# Shared executor for the demo: it runs at most 100 worker threads at a time.
pool = ThreadPoolExecutor(max_workers=100)


def task(line, delay=10):
    """Print *line*, then block the calling thread for *delay* seconds.

    The sleep stands in for slow, blocking work so the effect of the
    thread pool's size is visible when many tasks are submitted.

    Args:
        line: Value to print; anything ``print`` accepts.
        delay: Seconds to sleep after printing. Defaults to 10, the
            duration that used to be hard-coded.

    Returns:
        None.
    """
    print(line)
    time.sleep(delay)


if __name__ == '__main__':
    # Hand 1000 jobs to the shared pool; submit() returns immediately, so
    # this loop only queues the work and does not wait for any job to finish.
    job_numbers = range(1000)
    for job_number in job_numbers:
        pool.submit(task, job_number)

通过同步（串行）方式爬取某个网站的小视频
import requests
import re
# import os
# import uuid
#
#
# # 1.发送请求，获取响应数据
# def get_page(url):
#     response = requests.get(url)
#     if response.status_code == 200:
#         return response
#
#
# # 2.解析并提取主页id号
# def parse_page(response):
#     '''
#     https://www.pearvideo.com/video_1630253
#     https://www.pearvideo.com/video_1630042
#     '''
#     # 将所有电影的详情页id号，匹配获取，并放到列表中
#     id_list = re.findall('href="video_(.*?)"', response.text, re.S)
#     # print(len(id_list))
#     id_list = list(set(id_list))
#     # print(len(id_list))
#     return id_list
#
#
# def parse_detail(response):
#     '''
#     srcUrl="https://video.pearvideo.com/mp4/adshort/20191206/cont-1630253-14671892_adpkg-ad_hd.mp4"
#     srcUrl="(.*?)"
#     '''
#     mp4_url = re.findall('srcUrl="(.*?)"', response.text, re.S)
#     # print(mp4_url, 111111)
#     if mp4_url:
#         return mp4_url[0]
#
#
# # 3.保存数据
# def save_movie(movie_url):
#     response = get_page(movie_url)
#
#     movie_dir = r'D:项目路径python13期day30梨视频'
#     movie_path = os.path.join(
#         movie_dir, str(uuid.uuid4()) + '.mp4'
#     )
#     # print(movie_path)
#     with open(movie_path, 'wb') as f:
#         for line in response.iter_content():
#             f.write(line)
#
#
# if __name__ == '__main__':
#     response = get_page('https://www.pearvideo.com/')
#
#     # 解析提取所有电影详情页id号
#     id_list = parse_page(response)
#     # print(id_list)
#
#     # 循环拼接详情页链接
#     for id_num in id_list:
#         url = f'https://www.pearvideo.com/video_{id_num}'
#         # print(url)
#
#         # 往详情页发送请求，
#         detail_response = get_page(url)
#         # print(detail_response.text)
#
#         # # 解析电影详情页，并提取视频的存放的地址
#         mp4_url = parse_detail(detail_response)
#         print(mp4_url)
#
#         # # 发送请求获取视频真实数据
#         # movie_response = get_page(mp4_url)
#
#         # response.content
#         save_movie(mp4_url)



# 异步爬取梨视频
import requests
import re
import os
import uuid

from concurrent.futures import ThreadPoolExecutor
# Executor shared by the crawler functions below; at most 100 worker threads.
pool = ThreadPoolExecutor(max_workers=100)


# 1. Send the request and return the response
def get_page(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response on success.

    Args:
        url: Address to request.
        timeout: Seconds ``requests`` waits for the server before giving
            up. Without a timeout a stalled server would block the calling
            (pool) thread indefinitely.

    Returns:
        The ``requests`` response when the status code is 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: Connection errors and timeouts raised
            by ``requests.get`` are not caught here.
    """
    print(f'发送get请求: {url}')
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return response
    # Any other status is reported to the caller as "no page".
    return None


# 2. Parse the home page and extract the video ids
def parse_page(response):
    """Collect the unique video ids linked from the home page.

    Detail pages are addressed as, for example:
        https://www.pearvideo.com/video_1630253
        https://www.pearvideo.com/video_1630042
    and the home page links to them with ``href="video_<id>"``.

    Args:
        response: Object exposing the page HTML as ``.text``, or ``None``
            (which is what ``get_page`` returns for a non-200 answer).

    Returns:
        A list of id strings with duplicates removed, in the order they
        first appear in the page. Empty when *response* is ``None`` or the
        page contains no matching links.
    """
    if response is None:
        return []

    found_ids = re.findall('href="video_(.*?)"', response.text, re.S)
    # dict.fromkeys drops duplicates but, unlike set(), keeps first-seen
    # order, so the result is the same on every run.
    return list(dict.fromkeys(found_ids))


# Parse a detail page and queue the download of its video
def parse_detail(res):
    """Extract the video URL from a fetched detail page and queue its download.

    Written to be used as a done-callback for a ``get_page`` task. The page
    embeds the video address as, for example:
        srcUrl="https://video.pearvideo.com/mp4/adshort/20191206/cont-1630253-14671892_adpkg-ad_hd.mp4"

    Args:
        res: Future-like object whose ``result()`` is the return value of
            ``get_page``: a response with ``.text``, or ``None`` when the
            page could not be fetched.

    Returns:
        None. When a video URL is found, a ``save_movie`` task for the first
        match is submitted to the shared ``pool``.
    """
    res2 = res.result()
    print(res2)
    # get_page yields None for a non-200 answer; there is nothing to parse.
    if res2 is None:
        return

    movie_url = re.findall('srcUrl="(.*?)"', res2.text, re.S)
    print(movie_url)
    if movie_url:
        # Only the first srcUrl on the page is downloaded.
        pool.submit(save_movie, movie_url[0])


# 3. Save the data
def save_movie(movie_url, movie_dir=r'D:项目路径python13期day30梨视频', timeout=30):
    """Download the video at *movie_url* into *movie_dir* under a random name.

    Args:
        movie_url: Direct URL of the video file.
        movie_dir: Directory the file is written to; created if missing.
            NOTE(review): the default looks like a Windows path whose
            separators were lost -- confirm the intended location.
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled download cannot occupy a pool thread forever.

    Returns:
        None. On a 200 answer the body is written to
        ``<movie_dir>/<uuid4>.mp4``; any other status is reported with a
        printed message and nothing is written.
    """
    # stream=True keeps the body out of memory until it is iterated below.
    with requests.get(movie_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            # Do not store an error page under an .mp4 name.
            print(f'下载失败({response.status_code}): {movie_url}')
            return

        os.makedirs(movie_dir, exist_ok=True)
        movie_path = os.path.join(
            movie_dir, str(uuid.uuid4()) + '.mp4'
        )
        with open(movie_path, 'wb') as f:
            # iter_content() defaults to 1-byte chunks; ask for 64 KiB
            # blocks so the file is not written one byte at a time.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)


if __name__ == '__main__':
    import datetime
    import threading

    response = get_page('https://www.pearvideo.com/')
    # get_page returns None for a non-200 answer; crawl nothing in that
    # case instead of failing inside parse_page.
    id_list = parse_page(response) if response is not None else []

    # Released once per finished callback so the main thread can tell when
    # every detail page has been parsed and its download has been queued.
    callbacks_done = threading.Semaphore(0)

    def _parse_then_signal(future):
        """Run parse_detail as the done-callback, then signal completion."""
        try:
            parse_detail(future)
        finally:
            callbacks_done.release()

    for id_num in id_list:
        # Detail page of one video
        url = f'https://www.pearvideo.com/video_{id_num}'

        # Fetch the detail page asynchronously. add_done_callback passes the
        # finished future to the callback; future.result() is the return
        # value of get_page.
        pool.submit(get_page, url).add_done_callback(_parse_then_signal)

    # NOTE(review): this timestamp marks the end of submission, not the end
    # of the downloads; move it below the shutdown call if the goal is to
    # time the whole crawl.
    print(datetime.datetime.now())
    # 21:54 ---> 18:45

    # Keep the main thread alive until every callback has run. Once the main
    # thread ends, the interpreter begins shutting executors down and
    # submit() raises RuntimeError, so callbacks firing later could no
    # longer queue their save_movie task.
    for _ in id_list:
        callbacks_done.acquire()
    # All downloads are queued now; wait for them to finish.
    pool.shutdown(wait=True)

我把月亮戳到天上，天就是我的；我把脚踩入地里，地就是我的；我亲吻你，你就是我的。