Python之多进程根据p站画师id爬取

Python之p站根据id爬取图片(多进程)

import requests
import os
import time
import re
from multiprocessing import Process
from concurrent.futures import ProcessPoolExecutor
def test(id_p):
    """Download the original image of every illustration by one pixiv artist.

    Fetches ``/ajax/user/<id_p>/profile/all``, builds one illustration-page
    URL per key under ``body.illusts``, pulls the first ``"original":"..."``
    URL out of each page's text and writes the image bytes to a file inside
    a per-artist directory. Progress is printed on a single, overwritten
    console line.

    Args:
        id_p: The artist id, interpolated into the request URL and into the
            output directory name.

    Raises:
        requests.RequestException: If an HTTP request fails.
        OSError: If the output directory or a file cannot be written.
    """
    # NOTE(review): a session cookie is hard-coded here; it looks like a
    # personal login token and should presumably come from configuration.
    head = {
        'Referer': 'https://www.pixiv.net/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
        'cookie': 'PHPSESSID=43437028_7c06ec1fd0e152e26fa0dab9c9fa919e'
    }

    # Headers for the image download itself: same Referer host, no cookie.
    headss = {
        'Referer': 'https://www.pixiv.net',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
    }
    zp = f'https://www.pixiv.net/ajax/user/{id_p}/profile/all'

    # Create the output directory. exist_ok avoids the check-then-create
    # race when several worker processes start at the same time.
    # NOTE(review): this path looks like it lost its separators somewhere
    # upstream (possibly 'H:\\...\\'); kept verbatim - confirm the target.
    save_dir = f'H:图片P站作者id:{id_p}'
    os.makedirs(save_dir, exist_ok=True)

    res = requests.get(zp, headers=head)
    date = res.json()

    # Fall back to an empty mapping when 'body' or 'illusts' is missing or
    # empty, instead of crashing on .get()/.keys().
    illusts = (date.get('body') or {}).get('illusts') or {}

    # Build the illustration page URLs.
    url_jpg = [
        'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + k
        for k in illusts
    ]

    original_re = re.compile(r'"original":"(.*?)"')
    total = len(url_jpg)
    for su, i in enumerate(url_jpg, start=1):
        res_id = requests.get(i, headers=head)
        found = original_re.search(res_id.text)
        if found is None:
            # No image URL on this page: skip it rather than request ''.
            print(f'\n--------{id_p}------ no original image URL found in {i}, skipped')
            time.sleep(0.2)
            continue

        # Strip backslashes from the captured URL (presumably JSON-escaped
        # slashes of the form '\/').
        url = found.group(1).replace('\\', '')
        file_name = url.split('/')[-1]
        rese = requests.get(url, headers=headss)

        # Write inside the artist directory, not next to it.
        with open(os.path.join(save_dir, file_name), 'wb') as fw:
            fw.write(rese.content)

        # '\r' rewrites the same console line; the last item ends the line.
        print(f'\r--------{id_p}--------------{file_name}------{su}----------------',
              end='\n' if su == total else '')
        time.sleep(0.2)
    time.sleep(0.5)
    print(f'-----------{id_p}作品获取完成----------')



if __name__ == '__main__':
    def _report_failure(future):
        """Print the exception of a finished download task, if it raised."""
        if future.cancelled():
            return
        error = future.exception()
        if error is not None:
            # Without this, errors raised in a worker process are never seen.
            print(f'download task failed: {error!r}')

    # The context manager waits for queued downloads and shuts the pool down
    # once the input loop ends.
    with ProcessPoolExecutor(3) as pool:
        while True:
            try:
                id_p = input('输入作者id生成网址').strip()
            except EOFError:
                break
            if not id_p:
                # A blank line ends input instead of submitting an empty id.
                break
            pool.submit(test, id_p).add_done_callback(_report_failure)



原文地址:https://www.cnblogs.com/ledgua/p/11574060.html