用 Python 3 的 requests 库爬取王者荣耀皮肤:单线程爬取与多线程爬取

先找到英雄编号:英雄列表 herolist.json 的地址是 https://pvp.qq.com/web201605/js/herolist.json
皮肤URL规律:https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/英雄编号/英雄编号-bigskin-皮肤编号.jpg

其他规律去别的博客看

单线程爬取

import requests
import json
import time

def hero(hero_name, hero_num, h_l):
    """Download the skin images of the given heroes, one request at a time.

    Args:
        hero_name: Hero display names, parallel to ``hero_num``; used to
            build the output file names.
        hero_num: Hero ids as they appear in the skin URL path.
        h_l: Base URL of the skin images. It is concatenated directly with
            the hero id, so it should end with "/".

    Each image is written to ``Hero/<name><skin number>.jpg`` relative to the
    current working directory.
    """
    import os  # local import so the file's import block stays untouched

    # Create the output folder up front so open() below cannot fail just
    # because the user forgot to create it by hand.
    os.makedirs("Hero", exist_ok=True)

    # Walk names and ids in lockstep instead of keeping a manual counter.
    for name, hero_id in zip(hero_name, hero_num):
        # Skin numbers start at 1 (there is no skin 0); probe 1..9.
        for sk_num in range(1, 10):
            skin_url = "{0}{1}/{1}-bigskin-{2}.jpg".format(h_l, hero_id, sk_num)
            # A timeout keeps one stalled connection from hanging the crawl.
            resp = requests.get(skin_url, timeout=10)
            if resp.status_code != 200:
                # Any non-200 answer (e.g. 404) is taken to mean this hero
                # has no further skins, so move on to the next hero.
                break
            label = str(name) + str(sk_num)
            # Progress output so the user can see which file is being saved.
            print("此时正在下载:" + label + "\n")
            with open("Hero/" + label + ".jpg", "wb") as f:
                f.write(resp.content)

def main():
    """Fetch the hero list and download every hero's skins sequentially.

    Reads the "cname" (name) and "ename" (number) fields of each entry in
    the hero list JSON, prints how many heroes were found, and hands the
    two parallel lists to ``hero``.
    """
    list_url = "https://pvp.qq.com/web201605/js/herolist.json"
    ua_header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
    }
    heroes = requests.get(list_url, headers=ua_header).json()
    skin_base = "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"

    # Pull the hero names and numbers out into two parallel lists.
    names = [entry["cname"] for entry in heroes]
    numbers = [entry["ename"] for entry in heroes]

    # Report the number of heroes found.
    print("HeroNumber:" + str(len(names)))
    hero(names, numbers, skin_base)


# Entry point: start the crawl only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

多线程爬取,效率超高,在爬数据的时候简直是利器!

import requests
import json
import threading
import time

def hero_1(hero_name, hero_num, h_l):
    """Download the skin images of the given heroes (thread worker).

    Args:
        hero_name: Hero display names, parallel to ``hero_num``; used to
            build the output file names.
        hero_num: Hero ids as they appear in the skin URL path.
        h_l: Base URL of the skin images. It is concatenated directly with
            the hero id, so it should end with "/".

    Each image is written to ``Hero/<name><skin number>.jpg`` relative to the
    current working directory.
    """
    import os  # local import so the file's import block stays untouched

    # exist_ok=True makes this safe when several workers run it at once.
    os.makedirs("Hero", exist_ok=True)

    # Walk names and ids in lockstep instead of keeping a manual counter.
    for name, hero_id in zip(hero_name, hero_num):
        # Skin numbers start at 1 (there is no skin 0); probe 1..14.
        for sk_num in range(1, 15):
            skin_url = "{0}{1}/{1}-bigskin-{2}.jpg".format(h_l, hero_id, sk_num)
            # A timeout keeps one stalled connection from hanging the worker.
            resp = requests.get(skin_url, timeout=10)
            if resp.status_code != 200:
                # Any non-200 answer is taken to mean this hero has no
                # further skins, so move on to the next hero.
                break
            label = str(name) + str(sk_num)
            print("此时正在下载:" + label + "\n")
            with open("Hero/" + label + ".jpg", "wb") as f:
                f.write(resp.content)


def main():
    """Fetch the hero list and download all skins using up to three threads.

    Reads the "cname" (name) and "ename" (number) fields of each entry in
    the hero list JSON, prints how many heroes were found, splits the heroes
    into contiguous chunks and starts one ``hero_1`` worker thread per chunk.
    The threads are started but not joined, so this function returns while
    the downloads are still running.
    """
    url = "https://pvp.qq.com/web201605/js/herolist.json"
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
    response = requests.get(url, headers=header)
    hero_list = response.json()
    h_l = "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"

    # Pull the hero names and numbers out into two parallel lists.
    hero_name = [entry["cname"] for entry in hero_list]
    hero_num = [entry["ename"] for entry in hero_list]
    print("HeroNumber:" + str(len(hero_name)))

    # Split by the actual number of heroes rather than a hard-coded 93:
    # a shorter list used to raise IndexError and a longer one silently
    # skipped every hero past index 92.
    thread_count = 3
    total = len(hero_num)
    # Ceiling division so every hero lands in a chunk; at least 1 so the
    # range() step below is valid even for an empty list.
    chunk_size = max(1, -(-total // thread_count))

    for start in range(0, total, chunk_size):
        end = start + chunk_size
        worker = threading.Thread(
            target=hero_1,
            args=(hero_name[start:end], hero_num[start:end], h_l),
        )
        worker.start()


# Entry point: start the multi-threaded crawl only when this file is run as
# a script, not when it is imported.
if __name__ == "__main__":
    main()

  

原文地址:https://www.cnblogs.com/wxzbk/p/10981098.html