Python信息采集

Python信息采集 - 喜马拉雅专辑歌曲

setting.py

import pymongo

# Ximalaya album track-list endpoint. The two %s placeholders are filled, in
# order, with the album id and the page number.
XMLY_URL = (
    "https://www.ximalaya.com/revision/play/album"
    "?albumId=%s&pageNum=%s&sort=-1&pageSize=30"
)

# Request headers sent with the track-list request (a desktop Chrome
# User-Agent string).
HEADER = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/73.0.3683.86 Safari/537.36"
    ),
}


# Database configuration: a MongoDB client for the local server and a handle
# to the "db3" database used by the scraper.
conn = pymongo.MongoClient(host="127.0.0.1", port=27017)
MONGODB = conn["db3"]


# Directory configuration: folders the downloaded audio files and cover
# images are written to.
MUSIC_PATH = "Music"
COVER_PATH = "Cover"

data.py

import time

from setting import XMLY_URL, HEADER, MONGODB, MUSIC_PATH, COVER_PATH
import requests, os
from uuid import uuid4

# Seconds to wait on each HTTP request. `requests` applies no timeout by
# default, so a stalled connection would otherwise hang the script forever.
REQUEST_TIMEOUT = 10


def _download(url, dest_path):
    """Download *url* and save the raw response body to *dest_path*.

    Args:
        url: Absolute URL of the resource to fetch.
        dest_path: File path the response bytes are written to.

    Returns:
        The ``requests.Response`` object of the download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            that an error page is never stored as a media file.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(dest_path, "wb") as f:
        f.write(response.content)
    return response


# Request page 1 of the track list of album 17514344.
my_url = XMLY_URL % ("17514344", "1")

res = requests.get(my_url, headers=HEADER, timeout=REQUEST_TIMEOUT)
res.raise_for_status()
data = res.json()
content_list = []

# open(..., "wb") does not create missing directories, so make sure both
# output folders exist before the first file is written.
os.makedirs(MUSIC_PATH, exist_ok=True)
os.makedirs(COVER_PATH, exist_ok=True)

# Tolerate a missing or null "data" / "tracksAudioPlay" entry instead of
# failing with AttributeError / TypeError.
tracks = (data.get("data") or {}).get("tracksAudioPlay") or []

for music_info in tracks:
    audio_url = music_info.get("src")
    if not audio_url:
        # Without an audio URL there is nothing to download for this track;
        # skip it rather than crash inside requests.get(None).
        continue

    # One random name is shared by the audio file and its cover image.
    filename = uuid4()

    music = {
        "music": "",
        "cover": "",
        "title": music_info.get("trackName"),
    }

    _download(audio_url, os.path.join(MUSIC_PATH, f"{filename}.mp3"))
    music["music"] = f"{filename}.mp3"

    cover_src = music_info.get("trackCoverPath")
    if cover_src:
        # The original code prefixes "http:" unconditionally; only do so when
        # the path does not already carry a scheme.
        if cover_src.startswith(("http://", "https://")):
            cover_url = cover_src
        else:
            cover_url = "http:" + cover_src
        cover = _download(cover_url, os.path.join(COVER_PATH, f"{filename}.jpg"))
        print(cover, "cover")
        music["cover"] = f"{filename}.jpg"

    content_list.append(music)

    # Short pause between tracks to avoid hammering the server.
    time.sleep(0.2)

# insert_many() rejects an empty document list, so only write when at least
# one track was collected.
if content_list:
    MONGODB.content.insert_many(content_list)
原文地址:https://www.cnblogs.com/konghui/p/10900587.html