抓取QQ音乐歌单

抓取QQ音乐歌单

1.通过分析歌曲下载路径来分析所需参数: 通过比较可知, 歌曲下载url中只有文件名部分与参数vkey是可变的。
下载url中可变的值是请求歌单返回的歌曲数据中的strMediaMid参数; 而vkey需要用歌曲数据中的songmid参数再次请求另一个url, 从返回结果中取得。
2.通过分析请求歌单url来分析所需参数: disstid
disstid的值为请求歌单列表(diss)的返回数据中每个歌单的dissid参数

import os
import re
import requests
from urllib.parse import urlencode
# Shared HTTP headers passed to requests.get by fetch_url and down_song:
# a desktop Chrome user-agent plus origin/referer values pointing at y.qq.com.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'origin': 'https://y.qq.com',
    'referer': 'https://y.qq.com/portal/playlist.html'
}


def fetch_url(url, timeout=10):
    """Fetch *url* with the shared ``headers`` and return its decoded JSON body.

    Args:
        url: Fully built request URL (query string already appended).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The parsed JSON payload when the status code is 200 or 201.
        ``None`` when the status code is anything else, when the request
        fails at the network level, or when the body is not valid JSON.
    """
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure (DNS, connection, timeout, ...): report it
        # and fall through to the "no data" result, as before.
        print(e)
        return None

    if r.status_code not in (200, 201):
        return None

    try:
        return r.json()
    except ValueError as e:
        # The body was not JSON; requests' decode errors derive from ValueError.
        print(e)
        return None


def down_song(path, strMediaMid, vkey, timeout=30):
    """Download one song as an ``.m4a`` file and write it to *path*.

    Args:
        path: Destination file path; opened in binary write mode.
        strMediaMid: Media id inserted into the ``C400<id>.m4a`` file name
            of the download URL.
        vkey: Value sent as the ``vkey`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        None. The file is only written when the server answers 200 or 201;
        any other status leaves *path* untouched.

    Raises:
        requests.RequestException: Network errors are not caught here.
    """
    params = {
        'guid': '5300386295',
        'vkey': vkey,
        'uin': '0',
        'fromtag': '66'
    }
    url = 'http://222.73.132.154/amobile.music.tc.qq.com/C400{}.m4a?'.format(strMediaMid)
    url += urlencode(params)
    r = requests.get(url, headers=headers, timeout=timeout)
    if r.status_code in [200, 201]:
        with open(path, 'wb') as f:
            f.write(r.content)


def get_vkey(songmid):
    """Request the ``vkey`` value for one song.

    Args:
        songmid: Song id placed in the ``songmid`` list of the request
            payload; it is converted with ``str()`` before being sent.

    Returns:
        The ``vkey`` field of the first ``midurlinfo`` entry in the response.

    Raises:
        TypeError: ``fetch_url`` returned ``None`` (request failed).
        KeyError, IndexError: The response lacks the expected fields.
    """
    # Local import so this function does not rely on a file-level import.
    import json

    url = 'https://u.y.qq.com/cgi-bin/musicu.fcg?'
    request_body = {
        'req_0': {
            'module': 'vkey.GetVkeyServer',
            'method': 'CgiGetVkey',
            'param': {
                'guid': '5300386295',
                'songmid': [str(songmid)],
                'songtype': [0],
                'uin': '0',
                'loginflag': 1,
                'platform': '20',
            },
        },
        'comm': {'uin': 0, 'format': 'json', 'ct': 24, 'cv': 0},
    }
    params = {
        '-': 'getplaysongvkey7256617694143965',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        # Serialise with json.dumps so quotes or backslashes in the id are
        # escaped; compact separators keep the text identical to the
        # hand-written payload for ordinary ids.
        'data': json.dumps(request_body, separators=(',', ':')),
    }
    url += urlencode(params)
    result = fetch_url(url)
    vkey = result['req_0']['data']['midurlinfo'][0]['vkey']
    return vkey


def get_song_info(disstid):
    """Yield ``(strMediaMid, songmid, songname)`` for every song of a playlist.

    Args:
        disstid: Playlist id sent as the ``disstid`` query parameter.

    Yields:
        One tuple per entry of ``cdlist[0].songlist`` in the response.
        The request is only made once iteration starts.
    """
    endpoint = 'https://c.y.qq.com/qzone/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?'
    query = {
        'type': '1',
        'json': '1',
        'utf8': '1',
        'onlysong': '0',
        'disstid': disstid,
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
    }
    payload = fetch_url(endpoint + urlencode(query))
    first_cd = payload['cdlist'][0]
    for entry in first_cd['songlist']:
        yield entry['strMediaMid'], entry['songmid'], entry['songname']


def get_dist_info(page):
    """Yield ``(dissid, dissname)`` for each playlist on one listing page.

    Args:
        page: 1-based page number (anything ``int()`` accepts). Each page
            covers 30 entries: page 1 requests ``sin=0`` and ``ein=29``.

    Yields:
        One tuple per entry of ``data.list`` in the response. The request is
        only made once iteration starts.
    """
    endpoint = 'https://c.y.qq.com/splcloud/fcgi-bin/fcg_get_diss_by_tag.fcg?'
    first_index = int(page) * 30 - 30
    last_index = first_index + 29
    query = {
        'picmid': '1',
        'rnd': '0.15993662911508766',
        'g_tk': '5381',
        'loginUin': '0',
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        'categoryId': '10000000',
        'sortId': '5',
        'sin': first_index,
        'ein': last_index,
    }
    payload = fetch_url(endpoint + urlencode(query))
    for entry in payload['data']['list']:
        yield entry['dissid'], entry['dissname']


def main(page):
    """Download every song of every playlist found on listing page *page*.

    Each song is saved as ``d://data/<playlist name>/<song name>.m4a``.
    Characters that cannot appear in a Windows file name, and line breaks,
    are replaced by a single space in both names.

    Args:
        page: Listing page number, forwarded to ``get_dist_info``.
    """
    # Compiled once: path separators, reserved characters and line breaks.
    # The original literal was split across two lines by a raw newline,
    # which is a syntax error; the line break is now written as escapes.
    pattern = re.compile(r'[\\/:*?"<>|\r\n]+')

    for dissid, dissname in get_dist_info(page):
        dissname = pattern.sub(" ", dissname)
        folder = 'd://data/{}/'.format(dissname)
        for strMediaMid, songMid, songname in get_song_info(dissid):
            vkey = get_vkey(songMid)
            songname = pattern.sub(" ", songname)
            # makedirs also creates d://data itself on the first run and is
            # a no-op when the folder already exists.
            os.makedirs(folder, exist_ok=True)
            path = 'd://data/{0}/{1}.m4a'.format(dissname, songname)
            print("正在下载:{}".format(songname))
            down_song(path, strMediaMid, vkey)
            print("下载完成:{}".format(songname))


# Script entry point: when run directly, process listing page 1.
if __name__ == '__main__':
    page = 1
    main(page)

  

原文地址:https://www.cnblogs.com/frank-shen/p/10345925.html