# 百度音乐爬虫代码 (Baidu Music crawler code)

# 百度音乐爬虫代码_实现 (Baidu Music crawler code: implementation)

import requests
import re
import json

# 模拟浏览器去下载mp3
# url = 'http://zhangmenshiting.qianqian.com/data2/music/e93d963095b109ff47de85f1b41ffdd1/522883870/522883870.mp3?xcode=23787929c9177917ed47f60d337fa3fd'
# # 发送请求
# response = requests.get(url)
# # print(response.content)  # content 二进制数据
# # 持久化
# with open('test.mp3', 'wb') as f:
#     f.write(response.content)
# 下载mp3的id
# http://music.baidu.com/search?key=刘德华


def get_sids_by_name(name):
    """Search Baidu Music for ``name`` and return the song ids on the result page.

    Args:
        name: Search keyword (for example an artist name). It is sent as the
            ``key`` query parameter and URL-encoded by ``requests``.

    Returns:
        A list of song-id strings (digits only), in the order they appear in
        the page. The list is empty when the page contains no ``<ul>...</ul>``
        section or no ids inside it.

    Network errors raised by ``requests.get`` propagate to the caller.
    """
    url = 'http://music.baidu.com/search'
    response = requests.get(url, params={'key': name})
    # Force UTF-8 so ``response.text`` is decoded consistently.
    response.encoding = 'utf-8'
    html = response.text

    # Greedy DOTALL match: spans from the first "<ul" to the last "</ul>",
    # so every result list on the page is included in one chunk.
    ul_match = re.search(r'<ul.*</ul>', html, re.S)
    if ul_match is None:
        # No result list at all (e.g. nothing found or page layout changed).
        return []

    # Ids are embedded as HTML-escaped JSON, e.g.  sid&quot;:551560464,
    # ``\d+`` (with the backslash) is required to capture the numeric id.
    return re.findall(r'sid&quot;:(\d+),', ul_match.group(0))


# 根据 song_id 下载mp3
def get_mp3_by_id(song_id):
    """Download the mp3 for ``song_id`` and save it in the current directory.

    The song metadata is fetched from the ``baidu.ting.song.play`` JSONP
    endpoint; the audio is then downloaded from the ``show_link`` URL it
    reports and written to ``<title>.mp3``.

    Args:
        song_id: Song identifier (string or int) interpolated into the API URL.

    Returns:
        None. The downloaded bytes are written to disk as a side effect.

    Raises:
        ValueError: if the response is not a ``callback(...)`` JSONP wrapper
            or its payload is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
        KeyError: if the payload lacks ``songinfo.title`` or
            ``bitrate.show_link``.

    Network errors raised by ``requests.get`` propagate to the caller.
    """
    api = 'http://tingapi.ting.baidu.com/v1/restserver/ting?method=baidu.ting.song.play&format=jsonp&callback=jQuery17205500581185420972_1513324047403&songid=%s&_=1513324048127' % song_id
    response = requests.get(api)

    # The endpoint answers with JSONP: callback_name({...json...});
    # Take everything between the first "(" and the last ")" as the JSON text.
    # The parentheses must be escaped to be matched literally.
    wrapped = re.search(r'\((.*)\)', response.text, re.S)
    if wrapped is None:
        raise ValueError('unexpected (non-JSONP) response for song id %s' % song_id)
    data = json.loads(wrapped.group(1))

    title = data['songinfo']['title']
    mp3_url = data['bitrate']['show_link']

    # Fetch the raw audio bytes.
    mp3_data = requests.get(mp3_url).content

    # The title comes from the remote server: replace path separators and
    # other characters that are invalid in file names so the file is always
    # created inside the current directory.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
    with open('%s.mp3' % safe_title, 'wb') as f:
        f.write(mp3_data)

# Script driver: look up the song ids for the hard-coded search keyword,
# then download them one by one. Each id is printed before its download
# starts, so the console shows which song is currently being fetched.
sids = get_sids_by_name('刘德华')
for sid in sids:
    print(sid)
    get_mp3_by_id(sid)