spider-bilibili

# 话不多说,直接上源码
# Fetch one Bilibili video page, download its separate audio and video
# streams, save them locally, and merge them into a single file with ffmpeg.
import json
import os
import re
import subprocess
import urllib
import urllib.request as req

import requests

# Headers sent with every HTTP request made by this script.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
    'referer': 'https://www.bilibili.com/'
}

# NOTE: despite its name, ``video_path`` receives the *audio* track (.mp3);
# ``movie_path`` receives the video track (.mp4) and the merged output.
video_path = './video/'
movie_path = './movie/'

# Proxy mapping handed to every ``requests.get`` call. The original script
# referenced this name without defining it (NameError on the first request).
# ``None`` lets requests apply its default proxy handling; set it to a dict
# such as {'https': 'http://host:port'} to route traffic through a proxy.
proxies = None

# Seconds to wait on connect/read before giving up on a request.
REQUEST_TIMEOUT = 30

# Page that is scraped when ``get_download_url`` is called without arguments.
DEFAULT_URL = 'https://www.bilibili.com/video/av288087621/'

# Characters replaced in titles before they are used as file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _safe_filename(title: str) -> str:
    """Return *title* with path-hostile characters replaced by underscores."""
    return _UNSAFE_FILENAME_CHARS.sub('_', title)


def _first_match(pattern: str, text: str, what: str) -> str:
    """Return the first capture of *pattern* in *text*.

    Raises:
        IndexError: if the pattern does not match. This is the exception type
            the original ``re.findall(...)[0]`` raised, now with a message
            that says what was missing.
    """
    matches = re.findall(pattern, text)
    if not matches:
        raise IndexError('%s not found in page HTML' % what)
    return matches[0]


def _fetch(url: str) -> requests.Response:
    """GET *url* with the shared headers/proxies and fail on HTTP errors."""
    response = requests.get(
        url, headers=headers, proxies=proxies, timeout=REQUEST_TIMEOUT
    )
    # Without this check an error page would be parsed or saved as media.
    response.raise_for_status()
    return response


def get_html_data(url_path: str) -> str:
    """Return the decoded body of the page at *url_path*.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    return _fetch(url_path).text


def get_download_url(url: str = DEFAULT_URL) -> None:
    """Scrape *url* for its title and stream URLs, then download and merge.

    The title is read from ``<span class="tit">`` and the stream URLs from
    the JSON assigned to ``window.__playinfo__`` in the page source.

    Args:
        url: Video page to process. Defaults to the page the original script
            had hard-coded.

    Raises:
        IndexError: if the title or the playinfo script is not in the page.
        KeyError: if the playinfo JSON lacks the expected ``data.dash`` keys.
    """
    data_html = get_html_data(url)
    title = _first_match('<span class="tit">(.*?)</span>', data_html, 'title')
    raw_playinfo = _first_match(
        '<script>window.__playinfo__=(.*?)</script>', data_html, 'playinfo JSON'
    )
    dash = json.loads(raw_playinfo)['data']['dash']
    audio_url = dash['audio'][0]['backupUrl'][0]
    video_url = dash['video'][0]['backupUrl'][0]
    # download_data takes the audio URL first (its parameter is named
    # ``video_url``) and the video URL second (``movie_url``).
    download_data(title, audio_url, video_url)


def download_data(title: str, video_url: str, movie_url: str) -> None:
    """Download both streams fully into memory and hand them to be saved.

    Args:
        title: Title used for the output file names.
        video_url: URL of the *audio* stream (name kept for compatibility).
        movie_url: URL of the video stream.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    # Download the audio data
    video_data = _fetch(video_url).content
    # Download the video data
    movie_data = _fetch(movie_url).content
    local_save_data(title, video_data, movie_data)


def local_save_data(title: str, video_data: bytes, movie_data: bytes) -> None:
    """Write the audio and video bytes to disk, then merge them.

    Args:
        title: Title used for the file names (unsafe characters replaced).
        video_data: Audio bytes, written to ``video_path`` as ``.mp3``.
        movie_data: Video bytes, written to ``movie_path`` as ``.mp4``.
    """
    name = _safe_filename(title)
    # The target directories are not guaranteed to exist on a fresh checkout.
    os.makedirs(video_path, exist_ok=True)
    os.makedirs(movie_path, exist_ok=True)
    # Save the audio data
    with open(video_path + name + '.mp3', 'wb') as w:
        w.write(video_data)
    # Save the video data
    with open(movie_path + name + '.mp4', 'wb') as w:
        w.write(movie_data)
    merge_data(title)


def merge_data(title: str) -> None:
    """Merge the saved audio and video files for *title* with ffmpeg.

    The result is written to ``output.mp4`` inside ``movie_path``. The call
    blocks until ffmpeg exits, so the completion message is only printed
    after a successful merge; a failure is reported instead of raised.
    """
    print("视频合成开始:%s" % title)
    name = _safe_filename(title)
    # Argument list instead of a shell string: titles containing spaces or
    # shell metacharacters can neither break nor inject into the command.
    command = [
        'ffmpeg',
        '-i', movie_path + name + '.mp4',
        '-i', video_path + name + '.mp3',
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        movie_path + 'output.mp4',
    ]
    try:
        result = subprocess.run(command, check=False)
    except OSError as exc:
        # Typically raised when the ffmpeg executable cannot be started.
        print("视频合成失败:%s (%s)" % (title, exc))
        return
    if result.returncode != 0:
        print("视频合成失败:%s (ffmpeg 退出码 %d)" % (title, result.returncode))
        return
    print("视频合成结束:%s" % title)


if __name__ == '__main__':
    get_download_url()

  

原文地址:https://www.cnblogs.com/hello-python2020/p/14153990.html