语音识别和文字转语音(基于百度AI)

花了一天时间,直接上代码

语音识别

#Author:Alex.Zhang
import os
import requests
import json
import base64

# Configuration for the Baidu OAuth 2.0 token endpoint.
# NOTE(review): the API key and secret key below are hard-coded credentials
# published together with the source; consider rotating them and loading them
# from the environment or a config file instead.
baidu_server = 'https://aip.baidubce.com/oauth/2.0/token?'
grant_type = 'client_credentials'
client_id = 'umuduD7RyyO7OIsAGWHyuZeG'  # API KEY
client_secret = 'ay0ih0NhwAInGCgIdpmbvSG9nbl0KEw3'  # Secret KEY

# Token endpoint without the trailing '?'; the query string is passed through
# `params` so that requests URL-encodes every value.
url = baidu_server.rstrip('?')

# Request the access token. A timeout keeps the script from hanging forever
# if the server never answers.
token_response = requests.get(
    url,
    params={
        'grant_type': grant_type,
        'client_id': client_id,
        'client_secret': client_secret,
    },
    timeout=10,
)
res = token_response.text
data = json.loads(res)

# Fail with the full server payload instead of a bare KeyError when the
# response carries no token (for example when the credentials are rejected).
if 'access_token' not in data:
    raise RuntimeError('Baidu token request failed: {!r}'.format(data))
token = data['access_token']

# Properties of the audio to recognise: sample rate, file name, format.
VOICE_RATE = 16000
FILE_NAME = '666.wav'
USER_ID = 'Xu.zh'  # arbitrary caller id; the author just used a nickname
FILE_TYPE = 'wav'

# Read the raw audio bytes. The `with` block guarantees the file handle is
# closed even if reading fails.
with open(FILE_NAME, 'rb') as f_obj:
    raw_audio = f_obj.read()

# Base64-encode the audio and turn it into text so it can be embedded in JSON.
content = base64.b64encode(raw_audio)
speech = str(content, 'utf8')

# Byte length of the un-encoded audio, taken from the bytes actually read so
# it always matches the payload that was encoded above.
size = len(raw_audio)

# Serialise the recognition request: audio format, sample rate, channel
# count, caller id, access token, base64 audio and its raw byte length.
datas = json.dumps({
    'format': FILE_TYPE,
    'rate': VOICE_RATE,
    'channel': 1,
    'cuid': USER_ID,
    'token': token,
    'speech': speech,
    'len': size,
})

# Request headers and the recognition endpoint.
headers = {'Content-Type': 'application/json'}
url = 'https://vop.baidu.com/server_api'

# POST the payload. `data` and `headers` must be passed by keyword: the third
# positional parameter of requests.post is `json`, so a positional `headers`
# would never be sent as HTTP headers.
request = requests.post(url, data=datas, headers=headers, timeout=30)
result = json.loads(request.text)

# Check the error code before relying on 'result'; .get() avoids a KeyError
# when the response does not contain that key.
text = result.get('result')
if result.get('err_no') == 0:
    print(text)
else:
    # Show whatever error details the response carries (an 'err_msg' field is
    # assumed to accompany 'err_no' - confirm against the API documentation).
    print('返回错误!', result.get('err_no'), result.get('err_msg'))

文字转语音(需要先安装百度官方 Python SDK:pip install baidu-aip)

from aip import AipSpeech

# Fill in your own APPID, API key (AK) and secret key (SK).
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

# Each entry is (sentence to synthesise, value for the 'per' option).
# Even-numbered lines use per=3 and odd-numbered lines use per=4, so the two
# sides of the dialogue get different option values.
DIALOGUE = (
    ('你瞅啥?', 3),
    ('瞅你咋地。', 4),
    ('再瞅一个试试!', 3),
    ('试试就试试。', 4),
)

for i, (content, per) in enumerate(DIALOGUE):
    result = client.synthesis(content, 'zh', 1, {'spd': 0, 'vol': 5, 'per': per})
    filename = str(i)

    # On success the call returns the audio as binary data; on failure it
    # returns a dict describing the error. Report the failure instead of
    # skipping it silently, then carry on with the next sentence.
    if isinstance(result, dict):
        print('合成失败:', filename, result)
        continue

    # '文件的保存路径' is a placeholder prefix ("path where the file is
    # saved"); replace it with a real directory before running.
    with open('文件的保存路径' + filename + '.mp3', 'wb') as f:
        f.write(result)
原文地址:https://www.cnblogs.com/klausage/p/11729395.html