语音切割

# 设置分句的标志符号;可以根据实际需要进行修改
# cutlist = "。!?".decode('utf-8')

# Sentence-delimiter markers. The first two entries are the newline and tab
# characters, written as escapes so the literal stays on one line and parses.
# NOTE(review): ';' and '?' each appear twice -- possibly full-width variants
# that were normalised in transcription; confirm against the original text.
# NOTE(review): '...' and '、、、' are longer than one character, so a scan that
# tests one character at a time can never match them.
cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']


# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']


# 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
def FindToken(cutlist, char):
    """Tell whether ``char`` is one of the delimiters held in ``cutlist``.

    Returns True when ``char`` is a member of ``cutlist`` and False otherwise.
    """
    return char in cutlist


# 进行分句的核心函数
def Cut(cutlist, lines):
    """Split ``lines`` into sentences, each one ending with its delimiter.

    Args:
        cutlist: Collection of delimiter tokens; membership is tested with ``in``.
        lines: Text to split -- a string or any iterable of single characters.

    Returns:
        The sentences in input order. A delimiter stays attached to the
        sentence it closes. Text that follows the last delimiter is returned
        as a final item instead of being silently discarded.
    """
    sentences = []  # completed sentences, in input order
    pending = []  # characters collected since the previous delimiter

    for ch in lines:
        pending.append(ch)
        # Items are compared one at a time, so delimiters longer than a
        # single character never match when ``lines`` yields characters.
        if ch in cutlist:
            sentences.append(''.join(pending))
            pending = []

    # Flush the tail: input that does not end with a delimiter still counts.
    if pending:
        sentences.append(''.join(pending))
    return sentences


r_s = []  # stays empty: the sentence-splitting pass that filled it is disabled
# Disabled earlier variant: every line of the file was split with
# Cut(list(cutlist), list(lines)) and each stripped, non-blank piece was
# appended to r_s.

# Load the input text into one string, keeping the line breaks as they are.
parts = []
with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
    for lines in fr:
        # Only a line that is empty once spaces are removed is skipped; a
        # blank line still carries its '\n' and is therefore kept.
        if not lines.replace(' ', ''):
            continue
        parts.append(lines)
# Join once at the end instead of rebuilding the string on every line.
str_ = ''.join(parts)


from aip import AipSpeech

# NOTE(review): these credentials are committed in plain text; consider loading
# them from the environment or an untracked config file.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l


import math
# Number of characters sent per synthesis request.
# NOTE(review): the service limit is presumably counted in bytes, not
# characters -- confirm that 1024 characters of Chinese text fits.
bd_str_per_limit = 1024
rep_times = math.ceil(len(str_) / bd_str_per_limit)

# Loop-invariant setup. Backslashes are doubled so the Windows path parses
# ('\U' is an invalid escape and a trailing '\' would escape the quote).
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'liukeyuanCAKE_whole_para'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

for i in range(rep_times):
    cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]

    # Send only the current chunk; sending the whole text on every pass would
    # make the chunking pointless.
    result = client.synthesis(cut_str, 'zh', 1, {
        'vol': 5,
    })

    # On success the call returns the audio bytes; on failure it returns a dict.
    if isinstance(result, dict):
        print('synthesis failed for chunk {}: {}'.format(i, result))
        continue

    f_w = '{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', 'bd_str_per_limit', i, '.mp3')
    with open(f_w, 'wb') as f:
        f.write(result)

import os
# NOTE(review): os._exit ends the process immediately with status 2 and skips
# normal interpreter cleanup -- confirm a hard exit is intended here.
os._exit(2)

  

换行符影响 

# 设置分句的标志符号;可以根据实际需要进行修改
# cutlist = "。!?".decode('utf-8')

# Sentence-delimiter markers. The first two entries are the newline and tab
# characters, written as escapes so the literal stays on one line and parses.
# NOTE(review): ';' and '?' each appear twice -- possibly full-width variants
# that were normalised in transcription; confirm against the original text.
# NOTE(review): '...' and '、、、' are longer than one character, so a scan that
# tests one character at a time can never match them.
cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']


# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']


# 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
def FindToken(cutlist, char):
    """Tell whether ``char`` is one of the delimiters held in ``cutlist``.

    Returns True when ``char`` is a member of ``cutlist`` and False otherwise.
    """
    return char in cutlist


# 进行分句的核心函数
def Cut(cutlist, lines):
    """Split ``lines`` into sentences, each one ending with its delimiter.

    Args:
        cutlist: Collection of delimiter tokens; membership is tested with ``in``.
        lines: Text to split -- a string or any iterable of single characters.

    Returns:
        The sentences in input order. A delimiter stays attached to the
        sentence it closes. Text that follows the last delimiter is returned
        as a final item instead of being silently discarded.
    """
    sentences = []  # completed sentences, in input order
    pending = []  # characters collected since the previous delimiter

    for ch in lines:
        pending.append(ch)
        # Items are compared one at a time, so delimiters longer than a
        # single character never match when ``lines`` yields characters.
        if ch in cutlist:
            sentences.append(''.join(pending))
            pending = []

    # Flush the tail: input that does not end with a delimiter still counts.
    if pending:
        sentences.append(''.join(pending))
    return sentences


r_s = []  # stays empty: the sentence-splitting pass that filled it is disabled
# Disabled earlier variant: every line of the file was split with
# Cut(list(cutlist), list(lines)) and each stripped, non-blank piece was
# appended to r_s.

# Load the input text into one string with every line break removed.
parts = []
with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
    for lines in fr:
        # Only a line that is empty once spaces are removed is skipped; a
        # blank line still carries its '\n', passes this check, and then
        # contributes nothing once the newline is stripped below.
        if not lines.replace(' ', ''):
            continue
        parts.append(lines.replace('\n', ''))
# Join once at the end instead of rebuilding the string on every line.
str_ = ''.join(parts)


from aip import AipSpeech

# NOTE(review): these credentials are committed in plain text; consider loading
# them from the environment or an untracked config file.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l


import math
# Number of characters sent per synthesis request.
# NOTE(review): the service limit is presumably counted in bytes, not
# characters -- confirm that 1024 characters of Chinese text fits.
bd_str_per_limit = 1024
rep_times = math.ceil(len(str_) / bd_str_per_limit)

# Loop-invariant setup. Backslashes are doubled so the Windows path parses
# ('\U' is an invalid escape and a trailing '\' would escape the quote).
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'liukeyuanCAKE_whole_para'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

for i in range(rep_times):
    cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]
    print(cut_str)
    print('----------------------------------')

    result = client.synthesis(cut_str, 'zh', 1, {
        'vol': 5,
    })

    # On success the call returns the audio bytes; on failure it returns a dict.
    if isinstance(result, dict):
        print('synthesis failed for chunk {}: {}'.format(i, result))
        continue

    f_w = '{}{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', 'bd_str_per_limit', '_NO_trN_', i, '.mp3')
    with open(f_w, 'wb') as f:
        f.write(result)

import os
# NOTE(review): os._exit ends the process immediately with status 2 and skips
# normal interpreter cleanup (buffered print output may be lost when stdout is
# redirected) -- confirm a hard exit is intended here.
os._exit(2)

  

# 设置分句的标志符号;可以根据实际需要进行修改
# cutlist = "。!?".decode('utf-8')

# Sentence-delimiter markers. The first two entries are the newline and tab
# characters, written as escapes so the literal stays on one line and parses.
# NOTE(review): ';' and '?' each appear twice -- possibly full-width variants
# that were normalised in transcription; confirm against the original text.
# NOTE(review): '...' and '、、、' are longer than one character, so a scan that
# tests one character at a time can never match them.
cutlist = ['\n', '\t', '。', ';', '?', '.', ';', '?', '...', '、、、', ':']


# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',':',',']
# cutlist = [ '。', ';', '?', '.', ';', '?', '...', '、、、',':',',','、']


# 检查某字符是否分句标志符号的函数;如果是,返回True,否则返回False
def FindToken(cutlist, char):
    """Tell whether ``char`` is one of the delimiters held in ``cutlist``.

    Returns True when ``char`` is a member of ``cutlist`` and False otherwise.
    """
    return char in cutlist


# 进行分句的核心函数
def Cut(cutlist, lines):
    """Split ``lines`` into sentences, each one ending with its delimiter.

    Args:
        cutlist: Collection of delimiter tokens; membership is tested with ``in``.
        lines: Text to split -- a string or any iterable of single characters.

    Returns:
        The sentences in input order. A delimiter stays attached to the
        sentence it closes. Text that follows the last delimiter is returned
        as a final item instead of being silently discarded.
    """
    sentences = []  # completed sentences, in input order
    pending = []  # characters collected since the previous delimiter

    for ch in lines:
        pending.append(ch)
        # Items are compared one at a time, so delimiters longer than a
        # single character never match when ``lines`` yields characters.
        if ch in cutlist:
            sentences.append(''.join(pending))
            pending = []

    # Flush the tail: input that does not end with a delimiter still counts.
    if pending:
        sentences.append(''.join(pending))
    return sentences


r_s = []  # stays empty: the sentence-splitting pass that filled it is disabled
# Disabled earlier variant: every line of the file was split with
# Cut(list(cutlist), list(lines)) and each stripped, non-blank piece was
# appended to r_s.

# Load the input text into one string, dropping blank lines and line breaks.
parts = []
with open('mybaidu.parp.b.txt', 'r', encoding='utf-8') as fr:
    for lines in fr:
        # Skip a line that holds nothing but spaces and/or its newline.
        if not lines.replace(' ', '').replace('\n', ''):
            continue
        parts.append(lines.replace('\n', ''))
# Join once at the end instead of rebuilding the string on every line.
str_ = ''.join(parts)


from aip import AipSpeech

# NOTE(review): these credentials are committed in plain text; consider loading
# them from the environment or an untracked config file.
bd_k_l = ['11059852', '5Kk01GtG2fjCwpzEkwdn0mjw', 'bp6Wyx377Elq7RsCQZzTBgGUFzLm8G2A']
APP_ID, API_KEY, SECRET_KEY = bd_k_l


import math
# Number of characters sent per synthesis request (an earlier run used 1024).
bd_str_per_limit = 300
rep_times = math.ceil(len(str_) / bd_str_per_limit)

# Loop-invariant setup. Backslashes are doubled so the Windows path parses
# ('\U' is an invalid escape and a trailing '\' would escape the quote).
mp3_dir = 'C:\\Users\\sas\\PycharmProjects\\produce_video\\result_liukeyun\\'
uid = 'CAKE'
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

for i in range(rep_times):
    cut_str = str_[i * bd_str_per_limit:(i + 1) * bd_str_per_limit]
    print(cut_str)
    print('----------------------------------')

    result = client.synthesis(cut_str, 'zh', 1, {
        'vol': 5,
    })

    # On success the call returns the audio bytes; on failure it returns a dict.
    if isinstance(result, dict):
        print('synthesis failed for chunk {}: {}'.format(i, result))
        continue

    f_w = '{}{}{}{}{}{}{}{}'.format(mp3_dir, 'g3db', uid, 'g3uid', 'noBRBlankLine', '', i, '.mp3')
    with open(f_w, 'wb') as f:
        f.write(result)

import os
# NOTE(review): os._exit ends the process immediately with status 2 and skips
# normal interpreter cleanup (buffered print output may be lost when stdout is
# redirected) -- confirm a hard exit is intended here.
os._exit(2)

  

原文地址:https://www.cnblogs.com/rsapaper/p/8795540.html