时域特征--线性预测系数(LPC)以及LPCC

欲知更多,请关注公众号:音频探险记

LPC

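线性预测系数的基本思想:由于语音样点之间存在相关性,当前点/未来点可以用过去的 p 个样本点进行预测,即
$$\hat{x}(n) = \sum_{i=1}^{p} a_i\, x(n-i)$$
其中 $a_i$ 就是要求的LPC,$p$ 表示预测阶数。
(注意:下面的代码返回的是预测误差滤波器 $A(z) = 1 + \sum_{k=1}^{p} \alpha_k z^{-k}$ 的系数,首项为 1,与上式的关系为 $a_i = -\alpha_i$。)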
好处:线性预测提供了一种估计声道模型及其模型参数的方法,广泛用于语音识别以及语音合成中。

import librosa
import python_speech_features
import soundfile as sf
import numpy as np
def lpc(y, order):
    """Estimate linear-prediction coefficients with Burg's method.

    Parameters
    ----------
    y : array_like
        One-dimensional signal. Integer and boolean input is converted to
        float64 first; otherwise the coefficient buffers would inherit the
        integer dtype and every coefficient would be truncated on storage.
    order : int
        Prediction order. Must be non-negative and, when non-zero, strictly
        smaller than ``len(y)``.

    Returns
    -------
    numpy.ndarray
        ``order + 1`` coefficients of the prediction-error filter. The first
        coefficient is always 1; the array has the (floating) dtype of ``y``.

    Raises
    ------
    ValueError
        If ``y`` is not one-dimensional or ``order`` is out of range.
    FloatingPointError
        If the error-energy denominator becomes non-positive before all
        reflection coefficients have been computed (e.g. all-zero input).
    """
    y = np.asarray(y)
    if y.ndim != 1:
        raise ValueError("lpc expects a one-dimensional signal, got ndim=%d" % y.ndim)
    if order < 0 or (order > 0 and order >= y.shape[0]):
        # Each iteration shortens the error vectors by one sample; without
        # this check a too-large order fails later on an empty slice.
        raise ValueError(
            "order must satisfy 0 <= order < len(y); got order=%r, len(y)=%d"
            % (order, y.shape[0])
        )
    if y.dtype.kind in "biu":
        y = y.astype(np.float64)

    dtype = y.dtype.type
    # Two buffers that are swapped every iteration: the current and the
    # previous-order coefficient sets.
    ar_coeffs = np.zeros(order + 1, dtype=dtype)
    ar_coeffs[0] = dtype(1)
    ar_coeffs_prev = np.zeros(order + 1, dtype=dtype)
    ar_coeffs_prev[0] = dtype(1)

    # Forward and backward prediction errors of the order-0 predictor.
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]
    # Total energy of both error sequences.
    den = np.dot(fwd_pred_error, fwd_pred_error) + np.dot(bwd_pred_error, bwd_pred_error)

    for i in range(order):
        if den <= 0:
            raise FloatingPointError("numerical error, input ill-conditioned?")
        reflect_coeff = dtype(-2) * np.dot(bwd_pred_error, fwd_pred_error) / dtype(den)

        # Levinson-style order update: a_new[j] = a_old[j] + k * a_old[i + 1 - j]
        # for j = 1 .. i + 1 (the reversed slice walks indices i, i-1, ..., 0).
        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        ar_coeffs[1:i + 2] = (
            ar_coeffs_prev[1:i + 2] + reflect_coeff * ar_coeffs_prev[i::-1]
        )

        # Update both error sequences from their pre-update values.
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp

        # The combined energy of the updated errors is (1 - k^2) * den; the
        # two samples dropped by the trimming below are subtracted from it.
        q = dtype(1) - reflect_coeff ** 2
        den = q * den - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2

        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]

    return ar_coeffs
# Load the example recording; soundfile returns the samples and the sample rate.
y, sr = sf.read('q1.wav')

# Framing parameters. They are computed here but the call below analyses the
# whole signal at once rather than frame by frame.
frame_size = 160
num_frames = y.shape[0] // frame_size

# 32nd-order LPC of the entire recording.
print(lpc(y, order=32))
'''
[ 1.00000000e+00 -3.95327600e+00  8.29868847e+00 -1.27752183e+01
  1.60320420e+01 -1.71512784e+01  1.59802135e+01 -1.29520778e+01
  8.83717438e+00 -4.58646820e+00  8.91619704e-01  1.80827086e+00
 -3.30606685e+00  3.68847432e+00 -3.13823922e+00  2.08868507e+00
 -1.04485702e+00  1.47865339e-01  6.35567557e-01 -1.15391128e+00
  1.35048967e+00 -1.27918423e+00  9.65718801e-01 -5.09474786e-01
  5.94380366e-03  4.28867366e-01 -7.08129489e-01  8.19126446e-01
 -7.55779509e-01  5.73570390e-01 -3.63595930e-01  1.78320700e-01
 -4.54597679e-02]
'''

LPCC

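LPCC全称线性预测倒谱系数(linear predictive cepstral coefficient, LPCC)。在计算得到LPC后,可以按照如下的递推公式计算得到LPCC:
$$c_1 = a_1,\qquad c_n = a_n + \sum_{k=1}^{n-1}\frac{k}{n}\,c_k\,a_{n-k}\quad(1 < n \le p),\qquad c_n = \sum_{k=n-p}^{n-1}\frac{k}{n}\,c_k\,a_{n-k}\quad(n > p)$$
其中 $a_k$ 为预测系数(满足 $\hat{x}(n) = \sum_{k=1}^{p} a_k\, x(n-k)$),$p$ 为LPC阶数。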
LPCC是LPC系数在倒谱域的表示,计算量小易于实现,对元音的描述能力较好,对辅音的描述能力较差,抗噪性能差[1]
[1] 数字语音处理及MATLAB仿真
相应代码如下

import librosa
import python_speech_features
import soundfile as sf
import numpy as np
def lpc(y, order):
    """Estimate linear-prediction coefficients with Burg's method.

    Parameters
    ----------
    y : array_like
        One-dimensional signal. Integer and boolean input is converted to
        float64 first; otherwise the coefficient buffers would inherit the
        integer dtype and every coefficient would be truncated on storage.
    order : int
        Prediction order. Must be non-negative and, when non-zero, strictly
        smaller than ``len(y)``.

    Returns
    -------
    numpy.ndarray
        ``order + 1`` coefficients of the prediction-error filter. The first
        coefficient is always 1; the array has the (floating) dtype of ``y``.

    Raises
    ------
    ValueError
        If ``y`` is not one-dimensional or ``order`` is out of range.
    FloatingPointError
        If the error-energy denominator becomes non-positive before all
        reflection coefficients have been computed (e.g. all-zero input).
    """
    y = np.asarray(y)
    if y.ndim != 1:
        raise ValueError("lpc expects a one-dimensional signal, got ndim=%d" % y.ndim)
    if order < 0 or (order > 0 and order >= y.shape[0]):
        # Each iteration shortens the error vectors by one sample; without
        # this check a too-large order fails later on an empty slice.
        raise ValueError(
            "order must satisfy 0 <= order < len(y); got order=%r, len(y)=%d"
            % (order, y.shape[0])
        )
    if y.dtype.kind in "biu":
        y = y.astype(np.float64)

    dtype = y.dtype.type
    # Two buffers that are swapped every iteration: the current and the
    # previous-order coefficient sets.
    ar_coeffs = np.zeros(order + 1, dtype=dtype)
    ar_coeffs[0] = dtype(1)
    ar_coeffs_prev = np.zeros(order + 1, dtype=dtype)
    ar_coeffs_prev[0] = dtype(1)

    # Forward and backward prediction errors of the order-0 predictor.
    fwd_pred_error = y[1:]
    bwd_pred_error = y[:-1]
    # Total energy of both error sequences.
    den = np.dot(fwd_pred_error, fwd_pred_error) + np.dot(bwd_pred_error, bwd_pred_error)

    for i in range(order):
        if den <= 0:
            raise FloatingPointError("numerical error, input ill-conditioned?")
        reflect_coeff = dtype(-2) * np.dot(bwd_pred_error, fwd_pred_error) / dtype(den)

        # Levinson-style order update: a_new[j] = a_old[j] + k * a_old[i + 1 - j]
        # for j = 1 .. i + 1 (the reversed slice walks indices i, i-1, ..., 0).
        ar_coeffs_prev, ar_coeffs = ar_coeffs, ar_coeffs_prev
        ar_coeffs[1:i + 2] = (
            ar_coeffs_prev[1:i + 2] + reflect_coeff * ar_coeffs_prev[i::-1]
        )

        # Update both error sequences from their pre-update values.
        fwd_pred_error_tmp = fwd_pred_error
        fwd_pred_error = fwd_pred_error + reflect_coeff * bwd_pred_error
        bwd_pred_error = bwd_pred_error + reflect_coeff * fwd_pred_error_tmp

        # The combined energy of the updated errors is (1 - k^2) * den; the
        # two samples dropped by the trimming below are subtracted from it.
        q = dtype(1) - reflect_coeff ** 2
        den = q * den - bwd_pred_error[-1] ** 2 - fwd_pred_error[0] ** 2

        fwd_pred_error = fwd_pred_error[1:]
        bwd_pred_error = bwd_pred_error[:-1]

    return ar_coeffs
def _lpc_to_lpcc(lpc_coeff, n_ceps):
    """Convert error-filter LPC coefficients into cepstral coefficients.

    ``lpc_coeff`` is the polynomial ``A(z) = 1 + a_1 z^-1 + ... + a_p z^-p``
    as returned by ``lpc`` (leading coefficient 1). The cepstrum of the
    all-pole model ``1 / A(z)`` follows the recursion

        c_m = -a_m - sum_{k=1}^{m-1} (k / m) * c_k * a_{m-k}

    where ``a_m`` is taken as 0 for ``m > p`` and the sum only needs the terms
    with ``m - k <= p``.

    Parameters
    ----------
    lpc_coeff : array_like
        ``p + 1`` error-filter coefficients.
    n_ceps : int
        Number of cepstral coefficients to produce (including index 0).

    Returns
    -------
    numpy.ndarray
        ``n_ceps`` float64 values. Index 0 is set to ``lpc_coeff[0]`` as a
        placeholder; it never enters the recursion because the sum starts at
        ``k = 1``.
    """
    a = np.asarray(lpc_coeff, dtype=float)
    p = a.shape[0] - 1
    c = np.zeros(n_ceps)
    if n_ceps == 0:
        return c
    c[0] = a[0]
    for m in range(1, n_ceps):
        # The direct term exists only while m is within the LPC order
        # (m == p included).
        direct = -a[m] if m <= p else 0.0
        lo = max(1, m - p)
        ks = np.arange(lo, m)
        # Accumulate over all k; a[m - ks] picks a_{m-k} for every k at once.
        c[m] = direct - np.dot(ks * c[lo:m], a[m - ks]) / m
    return c


y, sr = sf.read('q1.wav')
# LPC order and number of LPCC coefficients to compute
lpc_order = 32
lpcc_order = 48
# LPC coefficients (error-filter polynomial, leading coefficient 1)
lpc_coeff = lpc(y, lpc_order)
# NOTE: an earlier version of this loop overwrote lpcc_coeff[m] on every k
# instead of accumulating, skipped m == lpc_order, and used the
# predictor-coefficient sign convention; the values it produced grew without
# bound.
lpcc_coeff = _lpc_to_lpcc(lpc_coeff, lpcc_order)
print(lpcc_coeff)
print(lpc_coeff)
'''
[ 1.00000000e+00 -3.95327600e+00  1.61128841e+01 -5.52410036e+01
  1.79819243e+02 -5.85851356e+02  1.94600697e+03 -6.60704007e+03
  2.28633585e+04 -8.03469568e+04  2.85871218e+05 -1.02738713e+06
  3.72307949e+06 -1.35861755e+07  4.98734770e+07 -1.84019377e+08
  6.82011923e+08 -2.53758247e+09  9.47444366e+09 -3.54837702e+10
  1.33263280e+11 -5.01739551e+11  1.89335516e+12 -7.15952267e+12
  2.71242538e+13 -1.02940475e+14  3.91300106e+14 -1.48962409e+15
  5.67857747e+15 -2.16748811e+16  8.28305613e+16 -3.16889100e+17
  2.21650898e+18 -1.23415673e+19  6.99397095e+19 -3.97121614e+20
  2.25665287e+21 -1.28331886e+22  7.30333576e+22 -4.15919083e+23
  2.37018398e+24 -1.35153342e+25  7.71134177e+25 -4.40229745e+26
  2.51457017e+27 -1.43705211e+28  8.21666908e+28 -4.70028689e+29]
'''
原文地址:https://www.cnblogs.com/tingweichen/p/14618086.html