Python实现语音端点检测，基音周期检测和语音共振峰估计【语音信号处理实战】.zip

共25个文件

py：10个

png：9个

wav：3个

版权申诉

Python

5星 · 超过95%的资源 56 浏览量 2023-04-17 15:23:22 上传评论 1 收藏 634KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

Python实现语音端点检测，基音周期检测和语音共振峰估计【语音信号处理实战】.zip （25个子文件）

Python实现语音端点检测，基音周期检测和语音共振峰估计【语音信号处理实战】

共振峰估计.py 2KB

C4_2_y.wav 53KB

C4_1_y_2.py 1013B

C4_1_y.wav 63KB

C4_1_y_4.py 1KB

C4_1_y_1.py 1KB

4.2基音周期检测.mdown 10KB

C4_3_y.py 2KB

end_detection.py 10KB

4.3共振峰估计.mdown 10KB

C4_2_y.py 2KB

C4_1_y_5.py 2KB

pitch_detection.py 4KB

4.1语音端点检测.mdown 23KB

images

共振峰估计.png 149KB

能零比.png 43KB

pitch.png 113KB

corr.png 40KB

TwoThr.png 40KB

对数频率距离.png 55KB

En.png 42KB

ellip.png 22KB

能熵比.png 39KB

C4_1_y_3.py 1KB

C4_3_y.wav 684B

import numpy as np

from chapter3_分析实验.C3_1_y_1 import enframe
from chapter3_分析实验.timefeature import *


def _track_segments(fn, first, strong, weak, keep, maxsilence, minlen):
    """Run the endpoint-detection state machine shared by the vad_* functions.

    Exactly one state is handled per frame:

    * 0 / 1 -- outside a segment (1 means the weak condition has fired),
    * 2     -- inside a segment,
    * 3     -- a segment was closed on the previous frame.

    :param fn: number of frames
    :param first: index of the first frame to examine
    :param strong: per-frame booleans; True opens a segment
    :param weak: per-frame booleans; True moves to state 1
    :param keep: per-frame booleans; True keeps an open segment alive
    :param maxsilence: number of non-``keep`` frames tolerated inside a
        segment before it is closed
    :param minlen: segments shorter than this many frames are discarded
    :return: ``(x1, x2, n_seg)`` -- start frames, end frames (exclusive) and
        the number of valid leading entries in both arrays
    """
    status = 0
    xn = 0
    count = np.zeros(fn)
    silence = np.zeros(fn)
    x1 = np.zeros(fn)
    x2 = np.zeros(fn)

    for n in range(first, fn):
        if status in (0, 1):
            if strong[n]:
                # Start is clamped to 1 because 0 is the "no segment" marker.
                x1[xn] = max(1, n - count[xn] - 1)
                status = 2
                silence[xn] = 0
                count[xn] += 1
            elif weak[n]:
                status = 1
                count[xn] += 1
            else:
                status = 0
                count[xn] = 0
                x1[xn] = 0
                x2[xn] = 0
        elif status == 2:
            if keep[n]:
                count[xn] += 1
            else:
                silence[xn] += 1
                if silence[xn] < maxsilence:
                    # Short pause: still counted as part of the segment.
                    count[xn] += 1
                elif count[xn] < minlen:
                    # Too short to be a segment: discard it.
                    status = 0
                    silence[xn] = 0
                    count[xn] = 0
                else:
                    status = 3
                    x2[xn] = x1[xn] + count[xn]
        else:
            # status == 3: move on to the next segment slot.
            status = 0
            xn += 1
            count[xn] = 0
            silence[xn] = 0
            x1[xn] = 0
            x2[xn] = 0

    # Slots 0 .. xn-1 hold closed segments. Slot xn is valid as well when the
    # signal ended inside a segment (2) or right after closing one (3).
    n_seg = xn
    if status in (2, 3):
        if x2[xn] == 0:
            print('Error: Not find endding point!\n')
            x2[xn] = fn
        n_seg = xn + 1
    return x1, x2, n_seg


def _segments_to_flags(x1, x2, n_seg, fn):
    """Turn segment boundaries into the (voiceseg, vsl, SF, NF) result tuple.

    ``SF`` is 1 on frames inside a segment and 0 elsewhere; ``NF`` is its
    complement.
    """
    SF = np.zeros(fn)
    NF = np.ones(fn)
    for i in range(n_seg):
        lo, hi = int(x1[i]), int(x2[i])
        SF[lo:hi] = 1
        NF[lo:hi] = 0
    voiceseg = findSegment(np.where(SF == 1)[0])
    vsl = len(voiceseg)
    return voiceseg, vsl, SF, NF


def vad_revr(dst1, T1, T2):
    """
    Endpoint detection, reverse comparison: a frame counts as active when
    its value is BELOW the thresholds.

    :param dst1: per-frame feature values
    :param T1: weak threshold (``dst1 < T1`` keeps/arms a segment)
    :param T2: strong threshold (``dst1 < T2`` opens a segment)
    :return: (voiceseg, vsl, SF, NF) -- segment dict from findSegment, number
        of segments, per-frame speech flags, per-frame non-speech flags
    """
    dst1 = np.asarray(dst1)
    fn = len(dst1)
    below_t1 = dst1 < T1
    x1, x2, n_seg = _track_segments(
        fn, 1, dst1 < T2, below_t1, below_t1, maxsilence=8, minlen=5)
    return _segments_to_flags(x1, x2, n_seg, fn)


def vad_forw(dst1, T1, T2):
    """
    Endpoint detection, forward comparison: a frame counts as active when
    its value is ABOVE the thresholds.

    :param dst1: per-frame feature values
    :param T1: weak threshold (``dst1 > T1`` keeps/arms a segment)
    :param T2: strong threshold (``dst1 > T2`` opens a segment)
    :return: (voiceseg, vsl, SF, NF) -- segment dict from findSegment, number
        of segments, per-frame speech flags, per-frame non-speech flags
    """
    dst1 = np.asarray(dst1)
    fn = len(dst1)
    above_t1 = dst1 > T1
    x1, x2, n_seg = _track_segments(
        fn, 1, dst1 > T2, above_t1, above_t1, maxsilence=8, minlen=5)
    return _segments_to_flags(x1, x2, n_seg, fn)


def findSegment(express):
    """
    Split frame indices into contiguous speech segments.

    :param express: either an increasing array of frame indices, or -- when
        its first element is 0 -- a 0/1 mask whose non-zero positions are the
        frame indices. An index array that starts at frame 0 is therefore
        read as a mask.
    :return: dict ``{k: {'start': s, 'end': e, 'duration': e - s + 1}}`` with
        one entry per contiguous run, numbered from 0; empty when there are
        no frames.
    """
    express = np.asarray(express)
    if express.size == 0:
        return {}
    if express[0] == 0:
        # np.where returns a tuple; element 0 is the index array.
        voiceIndex = np.where(express)[0]
    else:
        voiceIndex = express
    if voiceIndex.size == 0:
        return {}

    # Positions after which the index jumps by more than one frame.
    d_voice = np.where(np.diff(voiceIndex) > 1)[0]
    starts = np.concatenate(([voiceIndex[0]], voiceIndex[d_voice + 1]))
    ends = np.concatenate((voiceIndex[d_voice], [voiceIndex[-1]]))

    voiceseg = {}
    for i, (st, en) in enumerate(zip(starts, ends)):
        voiceseg[i] = {'start': st, 'end': en, 'duration': en - st + 1}
    return voiceseg


def vad_TwoThr(x, wlen, inc, NIS):
    """
    Detect speech segments with the threshold method.

    Thresholds are derived from the mean of the first ``NIS`` frames of the
    two features returned by ``STEn`` and ``STZcr`` (presumably short-time
    energy and zero-crossing rate -- confirm against ``timefeature``).

    :param x: speech signal
    :param wlen: frame length
    :param inc: frame shift
    :param NIS: number of leading frames used to estimate the thresholds
    :return: (voiceseg, vsl, SF, NF, amp, zcr)
    """
    maxsilence = 15
    minlen = 5
    y = enframe(x, wlen, inc)
    fn = y.shape[0]
    amp = STEn(x, wlen, inc)
    zcr = STZcr(x, wlen, inc, delta=0.01)
    ampth = np.mean(amp[:NIS])
    zcrth = np.mean(zcr[:NIS])
    amp2 = 2 * ampth
    amp1 = 4 * ampth
    zcr2 = 2 * zcrth

    amp_f = np.ravel(amp)[:fn]
    zcr_f = np.ravel(zcr)[:fn]
    strong = amp_f > amp1
    weak = (amp_f > amp2) | (zcr_f > zcr2)
    keep = (amp_f > amp2) & (zcr_f > zcr2)

    x1, x2, n_seg = _track_segments(
        fn, 0, strong, weak, keep, maxsilence, minlen)
    voiceseg, vsl, SF, NF = _segments_to_flags(x1, x2, n_seg, fn)
    return voiceseg, vsl, SF, NF, amp, zcr


def vad_corr(y, wnd, inc, NIS, th1, th2):
    """
    Endpoint detection on the max-normalised first output of ``STAc``.

    The reference level is the maximum of the normalised feature over the
    first ``NIS`` frames; ``th1`` and ``th2`` scale it into the two
    thresholds passed to ``vad_forw``.

    :param y: signal handed to ``enframe``
    :param wnd: window argument handed to ``enframe``
    :param inc: frame shift
    :param NIS: number of leading frames used for the reference level
    :param th1: multiplier for the weak threshold T1
    :param th2: multiplier for the strong threshold T2
    :return: (voiceseg, vsl, SF, NF, Rum)
    """
    x = enframe(y, wnd, inc)
    Ru = STAc(x.T)[0]
    Rum = Ru / np.max(Ru)
    thredth = np.max(Rum[:NIS])
    T1 = th1 * thredth
    T2 = th2 * thredth
    voiceseg, vsl, SF, NF = vad_forw(Rum, T1, T2)
    return voiceseg, vsl, SF, NF, Rum


def vad_specEN(data, wnd, inc, NIS, thr1, thr2, fs):
    # NOTE(review): the source text of this function is cut off in the middle
    # of a statement (at the bare name `prob` below). The remainder is not
    # present in this file, so the function is incomplete as it stands.
    from scipy.signal import medfilt
    x = enframe(data, wnd, inc)
    X = np.abs(np.fft.fft(x, axis=1))
    # NOTE(review): len() raises TypeError on a plain int -- confirm callers
    # always pass a window array here.
    if len(wnd) == 1:
        wlen = wnd
    else:
        wlen = len(wnd)
    df = fs / wlen
    fx1 = int(250 // df + 1)  # bin index for 250 Hz
    fx2 = int(3500 // df + 1)  # bin index for 3500 Hz (comment originally said 500 Hz)
    km = wlen // 8
    K = 0.5
    E = np.zeros((X.shape[0], wlen // 2))
    E[:, fx1 + 1:fx2 - 1] = X[:, fx1 + 1:fx2 - 1]
    E = np.multiply(E, E)
    Esum = np.sum(E, axis=1, keepdims=True)
    P1 = np.divide(E, Esum)
    E = np.where(P1 >= 0.9, 0, E)
    Eb0 = E[:, 0::4]
    Eb1 = E[:, 1::4]
    Eb2 = E[:, 2::4]
    Eb3 = E[:, 3::4]
    Eb = Eb0 + Eb1 + Eb2 + Eb3
    prob

评论收藏

内容反馈

版权申诉