from chapter3_分析实验.C3_1_y_1 import enframe
from chapter3_分析实验.timefeature import *
def vad_revr(dst1, T1, T2):
    """
    Endpoint detection with reverse comparison: a frame counts as speech
    when its feature value falls BELOW the thresholds.

    The detector is a small state machine over the frames of ``dst1``:
    status 0 = silence, 1 = possible start of speech, 2 = inside a speech
    segment.  Frame 0 is never examined (the scan starts at frame 1).

    :param dst1: per-frame feature values (indexable, ``len()`` = frame count)
    :param T1: loose threshold; ``dst1[n] < T1`` marks a frame as possible speech
    :param T2: strict threshold; ``dst1[n] < T2`` opens a speech segment
    :return: tuple ``(voiceseg, vsl, SF, NF)`` where ``voiceseg`` is the dict
        produced by ``findSegment`` (empty when no frame is flagged),
        ``vsl`` is its length, ``SF`` is a 0/1 array flagging speech frames
        and ``NF`` is its complement
    """
    fn = len(dst1)
    maxsilence = 8  # sub-threshold frames tolerated before a segment is closed
    minlen = 5      # segments with fewer counted frames are discarded
    status = 0
    count = 0       # frames counted for the current candidate segment
    silence = 0     # sub-threshold frames seen inside the current segment
    start = 0
    segments = []   # (start, end) pairs, end exclusive

    for n in range(1, fn):
        if status in (0, 1):
            if dst1[n] < T2:
                # Backdate the start over the "possible speech" frames
                # already counted, but never before frame 1.
                start = max(1, n - count - 1)
                status = 2
                silence = 0
                count += 1
            elif dst1[n] < T1:
                status = 1
                count += 1
            else:
                status = 0
                count = 0
        # Plain `if`, not `elif`: the frame that opens a segment is also
        # processed here in the same iteration (so it is counted twice).
        # Kept as in the original so segment boundaries do not move.
        if status == 2:
            if dst1[n] < T1:
                count += 1
            else:
                silence += 1
                if silence < maxsilence:
                    count += 1
                elif count < minlen:
                    # Too short: drop the candidate and start over.
                    status = 0
                    silence = 0
                    count = 0
                else:
                    segments.append((start, start + count))
                    status = 0
                    silence = 0
                    count = 0

    if status == 2:
        # Data ended while still inside a segment: close it at the last frame.
        print('Error: Not find endding point!\n')
        segments.append((start, fn))

    SF = np.zeros(fn)
    NF = np.ones(fn)
    for seg_start, seg_end in segments:
        SF[seg_start:seg_end] = 1
        NF[seg_start:seg_end] = 0

    speech_frames = np.where(SF == 1)[0]
    # Skip findSegment when nothing was detected; there is nothing to split.
    voiceseg = findSegment(speech_frames) if speech_frames.size > 0 else {}
    vsl = len(voiceseg.keys())
    return voiceseg, vsl, SF, NF
def vad_forw(dst1, T1, T2):
    """
    Endpoint detection with forward comparison: a frame counts as speech
    when its feature value rises ABOVE the thresholds.

    The detector is a small state machine over the frames of ``dst1``:
    status 0 = silence, 1 = possible start of speech, 2 = inside a speech
    segment.  Frame 0 is never examined (the scan starts at frame 1).

    :param dst1: per-frame feature values (indexable, ``len()`` = frame count)
    :param T1: loose threshold; ``dst1[n] > T1`` marks a frame as possible speech
    :param T2: strict threshold; ``dst1[n] > T2`` opens a speech segment
    :return: tuple ``(voiceseg, vsl, SF, NF)`` where ``voiceseg`` is the dict
        produced by ``findSegment`` (empty when no frame is flagged),
        ``vsl`` is its length, ``SF`` is a 0/1 array flagging speech frames
        and ``NF`` is its complement
    """
    fn = len(dst1)
    maxsilence = 8  # sub-threshold frames tolerated before a segment is closed
    minlen = 5      # segments with fewer counted frames are discarded
    status = 0
    count = 0       # frames counted for the current candidate segment
    silence = 0     # sub-threshold frames seen inside the current segment
    start = 0
    segments = []   # (start, end) pairs, end exclusive

    for n in range(1, fn):
        if status in (0, 1):
            if dst1[n] > T2:
                # Backdate the start over the "possible speech" frames
                # already counted, but never before frame 1.
                start = max(1, n - count - 1)
                status = 2
                silence = 0
                count += 1
            elif dst1[n] > T1:
                status = 1
                count += 1
            else:
                status = 0
                count = 0
        # Plain `if`, not `elif`: the frame that opens a segment is also
        # processed here in the same iteration (so it is counted twice).
        # Kept as in the original so segment boundaries do not move.
        if status == 2:
            if dst1[n] > T1:
                count += 1
            else:
                silence += 1
                if silence < maxsilence:
                    count += 1
                elif count < minlen:
                    # Too short: drop the candidate and start over.
                    status = 0
                    silence = 0
                    count = 0
                else:
                    segments.append((start, start + count))
                    status = 0
                    silence = 0
                    count = 0

    if status == 2:
        # Data ended while still inside a segment: close it at the last frame.
        print('Error: Not find endding point!\n')
        segments.append((start, fn))

    SF = np.zeros(fn)
    NF = np.ones(fn)
    for seg_start, seg_end in segments:
        SF[seg_start:seg_end] = 1
        NF[seg_start:seg_end] = 0

    speech_frames = np.where(SF == 1)[0]
    # Skip findSegment when nothing was detected; there is nothing to split.
    voiceseg = findSegment(speech_frames) if speech_frames.size > 0 else {}
    vsl = len(voiceseg.keys())
    return voiceseg, vsl, SF, NF
def findSegment(express):
    """
    Split speech-frame indices into runs of consecutive frames.

    :param express: 1-D sequence of ascending frame indices, or a 0/1 mask
        with one entry per frame.  An input whose first element is 0 is
        treated as a mask and converted to the indices of its non-zero
        entries; anything else is taken as an index list.
    :return: dict mapping segment number (0-based) to a dict with keys
        ``'start'`` (first frame), ``'end'`` (last frame, inclusive) and
        ``'duration'`` (``end - start + 1``).  Empty dict when there are
        no speech frames.
    """
    express = np.asarray(express)
    if express.size == 0:
        return {}

    if express[0] == 0:
        # Mask input: keep the index array itself, not the tuple np.where returns.
        voiceIndex = np.flatnonzero(express)
    else:
        voiceIndex = express
    if voiceIndex.size == 0:
        return {}

    # Positions after which the next index is not adjacent, i.e. the last
    # element of every run except the final one.
    d_voice = np.where(np.diff(voiceIndex) > 1)[0]
    # Building the boundaries this way also yields one segment when there
    # is no gap at all (a single contiguous run).
    first_pos = np.concatenate(([0], d_voice + 1))
    last_pos = np.concatenate((d_voice, [voiceIndex.size - 1]))

    voiceseg = {}
    for i, (lo, hi) in enumerate(zip(first_pos, last_pos)):
        st = voiceIndex[lo]
        en = voiceIndex[hi]
        voiceseg[i] = {'start': st, 'end': en, 'duration': en - st + 1}
    return voiceseg
def vad_TwoThr(x, wlen, inc, NIS):
    """
    Detect speech segments with the double-threshold method, using the
    values returned by ``STEn`` and ``STZcr`` as per-frame features.

    Thresholds are multiples of the mean feature value over the first
    ``NIS`` frames.  The detector is a state machine over the frames:
    status 0 = silence, 1 = possible start of speech, 2 = inside a speech
    segment, 3 = segment just closed (consumes one frame to reset).

    :param x: speech signal
    :param wlen: frame length (passed through to the framing helpers)
    :param inc: frame shift
    :param NIS: number of leading frames averaged to set the thresholds
        (presumably noise-only frames; confirm against callers)
    :return: tuple ``(voiceseg, vsl, SF, NF, amp, zcr)`` where ``voiceseg``
        is the dict produced by ``findSegment`` (empty when no frame is
        flagged), ``vsl`` is its length, ``SF`` is a 0/1 array flagging
        speech frames, ``NF`` is its complement, and ``amp`` / ``zcr`` are
        the feature arrays used for the decision
    """
    maxsilence = 15  # sub-threshold frames tolerated before a segment is closed
    minlen = 5       # segments with fewer counted frames are discarded

    y = enframe(x, wlen, inc)
    fn = y.shape[0]
    amp = STEn(x, wlen, inc)
    zcr = STZcr(x, wlen, inc, delta=0.01)

    ampth = np.mean(amp[:NIS])
    zcrth = np.mean(zcr[:NIS])
    amp2 = 2 * ampth  # loose amplitude threshold
    amp1 = 4 * ampth  # strict amplitude threshold
    zcr2 = 2 * zcrth

    status = 0
    count = 0        # frames counted for the current candidate segment
    silence = 0      # sub-threshold frames seen inside the current segment
    start = 0
    segments = []    # (start, end) pairs, end exclusive

    for n in range(fn):
        if status in (0, 1):
            if amp[n] > amp1:
                # Backdate the start over the "possible speech" frames
                # already counted, but never before frame 1.
                start = max(1, n - count - 1)
                status = 2
                silence = 0
                count += 1
            elif amp[n] > amp2 or zcr[n] > zcr2:
                status = 1
                count += 1
            else:
                status = 0
                count = 0
        elif status == 2:
            if amp[n] > amp2 and zcr[n] > zcr2:
                count += 1
            else:
                silence += 1
                if silence < maxsilence:
                    count += 1
                elif count < minlen:
                    # Too short: drop the candidate and start over.
                    status = 0
                    silence = 0
                    count = 0
                else:
                    status = 3
                    # Record the segment right away so it is not lost when
                    # this happens on the very last frame.
                    segments.append((start, start + count))
        else:
            # status == 3: reset the counters for the next segment.
            status = 0
            count = 0
            silence = 0

    if status == 2:
        # Data ended while still inside a segment: close it at the last frame.
        print('Error: Not find endding point!\n')
        segments.append((start, fn))

    SF = np.zeros(fn)
    NF = np.ones(fn)
    for seg_start, seg_end in segments:
        SF[seg_start:seg_end] = 1
        NF[seg_start:seg_end] = 0

    speech_frames = np.where(SF == 1)[0]
    # Skip findSegment when nothing was detected; there is nothing to split.
    voiceseg = findSegment(speech_frames) if speech_frames.size > 0 else {}
    vsl = len(voiceseg.keys())
    return voiceseg, vsl, SF, NF, amp, zcr
def vad_corr(y, wnd, inc, NIS, th1, th2):
    """
    Endpoint detection on a normalised per-frame feature taken from ``STAc``
    (presumably a short-time autocorrelation; confirm against its definition).

    :param y: input signal, framed with ``enframe(y, wnd, inc)``
    :param wnd: window argument passed through to ``enframe``
    :param inc: frame shift passed through to ``enframe``
    :param NIS: number of leading frames whose peak feature value sets the
        reference level for the thresholds
    :param th1: multiplier of the reference level giving the loose threshold
    :param th2: multiplier of the reference level giving the strict threshold
    :return: tuple ``(voiceseg, vsl, SF, NF, Rum)``: the four results of
        ``vad_forw`` followed by the normalised feature
    """
    frames = enframe(y, wnd, inc)
    # Only the first element of STAc's result is used as the feature.
    feature = STAc(frames.T)[0]
    # Scale so that the largest value equals 1.
    Rum = feature / np.max(feature)
    reference = np.max(Rum[:NIS])
    loose_thr = th1 * reference
    strict_thr = th2 * reference
    voiceseg, vsl, SF, NF = vad_forw(Rum, loose_thr, strict_thr)
    return voiceseg, vsl, SF, NF, Rum
def vad_specEN(data, wnd, inc, NIS, thr1, thr2, fs):
from scipy.signal import medfilt
x = enframe(data, wnd, inc)
X = np.abs(np.fft.fft(x, axis=1))
if len(wnd) == 1:
wlen = wnd
else:
wlen = len(wnd)
df = fs / wlen
fx1 = int(250 // df + 1) # 250Hz位置
fx2 = int(3500 // df + 1) # 500Hz位置
km = wlen // 8
K = 0.5
E = np.zeros((X.shape[0], wlen // 2))
E[:, fx1 + 1:fx2 - 1] = X[:, fx1 + 1:fx2 - 1]
E = np.multiply(E, E)
Esum = np.sum(E, axis=1, keepdims=True)
P1 = np.divide(E, Esum)
E = np.where(P1 >= 0.9, 0, E)
Eb0 = E[:, 0::4]
Eb1 = E[:, 1::4]
Eb2 = E[:, 2::4]
Eb3 = E[:, 3::4]
Eb = Eb0 + Eb1 + Eb2 + Eb3
prob
Python实现语音端点检测,基音周期检测和语音共振峰估计【语音信号处理实战】.zip
版权申诉
5星 · 超过95%的资源 56 浏览量
2023-04-17
15:23:22
上传
评论 1
收藏 634KB ZIP 举报
不脱发的程序猿
- 粉丝: 24w+
- 资源: 5777
最新资源
- 20221230IqFQX3Yc.zip
- Microsoft.AspNetCore.Authentication.JwtBearer 3.1.32
- picCombobox图片下拉菜单案例.xls
- Java语言实现使用Prim(普利姆)算法求最小生成树(源代码)
- 奕尧笔记 yiyao-notebook-evv Setup 3.1.6.exe
- 继保实验模版 实验一 三段式电流保护及自动重合闸 实验二 变压器差动保护 实验三 微机型距离保护实验
- web前端页面通过BrowserPrint API连接斑马打印机进行RFID条形码贴纸打印
- 导体、导线、各种电缆载流量表
- 坐标点批量转面工具,可以将经纬度坐标点批量转为KML格式的面文件
- 海龟画图一个胶囊表情包
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈