端点检测
语音端点检测就是检测语音信号的起点和终点,因此也叫起止点识别。在语
音识别中,一个关键问题就是如何将语音信号精确地检测出来,为获得准确的识
别提供前提。
1.基于短时能量和过零率相结合的两级判别法
以短时能量E和短时过零率ZCR为特征的起止点检测算法,其依据是:背景噪声与语音信号
的短时能量及短时过零率在统计特性上都有相当明显的区别。
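设语音波形时域信号为 $x(l)$,加窗分帧处理后得到的第 $n$ 帧语音信号为 $x_n(m)$:
$$x_n(m)=w(m)\,x(n+m),\quad 0\le m\le N-1$$
其中 $w(m)$ 为窗函数,$N$ 为帧长。第 $n$ 帧的短时能量和短时过零率分别定义为
$$E_n=\sum_{m=0}^{N-1}x_n^2(m),\qquad ZCR_n=\frac{1}{2}\sum_{m=1}^{N-1}\left|\operatorname{sgn}[x_n(m)]-\operatorname{sgn}[x_n(m-1)]\right|$$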
先用短时能量作第一次判别,在此基础上用短时过零率作第二次判别。在用
短时能量作第一次判别时,为了不至于把语音能量的局部下降点错误地当成起止
点,常采用双门限比较的方法。
代码:
% ---- Parameters --------------------------------------------------------
frameSize=256;            % frame length in samples
overlap=128;              % number of samples shared by consecutive frames
deltaEnergyLevel1=-20;    % lower energy threshold, as an offset (dB) from the peak log energy
deltaEnergyLevel2=-10;    % upper energy threshold, as an offset (dB) from the peak log energy
zcrRatio=0.2;             % ZCR threshold, as a fraction of the maximum ZCR

% ---- Read the speech file ----------------------------------------------
[filename,pathname]=uigetfile('D:\*.wav','open file:');
if isequal(filename,0)
    % uigetfile returns 0 when the dialog is cancelled; stop with a clear message
    % instead of failing later on the path concatenation.
    error('No wav file was selected.');
end
wavFile=[pathname,filename];
if exist('audioread','file')
    % wavread is not available in current MATLAB releases; prefer audioread.
    [y,fs]=audioread(wavFile);
    info=audioinfo(wavFile);
    nbits=info.BitsPerSample;
else
    [y,fs,nbits]=wavread(wavFile);   % fallback for old releases
end
if size(y,2)>1
    % buffer() only accepts vectors, so mix multi-channel audio down to mono.
    y=mean(y,2);
end

% ---- Remove the DC offset ----------------------------------------------
y=y-mean(y);

% ---- Split into frames (one frame per column) --------------------------
% 'nodelay' makes frame 1 start at sample 1. Without it buffer() prepends
% `overlap` zeros to the first frame, which would shift every frame by
% `overlap` samples relative to the frame-to-sample formulas used later.
framedY=buffer(y,frameSize,overlap,'nodelay');
frameNum=size(framedY,2);   % number of frames

% ---- Short-time log energy and its two thresholds ----------------------
energy=frame2logEnergy(framedY);
energyLevel1=max(energy)+deltaEnergyLevel1;
energyLevel2=max(energy)+deltaEnergyLevel2;

% ---- Short-time zero-crossing rate and its threshold -------------------
% The signal is shifted down by 0.03 before counting sign changes, so that
% fluctuations smaller than that level do not register as crossings.
zcr=zcrate(framedY-0.03);
zcrThreshold=max(zcr)*zcrRatio;
% ---- Coarse segmentation using the higher energy threshold -------------
% Every frame whose log energy reaches energyLevel2 is a candidate speech
% frame. Each run of consecutive candidate frames becomes one segment,
% stored as sound(k).begin / sound(k).end (frame indices, inclusive).
voicedIndex=find(energy>=energyLevel2);
voicedIndex=voicedIndex(:).';          % force a row vector
if isempty(voicedIndex)
    % Nothing reached the threshold (e.g. energy is empty or all NaN).
    sound=struct('begin',{},'end',{});
else
    % A jump larger than 1 between neighbouring indices marks a gap between
    % two segments. All neighbouring pairs are inspected, including the
    % first pair (voicedIndex(1), voicedIndex(2)).
    gapAfter=find(diff(voicedIndex)>1);
    segBegin=[voicedIndex(1),voicedIndex(gapAfter+1)];
    segEnd=[voicedIndex(gapAfter),voicedIndex(end)];
    sound=struct('begin',num2cell(segBegin),'end',num2cell(segEnd));

    % ---- Discard very short segments ----------------------------------
    % Segments whose end and begin indices differ by less than 3 frames are dropped.
    index=find((segEnd-segBegin)<3);
    sound(index)=[];
end
% ---- Refine each segment: first with the lower energy threshold, -------
% ---- then with the zero-crossing-rate threshold ------------------------
% Each segment is widened independently of the others, so both passes are
% applied to one segment before moving on to the next.
for i=1:length(sound)
    % Pass 1: extend while the neighbouring frame's energy stays at/above
    % (towards the start) or above (towards the end) energyLevel1.
    head=sound(i).begin;
    while head>1 && energy(head-1)>=energyLevel1
        head=head-1;
    end
    tail=sound(i).end;
    while tail<length(energy) && energy(tail+1)>energyLevel1
        tail=tail+1;
    end

    % Pass 2: continue from the energy-based boundaries and extend while
    % the neighbouring frame's ZCR stays at/above (start) or above (end)
    % zcrThreshold.
    while head>1 && zcr(head-1)>=zcrThreshold
        head=head-1;
    end
    while tail<length(zcr) && zcr(tail+1)>zcrThreshold
        tail=tail+1;
    end

    sound(i).begin=head;
    sound(i).end=tail;
end

% ---- Remove duplicated segments ----------------------------------------
% After widening, neighbouring segments may have become identical; when
% segment i equals segment i+1, segment i is removed.
if ~isempty(sound)
    index=[];
    for i=1:length(sound)-1
        if isequal([sound(i).begin,sound(i).end],[sound(i+1).begin,sound(i+1).end])
            index(end+1)=i;
        end
    end
    sound(index)=[];
end

% ---- Convert frame indices to sample indices of the whole signal -------
if isempty(sound)
    out=[];
else
    hop=frameSize-overlap;   % distance in samples between frame starts
    for i=1:length(sound)
        out(i).begin=(sound(i).begin-1)*hop+1;
        out(i).end=sound(i).end*hop+overlap;
    end
end
% ---- Plot waveform, log energy and zero-crossing rate ------------------
hop=frameSize-overlap;   % distance in samples between frame starts

% Panel 1: waveform against time in seconds
subplot(3,1,1);
plot((1:length(y))/fs,y);
axis([-inf inf -1 1]);   % four elements: auto x-limits, y fixed to [-1, 1]
ylabel('Amplitude');
title('Wave form');

% Panel 2: log energy per frame, with both energy thresholds
subplot(3,1,2);
time=((0:frameNum-1)*hop+0.5*frameSize)/fs;   % frame centres in seconds
plot(time,energy,'.-');
line([min(time),max(time)],energyLevel1*[1 1],'color','c');
line([min(time),max(time)],energyLevel2*[1 1],'color','c');
axis tight
ylabel('Log energy(dB)');
title('Log energy');

% Panel 3: zero-crossing rate per frame, with its threshold
subplot(3,1,3);
plot(time,zcr,'.-');
line([min(time),max(time)],zcrThreshold*[1 1],'color','c');
axis([-inf inf 0 inf]);
ylabel('ZCR');
title('Zero crossing rate');

% ---- Mark the detected end points on all three panels ------------------
% Red line = segment start, green line = segment end. Each panel uses its
% own vertical extent for the marker lines.
yBounds={[-1 1],[min(energy),max(energy)],[0 max(zcr)]};
for panel=1:3
    subplot(3,1,panel);
    yBound=yBounds{panel};
    for i=1:length(sound)
        line(sound(i).begin*hop/fs*[1,1],yBound,'color','r');
        line(sound(i).end*hop/fs*[1,1],yBound,'color','g');
    end
end
% Short-time energy
function logEnergy=frame2logEnergy(framedY)
%frame2logEnergy Convert framed samples to log energy in dB.
%   logEnergy = frame2logEnergy(framedY) returns 10*log10 of the mean of
%   the squared samples, taken with mean()'s default dimension (down each
%   column when framedY is a matrix).
framePower=mean(framedY.^2);
% Replace exact zeros by eps so that log10 never receives 0.
isSilent=(framePower==0);
framePower(isSilent)=eps;
logEnergy=10*log10(framePower);
% Short-time zero-crossing rate
function count=zcrate(frame)
%zcrate Normalised count of sign changes between neighbouring samples.
%   count = zcrate(frame) counts the positions where sign() of adjacent
%   samples differs and divides that count by twice the number of samples
%   per frame.
%
%   frame : a vector (one frame), or a matrix with one frame per column.
%   count : a scalar for vector input, a row vector (one value per column)
%           for matrix input.
%
%   The divisor uses the samples per frame (rows of a matrix). length()
%   would return the largest dimension, i.e. the number of frames whenever
%   there are more frames than samples per frame.
if isvector(frame)
    samplesPerFrame=numel(frame);
    count=sum(diff(sign(frame))~=0);
else
    samplesPerFrame=size(frame,1);
    count=sum(diff(sign(frame),1,1)~=0,1);
end
count=count/(2*samplesPerFrame);
- 1
- 2
- 3
前往页