function setTemplates
%SETTEMPLATES Record reference utterances and save their MFCC templates.
%   Records three groups of 10 takes each (16 kHz, 8-bit, mono, 2 s per
%   take). Every take is passed through my_vad and then converted to
%   features with mfcc(speechIn, fs, 12, 256, 80).
%
%   Each group is stored in a struct with fields
%       data - 1x10 cell array, one feature matrix per take
%       fs   - sampling rate in Hz
%   and saved as variable s1/s2/s3 in the MAT-files Vectors1/Vectors2/
%   Vectors3 in the current folder. Each file is written as soon as its
%   group has been recorded.
fs = 16000;       % sampling rate in Hz
duration = 2;     % length of every take in seconds
numTakes = 10;    % takes per template group

s1.data = recordTemplateSet(numTakes, fs, duration);
s1.fs = fs;
save Vectors1 s1;

s2.data = recordTemplateSet(numTakes, fs, duration);
s2.fs = fs;
save Vectors2 s2;

s3.data = recordTemplateSet(numTakes, fs, duration);
s3.fs = fs;
save Vectors3 s3;
end

function data = recordTemplateSet(numTakes, fs, duration)
%RECORDTEMPLATESET Record numTakes utterances and return their features.
%   data is a 1-by-numTakes cell array; data{i} is the mfcc output for the
%   i-th take after my_vad has been applied to the raw recording.
data = cell(1, numTakes);
for take = 1:numTakes
    % Wait for a key press so the speaker controls when each take starts.
    fprintf('Begin by pressing any key (%g seconds):\n', duration);
    pause
    fprintf('recording...\n');
    recorder = audiorecorder(fs, 8, 1);   % 8 bits per sample, 1 channel
    recordblocking(recorder, duration);
    samples = getaudiodata(recorder);
    fprintf('Finish\n');
    speechIn = my_vad(samples);
    data{take} = mfcc(speechIn, fs, 12, 256, 80);
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [afterEndDet]=my_vad(x)
%MY_VAD Remove non-speech parts of a signal with a two-threshold detector.
%   afterEndDet = my_vad(x) normalises x to unit peak, splits it into
%   frames of 256 samples with a hop of 80 samples (via enframe), and runs
%   a small state machine on the per-frame amplitude sum and a thresholded
%   zero-crossing count. Every detected speech segment is cut out of the
%   normalised signal and the segments are concatenated.
%
%   Input:
%       x           - signal vector (any numeric type; converted to double)
%   Output:
%       afterEndDet - column vector with the concatenated speech segments
%                     (taken from the peak-normalised signal); 0-by-1 when
%                     x is empty, all zeros, or no segment is detected.
%
%   A segment that is still open when the signal ends is kept as well.

afterEndDet = zeros(0,1);
if isempty(x)
    return;
end
x = double(x);
peak = max(abs(x(:)));
if peak == 0
    % All-zero input: nothing to detect, and x/peak would produce NaN.
    return;
end
x = x / peak;

% Constants
FrameLen = 256;   % frame length in samples
FrameInc = 80;    % frame hop in samples
amp1 = 10;        % initial high threshold on the frame amplitude sum
amp2 = 2;         % initial low threshold on the frame amplitude sum
zcr2 = 5;         % low threshold on the zero-crossing count
maxsilence = 8;   % frames of silence tolerated inside a speech segment
minlen = 15;      % segments shorter than this many frames count as noise
% NOTE(review): the original comments equate one frame hop with 10 ms,
% which holds for 80 samples only at 8 kHz - confirm against the caller's fs.

status = 0;       % 0 = silence, 1 = maybe speech, 2 = speech, 3 = segment ended
count = 0;        % length of the current (candidate) segment in frames
silence = 0;      % consecutive silent frames inside the current segment

% Zero-crossing count per frame: a sign change between neighbouring samples
% is only counted when the step (previous minus next) exceeds 0.02.
tmp1 = enframe(x(1:end-1), FrameLen, FrameInc);
tmp2 = enframe(x(2:end), FrameLen, FrameInc);
signs = (tmp1.*tmp2) < 0;
diffs = (tmp1 - tmp2) > 0.02;
zcr = sum(signs.*diffs, 2);      % one value per frame

% Amplitude sum per frame
amp = sum(abs(enframe(x, FrameLen, FrameInc)), 2);

% Adapt the amplitude thresholds to the loudest frame
amp1 = min(amp1, max(amp)/4);
amp2 = min(amp2, max(amp)/8);

x1 = 0;           % first frame of the current segment
x2 = 0;           % last frame of the current segment
v_num = 0;        % number of recorded segments
v_Begin = [];     % start sample of every recorded segment
v_End = [];       % end sample of every recorded segment

for n = 1:length(zcr)
    switch status
        case {0,1}                       % silence, or possibly starting
            if amp(n) > amp1             % definitely inside speech
                x1 = max(n-count-1, 1);
                status = 2;
                silence = 0;
                count = count + 1;
            elseif amp(n) > amp2 || zcr(n) > zcr2   % possibly speech
                status = 1;
                count = count + 1;
            else                         % silence
                status = 0;
                count = 0;
            end
        case 2                           % inside a speech segment
            if amp(n) > amp2 || zcr(n) > zcr2       % still speech
                count = count + 1;
            else                         % speech may be ending
                silence = silence + 1;
                if silence < maxsilence  % pause too short to end the segment
                    count = count + 1;
                elseif count < minlen    % too short overall: treat as noise
                    status = 0;
                    silence = 0;
                    count = 0;
                else                     % segment finished
                    status = 3;
                end
            end
        case 3
            % Record the finished segment, dropping half of the trailing
            % silence, then reset and keep scanning for further segments.
            v_num = v_num + 1;
            count = count - floor(silence/2);
            x2 = x1 + count - 1;
            v_Begin(1,v_num) = x1*FrameInc;
            v_End(1,v_num) = x2*FrameInc;
            status = 0;
            count = 0;
            silence = 0;
    end
end

% The signal ended while a confirmed segment was still open (status 2) or
% had just been closed but not yet recorded (status 3): keep it. Previously
% this was only done when no earlier segment existed, so speech running to
% the end of the recording was lost whenever another segment preceded it.
if status >= 2
    v_num = v_num + 1;
    x2 = x1 + count - 1;
    v_Begin(1,v_num) = x1*FrameInc;
    v_End(1,v_num) = x2*FrameInc;
end

% Concatenate the segments; x(b:e) holds e-b+1 samples.
segLen = v_End - v_Begin + 1;
afterEndDet = zeros(sum(segLen), 1);
pos = 0;
for k = 1:v_num
    afterEndDet(pos+1:pos+segLen(k)) = x(v_Begin(1,k):v_End(1,k));
    pos = pos + segLen(k);
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ccc=mfcc(x,fs,p,frameSize,inc)
%MFCC Mel-frequency cepstral coefficients with first-order differences.
%   ccc = mfcc(x, fs, p, frameSize, inc)
%
%   Inputs:
%       x         - signal vector
%       fs        - sampling rate in Hz (passed to melbankm)
%       p         - number of mel filters requested from melbankm
%       frameSize - frame length in samples (also the FFT length)
%       inc       - frame hop in samples
%   Output:
%       ccc       - (numFrames-4)-by-24 matrix: 12 liftered cepstral
%                   coefficients followed by their 12 difference values for
%                   every frame except the first two and last two (which
%                   have no full difference window). Empty (0-by-24) when
%                   there are fewer than five frames.
%
%   Requires melbankm and enframe, plus hamming.

numCep = 12;                          % cepstral coefficients kept per frame

% Mel filter bank, normalised to a peak gain of 1
bank = full(melbankm(p, frameSize, fs, 0, 0.5, 'm'));
bank = bank / max(bank(:));

% DCT matrix (numCep-by-p): row k is cos((2n+1)*k*pi/(2p)), n = 0..p-1
n = 0:p-1;
dctcoef = cos(((1:numCep)' * (2*n+1)) * pi / (2*p));

% Cepstral lifter, normalised to a peak of 1
w = 1 + 6*sin(pi*(1:numCep)./numCep);
w = w / max(w);

% Pre-emphasis y(n) = x(n) - 0.9375*x(n-1). The coefficient vector needs the
% space: [1-0.9375] is the scalar 0.0625 and would only scale the signal.
xx = filter([1 -0.9375], 1, double(x));

xx = enframe(xx, frameSize, inc);
n2 = fix(frameSize/2) + 1;            % number of non-redundant FFT bins
win = hamming(frameSize);             % loop-invariant analysis window
numFrames = size(xx, 1);

% Preallocate so that an input with no frames yields an empty result
% instead of an undefined-variable error.
m = zeros(numFrames, numCep);
for f = 1:numFrames
    s = xx(f,:)' .* win;
    t = abs(fft(s)).^2;               % power spectrum
    % Floor the filter-bank energies so silent frames do not give log(0).
    c1 = dctcoef * log(max(bank*t(1:n2), realmin));
    m(f,:) = (c1 .* w')';
end

% First-order differences over a +/-2 frame window
dtm = zeros(size(m));
for f = 3:numFrames-2
    dtm(f,:) = -2*m(f-2,:) - m(f-1,:) + m(f+1,:) + 2*m(f+2,:);
end
dtm = dtm / 3;

% Static + difference features; drop the frames without a full window
ccc = [m dtm];
ccc = ccc(3:numFrames-2, :);
end
% Comments: 0