% % Word recognition in MATLAB
% % Author : Dr. Selvaraaju Murugesan, LaTrobe University, Ausralia
% % Date : 09-07-2014
% % traning phase
clc; clear;close all
w = warning ('off','all');
Fs=8000;
% % For the word "ONE"
filepart1='myrecordone';
filepart2='.wav';
% % check for signal length
% % we will append zeros after VAD to make sure all the signals are of
% % equal length
% % We need same number of MFCC of each signal
length_all_sig=[];
for i=1:10
filename=strcat(filepart1,num2str(i),filepart2);
y1=wavread(filename);
results = VAD(y1,0.1,0.025,0.0125,20,1);
ind_st=(0:size(results)-1)*200+1;
ind_en=(1:size(results))*200;
ind1=ind_st(logical(results));
ind2=ind_en(logical(results));
all_ind=cell2mat(arrayfun(@colon,ind1,ind2,'uni',0));
filt_signal=y1(all_ind);
length_all_sig=[length_all_sig length(filt_signal)];
end
max_length=max(length_all_sig);
all_data_one=[];
for i = 1:10
filename=strcat(filepart1,num2str(i),filepart2);
y1=wavread(filename);
results = VAD(y1,0.1,0.025,0.0125,20,1);
ind_st=(0:size(results)-1)*200+1;
ind_en=(1:size(results))*200;
ind1=ind_st(logical(results));
ind2=ind_en(logical(results));
all_ind=cell2mat(arrayfun(@colon,ind1,ind2,'uni',0));
filt_signal=y1(all_ind);
if length(filt_signal)==max_length
[cepstra1,aspectrum,pspectrum] = melfcc(y1,Fs,'wintime',0.025,'hoptime',0.010);
else
filt_signal=[filt_signal' zeros(1,max_length-length(filt_signal))];
[cepstra1,aspectrum,pspectrum] = melfcc(filt_signal,Fs,'wintime',0.025,'hoptime',0.010);
end
all_data_one=[all_data_one cepstra1];
end
% % For the word "TWO"
all_data_two=[];
filepart1='myrecordtwo';
length_all_sig=[];
for i=1:10
filename=strcat(filepart1,num2str(i),filepart2);
y1=wavread(filename);
results = VAD(y1,0.1,0.025,0.0125,20,1);
ind_st=(0:size(results)-1)*200+1;
ind_en=(1:size(results))*200;
ind1=ind_st(logical(results));
ind2=ind_en(logical(results));
all_ind=cell2mat(arrayfun(@colon,ind1,ind2,'uni',0));
filt_signal=y1(all_ind);
length_all_sig=[length_all_sig length(filt_signal)];
end
max_length=max(length_all_sig);
for i = 1:10
filename=strcat(filepart1,num2str(i),filepart2);
y1=wavread(filename);
results = VAD(y1,0.1,0.025,0.0125,20,1);
ind_st=(0:size(results)-1)*200+1;
ind_en=(1:size(results))*200;
ind1=ind_st(logical(results));
ind2=ind_en(logical(results));
all_ind=cell2mat(arrayfun(@colon,ind1,ind2,'uni',0));
filt_signal=y1(all_ind);
if length(filt_signal)==max_length
[cepstra2,aspectrum,pspectrum] = melfcc(y1,Fs,'wintime',0.025,'hoptime',0.010);
else
filt_signal=[filt_signal' zeros(1,max_length-length(filt_signal))];
[cepstra2,aspectrum,pspectrum] = melfcc(filt_signal,Fs,'wintime',0.025,'hoptime',0.010);
end
all_data_two=[all_data_two cepstra2];
end
% % For the word "THREE"
all_data_three=[];
filepart1='myrecordthree';
length_all_sig=[];
for i=1:10
filename=strcat(filepart1,num2str(i),filepart2);
y1=wavread(filename);
results = VAD(y1,0.1,0.025,0.0125,20,1);
ind_st=(0:size(results)-1)*200+1;
ind_en=(1:size(results))*200;
ind1=ind_st(logical(results));
ind2=ind_en(logical(results));
all_ind=cell2mat(arrayfun(@colon,ind1,ind2,'uni',0));
filt_signal=y1(all_ind);
length_all_sig=[length_all_sig length(filt_signal)];
end
max_length=max(length_all_sig);
for i = 1:10
filename=strcat(filepart1,num2str(i),filepart2);
y1=wavread(filename);
results = VAD(y1,0.1,0.025,0.0125,20,1);
ind_st=(0:size(results)-1)*200+1;
ind_en=(1:size(results))*200;
ind1=ind_st(logical(results));
ind2=ind_en(logical(results));
all_ind=cell2mat(arrayfun(@colon,ind1,ind2,'uni',0));
filt_signal=y1(all_ind);
if length(filt_signal)==max_length
[cepstra3,aspectrum,pspectrum] = melfcc(y1,Fs,'wintime',0.025,'hoptime',0.010);
else
filt_signal=[filt_signal' zeros(1,max_length-length(filt_signal))];
[cepstra3,aspectrum,pspectrum] = melfcc(filt_signal,Fs,'wintime',0.025,'hoptime',0.010);
end
all_data_three=[all_data_three cepstra3];
end
% % Building model
X=[all_data_one'];
options = statset('MaxIter',500,'Display','final');
obj1 = gmdistribution.fit(X,8,'CovType',...
'diagonal','Options',options);
X=[all_data_two'];
obj2 = gmdistribution.fit(X,8,'CovType',...
'diagonal','Options',options);
X=[all_data_three'];
obj3 = gmdistribution.fit(X,8,'CovType',...
'diagonal','Options',options);
% % Test data
test_data=cepstra1';
% test_data=cepstra1';
% test_data=cepstra2';
% % Word recognition
[~,nlogl1] = posterior(obj1,test_data);
[~,nlogl2] = posterior(obj2,test_data);
[~,nlogl3] = posterior(obj3,test_data);
log_like=[nlogl1 nlogl2 nlogl3];
[~,Spoken_word]=min(log_like)
Speech Recognition.rar_in_recognition Word_speech recognition
版权申诉
88 浏览量
2022-07-15
19:40:47
上传
评论
收藏 426KB RAR 举报
JonSco
- 粉丝: 72
- 资源: 1万+
最新资源
- 基于Python和PyTorch框架完成的一个手写数字识别实验源码(带MINIST手写数字数据集)+详细注释(高分项目)
- 基于Matlab在MNIST数据集上利用CNN完成手写体数字识别任务,并实现单层CNN反向传播算法+源代码+文档说明(高分项目)
- NVIDIA驱动、CUDA和Pytorch及其依赖
- html动态爱心代码一(附源码)
- c40539bc-071a-486c-9d52-9d0c18d62dac 4.html
- 基于物理的非视域成像(NLOS)算法,利用了nerf+python源码+文档说明
- yuluer知更鸟.7z(1).001
- python课程设计-基于tensorflow实现的图文生成程序,数据集flickr30k-images+源代码+文档说明+截图
- python作业-基于Flickr30k数据集实现图像文本跨模态搜索python源码+数据集+测试界面+项目说明(高分课程设计)
- 基于Qt实现医院信息管理系统c++源码+文档说明+数据库(期末大作业)
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈