function htk_recognizer = train_htk_recognizer(traindat, word_list, word_grammar, phone_dict, traintranscripts, hmmtemplate, nmix, verb, shell_script, traindat_filenames)
% recognizer = train_htk_recognizer(traindat, word_list, word_grammar, dict,
%     traintranscripts, hmmtemplate, nmix, verb, shell_script, traindat_filenames)
%
% Use HTK to train a simple HMM speech recognizer.  All inputs are written
% to a temporary directory, a shell script that drives the HTK tools is
% run on them, and the resulting models are read back in.
%
% Inputs:
%  - traindat            - cell array of training data (or name of scp
%                          file or cell array containing a list of filenames)
%  - word_list           - cell array of words in the grammar (or filename)
%  - word_grammar        - fsm data structure containing the HTK grammar of
%                          word sequences accepted by the recognizer, or a
%                          cell array of grammar text lines, or a filename
%  - dict                - phone dictionary - cell array (one word per
%                          element) that translates words into phones
%                          (or filename)
%  - traintranscripts    - cell array of string word transcripts of
%                          traindat (or filename)
%  - hmmtemplate*        - hmm structure containing the template
%                          symbol hmm.  Defaults to a 3 state forward
%                          model.  The default can only be built when
%                          traindat holds the feature matrices themselves.
%  - nmix*               - number of mixture components to split each
%                          state into (if hmmtemplate has only single
%                          gaussian emissions).  Defaults to 1.
%  - verb*               - verbosity.  When nonzero, '-A -D -T <verb>' is
%                          passed to the shell script as the HTK options.
%                          Defaults to 0.
%  - shell_script*       - name of the training shell script, looked up in
%                          the directory containing this function.
%                          Defaults to 'train_htk_recognizer.sh'.
%  - traindat_filenames* - cell array of file names (relative to the
%                          temporary data directory) used when writing
%                          in-memory traindat to disk.  Defaults to
%                          'htkdat_<n>'.
%
%  * optional argument (may be omitted or passed as [])
%
% Outputs:
%  - recognizer - structure containing the components of an hmm
%                 recognizer:
%                   recog.hmms - cell array of HMMs
%                   recog.grammar
%                   recog.word_list
%                   recog.phone_list
%                   recog.dict

if nargin < 9 || isempty(shell_script)
  shell_script = 'train_htk_recognizer.sh';
end
have_user_filenames = (nargin >= 10) && ~isempty(traindat_filenames);

% Directory holding this function (and the training shell script).
SCRIPT_DIR = fileparts(mfilename('fullpath'));

% Does traindat hold the actual feature matrices (as opposed to a list of
% file names or the name of an .scp file)?
traindat_contains_data = 0;
if iscell(traindat) && ~isempty(traindat)
  traindat_contains_data = ~ischar(traindat{1});
end

if nargin < 6 || isempty(hmmtemplate)
  % The default template needs the feature dimension, which is only
  % known when the data itself was passed in.
  if ~traindat_contains_data
    error('train_htk_recognizer:needTemplate', ...
        ['hmmtemplate must be supplied when traindat is a list of ' ...
         'file names or an scp file.']);
  end
  ndim = size(traindat{1}, 1);
  hmmtemplate = struct('name', 'template', ...
      'nstates', 3, ...
      'start_prob', log([1, 0, 0]), ...
      'end_prob', log([0, 0, 0.3]), ...
      'transmat', log(0.5*[1 1 0; 0 1 1; 0 0 1.4]), ...
      'emission_type', 'gaussian',...
      'means', zeros(ndim, 3), ...
      'covars', ones(ndim, 3));
end

if nargin < 7 || isempty(nmix)
  nmix = 1;
end

if nargin < 8 || isempty(verb)
  verb = 0;
end

HTK_OPTIONS = '';
if verb
  HTK_OPTIONS = ['-A -D -T ' num2str(verb)];
end


%%% Setup:
% write a bunch of files.
base_dir = [tempname '/'];
mkdir(base_dir);
% place to store the data
mkdir([base_dir 'data']);

filename_prefix = [base_dir 'htk'];

% grammar
if isstruct(word_grammar)
  grammar_filename = [filename_prefix '.grammar'];
  write_htk_bnf(grammar_filename, word_grammar);
elseif iscellstr(word_grammar)
  grammar_filename = [filename_prefix '.grammar'];
  write_text_file(grammar_filename, word_grammar);
else
  % word_grammar is a file name; load its contents so that they can be
  % returned in the recognizer structure.
  grammar_filename = word_grammar;
  if exist(word_grammar, 'file')
    word_grammar = read_text_file(grammar_filename);
  end
end

% dictionary
if iscellstr(phone_dict)
  phone_dict_filename = [filename_prefix '.dict'];
  write_text_file(phone_dict_filename, phone_dict);
else
  phone_dict_filename = phone_dict;
end

% write word list
if iscellstr(word_list)
  word_list_filename = [filename_prefix '.wordlist'];
  write_text_file(word_list_filename, word_list);
else
  word_list_filename = word_list;
  word_list = read_text_file(word_list_filename);
end

% write out training data...
if traindat_contains_data
  if have_user_filenames
    if numel(traindat_filenames) < numel(traindat)
      error('train_htk_recognizer:badFilenames', ...
          'traindat_filenames must have one entry per element of traindat.');
    end
  else
    traindat_filenames = cell(1, numel(traindat));
  end

  % custom data format:
  htkcode = 9;  % USER
  for n = 1:length(traindat)
    if have_user_filenames
      traindat_filenames{n} = [base_dir 'data/' traindat_filenames{n}];
    else
      traindat_filenames{n} = [base_dir 'data/htkdat_' num2str(n)];
    end
    htkwrite(traindat{n}', traindat_filenames{n}, htkcode);
  end
else
  if iscell(traindat)
    traindat_filenames = traindat;
  else
    % traindat should contain the name of an HTK .scp file with a list
    % of training data filenames
    traindat_filenames = read_text_file(traindat);
  end
end

% I don't think HTK actually needs the wav file for anything
traindat_featfile = traindat_filenames;

% write .scp file (tells htk where to find feature files)
featfiles = [base_dir 'trainfiles.scp'];
write_text_file(featfiles, traindat_featfile);

% format word transcripts as an HTK master label file (MLF)
if iscellstr(traintranscripts)
  word_trans_filename = [filename_prefix '.word_transcripts'];
  fid = fopen(word_trans_filename, 'w');
  if fid < 0
    error('train_htk_recognizer:fileOpen', ...
        'Unable to open %s for writing.', word_trans_filename);
  end

  % The header is written unconditionally so that the file is a valid
  % (if empty) MLF even when there are no transcripts.
  fprintf(fid, '#!MLF!#');
  for n = 1:length(traintranscripts)
    % NOTE(review): the label pattern uses the full feature file name;
    % confirm this matches what the training script expects.
    fprintf(fid, '\n\"*/%s\"\n', [traindat_filenames{n} '.lab']);

    % one word per line, terminated by a line containing only '.'
    words = regexp(strtrim(traintranscripts{n}), '\s+', 'split');
    words = words(~cellfun('isempty', words));
    for w = 1:numel(words)
      fprintf(fid, '%s\n', words{w});
    end
    fprintf(fid, '.\n');
  end
  fclose(fid);
else
  word_trans_filename = traintranscripts;
end

% write HMM template
if ~isfield(hmmtemplate, 'name'), hmmtemplate.name = 'proto'; end
hmm_template_filename = [base_dir 'proto'];
write_htk_hmm(hmm_template_filename, hmmtemplate);


%%% Training:
% we're going to use a shell script to do the rest of this
retval = system(['sh ' fullfile(SCRIPT_DIR, shell_script) ' ' ...
      featfiles ' ' grammar_filename ' ' word_list_filename ' ' ...
      phone_dict_filename ' ' word_trans_filename ' ' ...
      hmm_template_filename ' ' num2str(nmix) ' ' base_dir  ...
      ' "' HTK_OPTIONS '"']);

if retval ~= 0
  % The temporary directory is intentionally left in place on failure
  % so that the HTK intermediate files can be inspected.
  %rmdir(base_dir, 's');
  error('HTK error!');
end


%%% Output:
htk_recognizer.hmms = read_htk_hmm([base_dir 'hmm_final/hmmdefs']);
htk_recognizer.grammar = word_grammar;
htk_recognizer.word_list = word_list;
htk_recognizer.phone_list = read_text_file([base_dir 'monophones0']);
% only include words in the grammar, so read the right file
htk_recognizer.dict = read_text_file([base_dir 'dict']);

rmdir(base_dir, 's');
基于Matlab实现HTK语音识别工具包(源码).rar
版权申诉
44 浏览量
2023-05-08
10:48:53
上传
评论
收藏 31KB RAR 举报
Matlab仿真实验室
- 粉丝: 2w+
- 资源: 2179
最新资源
- 微软常用运行库 游戏运行库 VC++各个版本
- 微信小程序开发教程.pptx
- MyBatis动态SQL是一种强大的特性,它允许我们在SQL语句中根据条件动态地添加或删除某些部分,从而实现更加灵活和高效的数据
- 锐捷网络认证中心网络管理.pdf
- MyBatis动态SQL是一种强大的特性,它允许我们在SQL语句中根据条件动态地添加或删除某些部分,从而实现更加灵活和高效的数据
- SD8233LF是一款用于单按键触摸及接近感应开关,其用途是替代传统的机械型开关芯片IC
- 基于YOLOv5的烟雾火焰检测算法研究
- 基于STM32的联合调试侦听设备解决方案原理图PCB源文件调试工具视频(大赛作品)
- MyBatis动态SQL是一种强大的特性,它允许我们在SQL语句中根据条件动态地添加或删除某些部分,从而实现更加灵活和高效的数据
- MyBatis动态SQL是一种强大的特性,它允许我们在SQL语句中根据条件动态地添加或删除某些部分,从而实现更加灵活和高效的数据
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈