% mfcc implementation
close all;
clear all;
% 相关参数设定
fs = 16000; %采样率
Num_of_sample_point = 400; % 设N为一帧的采样点数,有x/16000*1000 = 25ms,得到采样点数N=400
Num_of_fft_point = 512; % 每帧补零到512个采样点
% 梅尔频率三角滤波器设计
Num_of_fliter_in_band = 40; %三角滤波器的数目
fmax = fs/2; %最大频率
maxMelFreq = 1125*log(1+fmax/700); %最大梅尔频率
delta = maxMelFreq/(Num_of_fliter_in_band+1); %梅尔滤波器宽度
LEN_MELREC = 13; %特征矩阵13纬度
m = 0:delta:(Num_of_fliter_in_band+1)*delta; %滤波器中心频率(梅尔频域)
h = 700*(exp(m/1125)-1); %滤波器中心频率(频域)
f = floor((Num_of_fft_point+1)*h/fs); %计算频谱到梅尔谱的映射关系
melFilters = zeros(Num_of_fliter_in_band,3); %初始化
for i=1:Num_of_fliter_in_band %计算40个梅尔三角滤波器参数
for j=1:3 %三角滤波器的下限,中心和上限频率
melFilters(i,j) = f(i+j-1);
end
end
% 1.语音采集
x = audioread('zsdx.wav'); %获取录音数据
% 2.预加重
voice = filter([1 -0.95], 1, x);
% 3.分帧 25ms为一帧每10ms分一次帧(N个采样点集合成一个帧,N通常为256或512)
% 4.加窗 对于每一帧而言
frames = enframe(voice,hamming(Num_of_sample_point),Num_of_sample_point/2.5);
Num_of_frames = size(frames,1);
% 5.补零 400个采样点补成512个采样点
frames = [frames zeros(Num_of_frames, Num_of_fft_point-Num_of_sample_point)];
% 初始化数组
FFT_of_frames = zeros(size(frames));
PSD_of_frames = zeros(size(frames));
mel = zeros(Num_of_frames,Num_of_fliter_in_band);
melRec = zeros(Num_of_frames,Num_of_fliter_in_band);
new_melRec = zeros(size(melRec));
% 以下逐帧处理
for fn = 1:Num_of_frames % 帧号从1到Num_of_frames
% 6.FFT
FFT_of_frames(fn,:) = fftshift(fft(frames(fn,:))); %快速傅立叶变换FFT
PSD_of_frames(fn,:) = abs(FFT_of_frames(fn,:)).^2; %功率谱
% 7.Mel滤波
for i = 1:Num_of_fliter_in_band %共计40个滤波器,最后的梅尔谱也是40个点
for j = 1:Num_of_fft_point %根据公式逐点计算
if(j>= melFilters(i,1) && j <= melFilters(i,2))
mel(fn,i) = mel(fn,i) + ((j-melFilters(i,1))/(melFilters(i,2)-melFilters(i,1))^2)*PSD_of_frames(fn,j);
end
if(j > melFilters(i,2) && j <= melFilters(i,3))
mel(fn,i) = mel(fn,i) + ((j-melFilters(i,1))/(melFilters(i,3)-melFilters(i,2))^2)*PSD_of_frames(fn,j);
end
end
end
% 8.离散余弦变换DCT, 得到梅尔倒谱
for i = 1:LEN_MELREC % 前13纬输出
for j = 1:Num_of_fliter_in_band
if (mel(fn,j) <= -0.0001 || mel(fn,j) >= 0.0001) %门限
melRec(fn,i) = melRec(fn,i) + log(mel(fn,j))*cos(pi*i/(2*Num_of_fliter_in_band)*(2*j+1));
end
end
end
% 9.归一化
new_melRec(fn,1:LEN_MELREC) = (melRec(fn,1:LEN_MELREC)-mean(melRec(fn,1:LEN_MELREC)))/std(melRec(fn,1:LEN_MELREC));
end
%逐帧处理完毕
figure(1); %音频信号
plot(x),title('原音频'); axis([0 32000 -1 1]);hold
plot(voice,'r'),title('预加重后');
figure(2); %特征矩阵色图
subplot(2,1,1),imagesc(melRec(:,1:LEN_MELREC)'),title('MEL倒谱');
subplot(2,1,2),imagesc(new_melRec(:,1:LEN_MELREC)'),title('归一化的MEL倒谱');
figure(3);
No = 110;%第No帧的结果
subplot(3,3,1),plot(x(No*Num_of_sample_point/2.5+1:No*Num_of_sample_point/2.5+Num_of_sample_point)),title('原帧');
subplot(3,3,2),plot(voice(No*Num_of_sample_point/2.5+1:No*Num_of_sample_point/2.5+Num_of_sample_point)),title('预加重后');
subplot(3,3,3),plot(frames(No,:)),axis([0,512,min(frames(No,:)),max(frames(No,:))]),title('加窗补零后'); axis([0 512 min(frames(No,:)) max(frames(No,:))]);
subplot(3,3,4),plot(PSD_of_frames(No,:)),title('功率谱'); axis([0 512 min(PSD_of_frames(No,:)) max(PSD_of_frames(No,:))]);
subplot(3,3,5),plot(mel(No,:)),title('mel谱');
subplot(3,3,6),plot(log(mel(No,:))),title('log.mel谱');
subplot(3,3,7:9),stem(new_melRec(No,1:LEN_MELREC)),title('mel倒谱');
评论0