function [epInSampleIndex, epInFrameIndex, soundSegment, zeroOneVec, volume] = epdByVol(y, fs, nbits, epdParam, plotOpt)
% epdByVol: EPD (end-point detection) based on volume only
%	Usage: [epInSampleIndex, epInFrameIndex, soundSegment, zeroOneVec, volume] = epdByVol(y, fs, nbits, epdParam, plotOpt)
%		epInSampleIndex: two-element end-points in sample index ([] if no segment is found)
%		epInFrameIndex: two-element end-points in frame index ([] if no segment is found)
%		soundSegment: resulting sound segments; when at least one segment survives, each
%			element has the fields beginSample, endSample, beginFrame and endFrame
%		zeroOneVec: zero-one vector for each frame (1 for frames inside a kept segment)
%		volume: volume of each frame
%		y: input audio signals (must be a single column, i.e., mono)
%		fs: sampling rate (default: 16000)
%		nbits: bit resolution, used only to scale the plot and the playback data (default: 16)
%		epdParam: parameters for EPD (default: epdParamSet(fs)); the fields used here are
%			frameSize, overlap, minSegment and volRatio
%		plotOpt: 0 for silence operation, 1 for plotting (default: 0)
%
%	Example:
%		waveFile='SingaporeIsAFinePlace.wav';
%		[y, fs, nbits]=wavReadInt(waveFile);
%		epdParam=epdParamSet(fs);
%		plotOpt=1;
%		out=epdByVol(y, fs, nbits, epdParam, plotOpt);

%	Roger Jang, 20040413, 20070320

if nargin<1, selfdemo; return; end
if nargin<2, fs=16000; end
if nargin<3, nbits=16; end
if nargin<4 || isempty(epdParam), epdParam=epdParamSet(fs); end
if nargin<5, plotOpt=0; end

if size(y, 2)~=1, error('Wave is not mono!'); end

frameSize=epdParam.frameSize;
overlap=epdParam.overlap;
% Convert the minimum segment length into a frame count (hop size = frameSize-overlap).
% NOTE(review): this assumes epdParam.minSegment is given in seconds -- confirm in epdParamSet.
minSegment=round(epdParam.minSegment*fs/(frameSize-overlap));

y=double(y);					% convert to double data type
frameMat=buffer2(y, frameSize, overlap);	% frame blocking

% ====== Guard: no frame at all (e.g., signal shorter than one frame)
% Without this, the threshold computation below would index into an empty vector.
if isempty(frameMat)
	epInSampleIndex=[];
	epInFrameIndex=[];
	soundSegment=[];
	zeroOneVec=[];
	volume=[];
	fprintf('Warning: Signal is too short to form a frame in %s.m.\n', mfilename);
	return
end

frameMat=frameZeroMean(frameMat, 2);		% zero justification
frameNum=size(frameMat, 2);			% no. of frames
volume=frame2volume(frameMat, 1);		% compute volume

% ====== Compute the volume threshold
% volMin/volMax are taken about 1/32 of the way in from each end of the sorted
% volumes instead of the true extremes (original note: "to avoid unvoiced sounds").
sortedVolume=sort(volume);
index=max(1, round(frameNum/32));
volMin=sortedVolume(index);
volMax=sortedVolume(frameNum-index+1);
volTh=(volMax-volMin)/epdParam.volRatio+volMin;

% ====== Identify voiced part that's larger than volTh
soundSegment=segmentFind(volume>volTh);

% ====== Delete short sound clips (duration, in frames, not exceeding minSegment)
tooShort=false(1, length(soundSegment));
for i=1:length(soundSegment)
	tooShort(i)=soundSegment(i).duration<=minSegment;
end
soundSegment(tooShort)=[];

% ====== Mark the frames covered by the remaining segments
zeroOneVec=0*volume;
for i=1:length(soundSegment)
	zeroOneVec(soundSegment(i).begin:soundSegment(i).end)=1;
end

if isempty(soundSegment)
	epInSampleIndex=[];
	epInFrameIndex=[];
	fprintf('Warning: No sound segment found in %s.m.\n', mfilename);
else
	epInFrameIndex=[soundSegment(1).begin, soundSegment(end).end];
	epInSampleIndex=frame2sampleIndex(epInFrameIndex, frameSize, overlap);	% conversion from frame index to sample index
	for i=1:length(soundSegment)
		soundSegment(i).beginSample=frame2sampleIndex(soundSegment(i).begin, frameSize, overlap);
		% Clamp so that the end sample never exceeds the signal length
		soundSegment(i).endSample=min(length(y), frame2sampleIndex(soundSegment(i).end, frameSize, overlap));
		soundSegment(i).beginFrame=soundSegment(i).begin;
		soundSegment(i).endFrame=soundSegment(i).end;
	end
	% Replace the frame-based fields returned by segmentFind with the explicit ones above
	soundSegment=rmfield(soundSegment, 'begin');
	soundSegment=rmfield(soundSegment, 'end');
	soundSegment=rmfield(soundSegment, 'duration');
end

% ====== Plotting
if plotOpt
	% --- Waveform with segment boundaries (magenta: begin, green: end)
	subplot(2,1,1);
	time=(1:length(y))/fs;
	frameTime=frame2sampleIndex(1:frameNum, frameSize, overlap)/fs;
	plot(time, y);
	for i=1:length(soundSegment)
		line(frameTime(soundSegment(i).beginFrame)*[1 1], 2^nbits/2*[-1, 1], 'color', 'm');
		line(frameTime(soundSegment(i).endFrame)*[1 1], 2^nbits/2*[-1, 1], 'color', 'g');
	end
	axisLimit=[min(time) max(time) -2^nbits/2, 2^nbits/2];
	if -1<=min(y) && max(y)<=1	% signal already lies within [-1, 1]
		axisLimit=[min(time) max(time) -1, 1];
	end
	axis(axisLimit);
	ylabel('Amplitude');
	title('Waveform');
	% --- Volume curve with thresholds (red: volTh, cyan: volMin, black: volMax)
	subplot(2,1,2);
	plot(frameTime, volume, '.-');
	if all(volume>=0)	% every frame volume is non-negative: anchor the y-axis at 0
		axis([-inf inf 0 inf]);
	else
		axis tight;
	end
	line([min(frameTime), max(frameTime)], volTh*[1 1], 'color', 'r');
	line([min(frameTime), max(frameTime)], volMin*[1 1], 'color', 'c');
	line([min(frameTime), max(frameTime)], volMax*[1 1], 'color', 'k');
	for i=1:length(soundSegment)
		line(frameTime(soundSegment(i).beginFrame)*[1 1], [0, max(volume)], 'color', 'm');
		line(frameTime(soundSegment(i).endFrame)*[1 1], [0, max(volume)], 'color', 'g');
	end
	ylabel('Volume');
	title('Volume');
	% --- Playback buttons; the data is stored in the figure's userData for the callbacks
	U.y=y; U.fs=fs;
	if max(abs(U.y))>1, U.y=U.y/(2^nbits/2); end	% scale integer-range samples before sound()
	if ~isempty(epInSampleIndex)
		% Clamp the range so that the slice cannot run outside the signal
		voicedBegin=max(1, epInSampleIndex(1));
		voicedEnd=min(length(U.y), epInSampleIndex(end));
		U.voicedY=U.y(voicedBegin:voicedEnd);
	else
		U.voicedY=[];
	end
	set(gcf, 'userData', U);
	uicontrol('string', 'Play all', 'callback', 'U=get(gcf, ''userData''); sound(U.y, U.fs);');
	uicontrol('string', 'Play voiced', 'callback', 'U=get(gcf, ''userData''); sound(U.voicedY, U.fs);', 'position', [100, 20, 60, 20]);
end
% ====== Self demo
function selfdemo
% selfdemo: run the main function of this file on a sample wave file with plotting on
demoFile='SingaporeIsAFinePlace.wav';
[wave, rate, bitDepth]=wavReadInt(demoFile);
showPlot=1;
% mfilename gives the name of this file, so feval invokes its main function
endPoint=feval(mfilename, wave, rate, bitDepth, epdParamSet(rate), showPlot);
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
57578882test-one (1).zip (53个子文件)
test one
epdByVol.m 5KB
test.rar 323KB
epdParamSet.m 842B
lsum.m 1KB
rfft.m 385B
trainning
s10.wav 51KB
s6.wav 50KB
s8.wav 52KB
s11.wav 56KB
s5.wav 56KB
s14.wav 54KB
s2.wav 52KB
s12.wav 55KB
s4.wav 58KB
s3.wav 55KB
s13.wav 47KB
s1.wav 54KB
s15.wav 52KB
s9.wav 47KB
s7.wav 48KB
segmentFind.m 1KB
demo1.m 2KB
melbankm.m 718B
enframe.m 457B
lmultigauss.m 927B
buffer2.m 530B
test
s10.wav 51KB
s6.wav 50KB
s8.wav 52KB
s11.wav 56KB
s5.wav 56KB
s14.wav 54KB
s2.wav 52KB
s12.wav 55KB
s4.wav 58KB
s3.wav 55KB
s13.wav 47KB
s1.wav 54KB
s15.wav 52KB
s9.wav 47KB
s7.wav 48KB
melcepst.m 615B
rdct.m 463B
speakerGmm.mat 23KB
speakerData.mat 1.01MB
mfccParamSet.m 793B
MFCC_feature_compare.m 721B
frame2volume.m 2KB
gmm_estimate.m 3KB
frameZeroMean.m 2KB
go.m 3KB
frame2sampleIndex.m 889B
getTriFilterParam.m 1KB
共 53 条
- 1
人面何去桃花依旧
- 粉丝: 19
- 资源: 4
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
- 3
- 4
前往页