endPointDetect.rar_Endpoint-detection资源-CSDN文库

共1个文件

m：1个

版权申诉

192 浏览量 · 2022-07-14 09:14:54 上传 · 评论 · 收藏 · 3KB · RAR · 举报

资源推荐

资源详情

资源评论

收起资源包目录

endPointDetect.rar （1个子文件）

endPointDetect.m 14KB

function [epInSampleIndex, epInFrameIndex, soundSegment, zeroOneVec, others] = endPointDetect(au, opt, showPlot) % endPointDetect: EPD based on volume and HOD (high-order difference) % % Usage: % [epInSampleIndex, epInFrameIndex, soundSegment, zeroOneVec, others] = endPointDetect(au, opt, showPlot) % % Description: % [epInSampleIndex, epInFrameIndex, soundSegment, zeroOneVec, others] = endPointDetect(au, opt, showPlot) % epInSampleIndex: two-element end-points in sample index % epInFrameIndex: two-element end-points in frame index % soundSegment: segment of voice activity % zeroOneVec: zero-one vector for each frame % others: other outputs, which depends on opt.method. % au: audio object % opt: parameters for EPD % showPlot: 0 for silence operation, 1 for plotting % % Example: % waveFile='SingaporeIsAFinePlace.wav'; % au=myAudioRead(waveFile); % opt=endPointDetect('defaultOpt'); % opt.method='vol'; % showPlot = 1; % [epInSampleIndex, epInFrameIndex, soundSegment] = endPointDetect(au, opt, showPlot); % Category: Audio endpoint detection % Roger Jang, 20070323 if nargin<1, selfdemo; return; end if ischar(au) && strcmpi(au, 'defaultOpt') % Set the default options epInSampleIndex.method='volZcr'; % Default method epInSampleIndex.frameDuration=256/8000; % Frame size (fs=16000 ===> frameSize=256) epInSampleIndex.overlapDuration=0; % Frame overlap % The followings are mainly for method='vol' epInSampleIndex.volRatio=0.1; epInSampleIndex.vMinMaxPercentile=3; % For method='volzcr'; epInSampleIndex.volRatio2=0.2; % Not used for now epInSampleIndex.zcrRatio=0.1; epInSampleIndex.zcrShiftGain=4; % For epdByEntropy epInSampleIndex.veRatio=0.1; epInSampleIndex.veMinMaxPercentile=3; % For method='volhod' epInSampleIndex.vhRatio=0.012; % 0.11 epInSampleIndex.diffOrder=1; epInSampleIndex.volWeight=0.76; epInSampleIndex.vhMinMaxPercentile=2.3; % 5.0% epInSampleIndex.extendNum=1; % Extend front and back epInSampleIndex.minSegment=0.068; % Sound segments (in seconds) shorter than or 
equal to this value are removed epInSampleIndex.maxSilBetweenSegment=0.416; % epInSampleIndex.minLastWordDuration=0.2; % return end if nargin<2||isempty(opt), opt=feval(mfilename, 'defaultOpt'); end if nargin<3, showPlot=0; end opt.frameSize=round(opt.frameDuration*au.fs); opt.overlap=round(opt.overlapDuration*au.fs); y=au.signal; fs=au.fs; nbits=au.nbits; if size(y, 2)~=1, error('Wave is not mono!'); end frameSize=opt.frameSize; overlap=opt.overlap; minSegment=round(opt.minSegment*fs/(frameSize-overlap)); maxSilBetweenSegment=round(opt.maxSilBetweenSegment*fs/(frameSize-overlap)); %minLastWordDuration=round(opt.minLastWordDuration*fs/(frameSize-overlap)); y=double(y); % convert to double data type frameMat=enframe(y, frameSize, overlap); % frame blocking frameMat=frameZeroMean(frameMat, 2); % zero justification frameNum=size(frameMat, 2); % no. of frames switch(lower(opt.method)) case 'vol' volume=frame2volume(frameMat); % compute volume temp=sort(volume); index=round(frameNum*opt.vMinMaxPercentile/100); if index==0, index=1; end volMin=temp(index); volMax=temp(frameNum-index+1); volTh=(volMax-volMin)*opt.volRatio+volMin; % compute volume threshold % ====== Identify voiced part that's larger than volTh soundSegment=segmentFind(volume>volTh); % ====== Delete short sound clips index=[soundSegment.duration]<=minSegment; soundSegment(index)=[]; % ====== Create zero-one vector zeroOneVec=logical(0*volume); for i=1:length(soundSegment) zeroOneVec(soundSegment(i).begin:soundSegment(i).end)=1; end if isempty(soundSegment) epInSampleIndex=[]; epInFrameIndex=[]; fprintf('Warning: No sound segment found in %s.m.\n', mfilename); else epInFrameIndex=[soundSegment(1).begin, soundSegment(end).end]; epInSampleIndex=frame2sampleIndex(epInFrameIndex, frameSize, overlap); % conversion from frame index to sample index for i=1:length(soundSegment), soundSegment(i).beginSample = frame2sampleIndex(soundSegment(i).begin, frameSize, overlap); soundSegment(i).endSample = min(length(y), 
frame2sampleIndex(soundSegment(i).end, frameSize, overlap)); soundSegment(i).beginFrame = soundSegment(i).begin; soundSegment(i).endFrame = soundSegment(i).end; end soundSegment=rmfield(soundSegment, 'begin'); soundSegment=rmfield(soundSegment, 'end'); % soundSegment=rmfield(soundSegment, 'duration'); end others.volume=volume; others.volTh=volTh; case 'volzcr' volume=frame2volume(frameMat); % compute volume temp=sort(volume); index=round(frameNum*opt.vMinMaxPercentile/100); if index==0, index=1; end volMin=temp(index); volMax=temp(frameNum-index+1); % To avoid unvoiced sounds volTh1=(volMax-volMin)*opt.volRatio+volMin; % compute volume threshold volTh2=(volMax-volMin)*opt.volRatio2+volMin; % compute volume threshold % ====== Identify voiced part that's larger than volTh1 soundSegment=segmentFind(volume>volTh1); % ====== Compute ZCR [minVol, index]=min(volume); shiftAmount=opt.zcrShiftGain*max(abs(frameMat(:,index))); % shiftAmount is equal to opt.zcrShiftGain times the max. abs. sample within the frame of min. 
volume %shiftAmount=max(shiftAmount, 2); shiftAmount=max(shiftAmount, max(frameMat(:))/100); zcr=frame2zcr(frameMat, 1, shiftAmount); zcrTh=max(zcr)*opt.zcrRatio; % ====== Expansion 1: Expand end points to volume level1 (lower level) for i=1:length(soundSegment), head = soundSegment(i).begin; while (head-1)>=1 & volume(head-1)>=volTh1, head=head-1; end soundSegment(i).begin = head; tail = soundSegment(i).end; while (tail+1)<=length(volume) & volume(tail+1)>=volTh1, tail=tail+1; end soundSegment(i).end = tail; end % ====== Expansion 2: Expand end points to include high zcr region for i=1:length(soundSegment), head = soundSegment(i).begin; while (head-1)>=1 & zcr(head-1)>zcrTh % Extend at beginning head=head-1; end soundSegment(i).begin = head; tail = soundSegment(i).end; while (tail+1)<=length(zcr) & zcr(tail+1)>zcrTh % Extend at ending tail=tail+1; end soundSegment(i).end = tail; end % ====== Delete repeated sound segments index = []; for i=1:length(soundSegment)-1, if soundSegment(i).begin==soundSegment(i+1).begin & soundSegment(i).end==soundSegment(i+1).end, index=[index, i]; end end soundSegment(index) = []; % ====== Delete short sound clips index = []; for i=1:length(soundSegment) soundSegment(i).duration=soundSegment(i).end-soundSegment(i).begin+1; % This is necessary since the duration is changed due to expansion if soundSegment(i).duration<=minSegment index = [index, i]; end end soundSegment(index) = []; zeroOneVec=logical(0*volume); for i=1:length(soundSegment) for j=soundSegment(i).begin:soundSegment(i).end zeroOneVec(j)=1; end end if isempty(soundSegment) epInSampleIndex=[]; epInFrameIndex=[]; fprintf('Warning: No sound segment found in %s.m.\n', mfilename); else epInFrameIndex=[soundSegment(1).begin, soundSegment(end).end]; epInSampleIndex=frame2sampleIndex(epInFrameIndex, frameSize, overlap); % conversion from frame index to sample index for i=1:length(soundSegment), soundSegment(i).beginSample = frame2sampleIndex(soundSegment(i).begin, frameSize, 
overlap); soundSegment(i).endSample = min(length(y), frame2sampleIndex(soundSegment(i).end, frameSize, overlap)); soundSegment(i).beginFrame = soundSegment(i).begin; soundSegment(i).endFrame = soundSegment(i).end; end soundSegment=rmfield(soundSegment, 'begin'); soundSegment=rmfield(soundSegment, 'end'); % soundSegment=rmfield(soundSegment, 'duration'); end others.volume=volume; others.volTh1=v

评论收藏

内容反馈

版权申诉