typedef float *Vector; /* vector[1..size] */
/* -------------------- MFCC Related Operations -------------------- */
/* EXPORT->Mel: return mel-frequency corresponding to given FFT index */
float Mel(int k,float fres)
{
return 1127 * log(1 + (k-1)*fres);
}
/* EXPORT->WarpFreq: return warped frequency */
float WarpFreq (float fcl, float fcu, float freq, float minFreq, float maxFreq , float alpha)
{
if (alpha == 1.0)
return freq;
else {
float scale = 1.0 / alpha;
float cu = fcu * 2 / (1 + scale);
float cl = fcl * 2 / (1 + scale);
float au = (maxFreq - cu * scale) / (maxFreq - cu);
float al = (cl * scale - minFreq) / (cl - minFreq);
if (freq > cu)
return au * (freq - cu) + scale * cu ;
else if (freq < cl)
return al * (freq - minFreq) + minFreq ;
else
return scale * freq ;
}
}
/* EXPORT->InitFBank: Initialise an FBankInfo record */
FBankInfo InitFBank(MemHeap *x, int frameSize, long sampPeriod, int numChans,
float lopass, float hipass, Boolean usePower, Boolean takeLogs,
Boolean doubleFFT,
float alpha, float warpLowCut, float warpUpCut)
{
FBankInfo fb;
float mlo,mhi,ms,melk;
int k,chan,maxChan,Nby2;
/* Save sizes to cross-check subsequent usage */
fb.frameSize = frameSize; fb.numChans = numChans;
fb.sampPeriod = sampPeriod;
fb.usePower = usePower; fb.takeLogs = takeLogs;
/* Calculate required FFT size */
fb.fftN = 2;
while (frameSize>fb.fftN) fb.fftN *= 2;
if (doubleFFT)
fb.fftN *= 2;
Nby2 = fb.fftN / 2;
fb.fres = 1.0E7/(sampPeriod * fb.fftN * 700.0);
maxChan = numChans+1;
/* set lo and hi pass cut offs if any */
fb.klo = 2; fb.khi = Nby2; /* apply lo/hi pass filtering */
mlo = 0; mhi = Mel(Nby2+1,fb.fres);
if (lopass>=0.0) {
mlo = 1127*log(1+lopass/700.0);
fb.klo = (int) ((lopass * sampPeriod * 1.0e-7 * fb.fftN) + 2.5);
if (fb.klo<2) fb.klo = 2;
}
if (hipass>=0.0) {
mhi = 1127*log(1+hipass/700.0);
fb.khi = (int) ((hipass * sampPeriod * 1.0e-7 * fb.fftN) + 0.5);
if (fb.khi>Nby2) fb.khi = Nby2;
}
if (trace&T_MEL){
printf("FFT passband %d to %d out of 1 to %d\n",fb.klo,fb.khi,Nby2);
printf("Mel passband %f to %f\n",mlo,mhi);
}
/* Create vector of fbank centre frequencies */
fb.cf = CreateVector(x,maxChan);
ms = mhi - mlo;
for (chan=1; chan <= maxChan; chan++) {
if (alpha == 1.0) {
fb.cf[chan] = ((float)chan/(float)maxChan)*ms + mlo;
}
else {
/* scale assuming scaling starts at lopass */
float minFreq = 700.0 * (exp (mlo / 1127.0) - 1.0 );
float maxFreq = 700.0 * (exp (mhi / 1127.0) - 1.0 );
float cf = ((float)chan / (float) maxChan) * ms + mlo;
cf = 700 * (exp (cf / 1127.0) - 1.0);
fb.cf[chan] = 1127.0 * log (1.0 + WarpFreq (warpLowCut, warpUpCut, cf, minFreq, maxFreq, alpha) / 700.0);
}
}
/* Create loChan map, loChan[fftindex] -> lower channel index */
fb.loChan = CreateShortVec(x,Nby2);
for (k=1,chan=1; k<=Nby2; k++){
melk = Mel(k,fb.fres);
if (k<fb.klo || k>fb.khi) fb.loChan[k]=-1;
else {
while (fb.cf[chan] < melk && chan<=maxChan) ++chan;
fb.loChan[k] = chan-1;
}
}
/* Create vector of lower channel weights */
fb.loWt = CreateVector(x,Nby2);
for (k=1; k<=Nby2; k++) {
chan = fb.loChan[k];
if (k<fb.klo || k>fb.khi) fb.loWt[k]=0.0;
else {
if (chan>0)
fb.loWt[k] = ((fb.cf[chan+1] - Mel(k,fb.fres)) /
(fb.cf[chan+1] - fb.cf[chan]));
else
fb.loWt[k] = (fb.cf[1]-Mel(k,fb.fres))/(fb.cf[1] - mlo);
}
}
/* Create workspace for fft */
fb.x = CreateVector(x,fb.fftN);
return fb;
}
/* EXPORT->Wave2FBank: Perform filterbank analysis on speech s */
void Wave2FBank(Vector s, Vector fbank, float *te, FBankInfo info)
{
const float melfloor = 1.0;
int k, bin;
float t1,t2; /* real and imag parts */
float ek; /* energy of k'th fft channel */
/* Check that info record is compatible */
if (info.frameSize != VectorSize(s))
HError(5321,"Wave2FBank: frame size mismatch");
if (info.numChans != VectorSize(fbank))
HError(5321,"Wave2FBank: num channels mismatch");
/* Compute frame energy if needed */
if (te != NULL){
*te = 0.0;
for (k=1; k<=info.frameSize; k++)
*te += (s[k]*s[k]);
}
/* Apply FFT */
for (k=1; k<=info.frameSize; k++)
info.x[k] = s[k]; /* copy to workspace */
for (k=info.frameSize+1; k<=info.fftN; k++)
info.x[k] = 0.0; /* pad with zeroes */
Realft(info.x); /* take fft */
/* Fill filterbank channels */
ZeroVector(fbank);
for (k = info.klo; k <= info.khi; k++) { /* fill bins */
t1 = info.x[2*k-1]; t2 = info.x[2*k];
if (info.usePower)
ek = t1*t1 + t2*t2;
else
ek = sqrt(t1*t1 + t2*t2);
bin = info.loChan[k];
t1 = info.loWt[k]*ek;
if (bin>0) fbank[bin] += t1;
if (bin<info.numChans) fbank[bin+1] += ek - t1;
}
/* Take logs */
if (info.takeLogs)
for (bin=1; bin<=info.numChans; bin++) {
t1 = fbank[bin];
if (t1<melfloor) t1 = melfloor;
fbank[bin] = log(t1);
}
}
/* EXPORT->FBank2MFCC: compute first n cepstral coeff */
void FBank2MFCC(Vector fbank, Vector c, int n)
{
int j,k,numChan;
float mfnorm,pi_factor,x;
numChan = VectorSize(fbank);
mfnorm = sqrt(2.0/(float)numChan);
pi_factor = PI/(float)numChan;
for (j=1; j<=n; j++) {
c[j] = 0.0; x = (float)j * pi_factor;
for (k=1; k<=numChan; k++)
c[j] += fbank[k] * cos(x*(k-0.5));
c[j] *= mfnorm;
}
}
/* EXPORT->FBank2MelSpec: convert log fbank to linear */
void FBank2MelSpec(Vector fbank)
{
int i;
for (i=1; i<=VectorSize(fbank); i++)
fbank[i] = exp(fbank[i]);
}
/* EXPORT->MelSpec2FBank: convert lin mel spectrum to log fbank */
void MelSpec2FBank(Vector melspec)
{
int i;
float x;
for (i=1; i<=VectorSize(melspec); i++){
x = melspec[i];
if (x<1.0) x = 1.0;
melspec[i] = log(x);
}
}
/* EXPORT->FBank2C0: return zero'th cepstral coefficient */
float FBank2C0(Vector fbank)
{
int k,numChan;
float mfnorm,sum;
numChan = VectorSize(fbank);
mfnorm = sqrt(2.0/(float)numChan);
sum = 0.0;
for (k=1; k<=numChan; k++)
sum += fbank[k];
return sum * mfnorm;
}