// Tor's Speech Recognition reference code...
//
// Copyright (C) 2001
// Tor Aamodt <[email protected]>
// University of Toronto
//
// For use with the "Float-to-Fixed Conversion Tool"
// available from http://www.float-to-fixed.com
// Achieves 100% recognition accuracy with the following inputs:
// http://www.eecg.utoronto.ca/~aamodt/ece341/speech-recognition/train_test.dsp.gz
//
#ifdef GCCmod
#include "traps.h"
#else
#ifndef WORKSTATION
#define WORKSTATION
#endif
#endif
#ifdef WORKSTATION
#ifdef WIN32
#include "StdAfx.h"
#endif
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#pragma warning(disable : 4005)
#endif
// Number of consecutive samples that analyze() folds into one energy value
// per filter.
#define NSAMPLES_PER_INTERVAL 250
// Number of samples in one word recording (the size of Buffer).
#define NSAMPLES 4000
// Length of one fingerprint vector: analyze() emits 5 values (one per filter)
// for each of the NSAMPLES/NSAMPLES_PER_INTERVAL = 16 intervals.
#define NDIM 80
// Number of dictionary words; main() also runs this many recognition passes.
#define NWORDS 5
// Most recent word recording, filled by get_sample().
float Buffer[NSAMPLES];
// Fingerprint of the recording currently being recognized (written by
// analyze() from main()'s second loop).
float word[NDIM];
// Reference fingerprints, one row per dictionary word (written by analyze()
// from main()'s first loop).
float dic[NWORDS][NDIM];
// Forward declarations for the routines defined later in this file.
void initialize();
void get_sample();
void analyze( float *, float * );
int lookup( float[][NDIM], float * );
void control( int, int );
float euclidean( float *a, float *b, int length );
// Per-sample filters used by analyze(); each takes one input sample and
// returns one output sample.
float filter1( float x );
float filter2( float x );
float filter3( float x );
float filter4( float x );
float filter5( float x );
// Program entry point.
//
// Phase 1 (training): reads NWORDS recordings and stores the fingerprint of
// each one in the dictionary `dic`.
// Phase 2 (recognition): reads NWORDS further recordings, fingerprints each
// into `word`, finds the closest dictionary entry with lookup() and reports
// it through control().
//
// Returns 0 on completion.
int main( void )
{
    int i;
#ifdef WORKSTATION
    int j;      // only needed for the diagnostic maximum below
    float max;
#endif

    initialize(); // open files (if testing on workstation),
                  // initialize interrupts (if on UltraGizmo board)

    // Phase 1: build the dictionary.
    for( i=0; i<NWORDS; i++ ) {
        get_sample(); // read in a word sample (assume framing is done by low level routine)
        analyze( Buffer, dic[i] );
#ifdef WORKSTATION
        // Diagnostic only: largest positive fingerprint component
        // (the search starts at 0, so the printed value is never negative).
        max = 0.0f;
        for( j=0; j < NDIM; j++ ) {
            if( dic[i][j] > max ) max = dic[i][j];
        }
        printf("word %d max = %f\n", i, max );
#endif
    }

    // Phase 2: recognize the test recordings.
    for( i=0; i<NWORDS; i++ ) {
        int command = -1;
        // assume keyboard i/o is used to interrupt this loop
        get_sample();
        analyze( Buffer, word );
#ifdef WORKSTATION
        printf( "doing recognition on test vector %d...\n", i );
#endif
        command = lookup( dic, word );
        control( command, i );
#ifdef WORKSTATION
        printf( "\n" );
#endif
    }

    return 0;
}
////////////////////////////////////////////////////////////////////////////////
//
#ifdef WORKSTATION
// Input stream with the recorded samples; opened by initialize() and read by
// get_sample().
FILE *g_infile;

// Opens the sample file "train_test.dsp" for reading.
//
// If the file cannot be opened, a diagnostic is written to stderr and the
// program exits with EXIT_FAILURE. The check is explicit rather than an
// assert() so that it is still performed when NDEBUG is defined.
void initialize()
{
    g_infile = fopen( "train_test.dsp", "r" );
    if( g_infile == NULL ) {
        perror( "train_test.dsp" );
        exit( EXIT_FAILURE );
    }
}
#else
// Nothing to set up in this build configuration.
void initialize() { }
#endif
////////////////////////////////////////////////////////////////////////////////
// Loads the next word recording (NSAMPLES samples) into Buffer.
//
// WORKSTATION builds read one value per fscanf() call from g_infile. If the
// file ends early or contains a token that is not a number, a warning is
// written to stderr and the rest of Buffer is zero-filled, so no samples from
// the previous recording are left behind.
// GCCmod builds hand Buffer to input_dsp(), which is declared outside this
// file.
void get_sample()
{
#ifdef WORKSTATION
    int i;
    for( i=0; i < NSAMPLES; i++ ) {
        if( fscanf( g_infile, "%g\n", Buffer+i ) != 1 ) {
            fprintf( stderr,
                     "get_sample: expected %d samples but read only %d\n",
                     NSAMPLES, i );
            break;
        }
    }
    // Only runs after a short read: pad the missing samples with zeros.
    for( ; i < NSAMPLES; i++ ) {
        Buffer[i] = 0.0f;
    }
#endif
#ifdef GCCmod
    input_dsp( Buffer, NSAMPLES, 0 );
#endif
}
////////////////////////////////////////////////////////////////////////////////
void analyze( float *sample, float *fingerprint )
{
int i, k, x;
k = 0;
for( i=0; i < NSAMPLES; i+= NSAMPLES_PER_INTERVAL ) {
int j;
double y1=0.0f, y2=0.0f, y3=0.0f, y4=0.0f, y5=0.0f;
double m;
for( j=0; j < NSAMPLES_PER_INTERVAL; j++ ) {
float y = filter1( sample[i+j] );
y1 += y*y;
}
m = frexp(y1+1, &x);
fingerprint[k++] = x+m;
for( j=0; j < NSAMPLES_PER_INTERVAL; j++ ) {
float y = filter2( sample[i+j] );
y2 += y*y;
}
m = frexp(y2+1, &x);
fingerprint[k++] = x+m;
for( j=0; j < NSAMPLES_PER_INTERVAL; j++ ) {
float y = filter3( sample[i+j] );
y3 += y*y;
}
m = frexp(y3+1, &x);
fingerprint[k++] = x+m;
for( j=0; j < NSAMPLES_PER_INTERVAL; j++ ) {
float y = filter4( sample[i+j] );
y4 += y*y;
}
m = frexp(y4+1, &x);
fingerprint[k++] = x+m;
for( j=0; j < NSAMPLES_PER_INTERVAL; j++ ) {
float y = filter5( sample[i+j] );
y5 += y*y;
}
m = frexp(y5+1, &x);
fingerprint[k++] = x+m;
}
}
// Finds the dictionary row closest to `word`.
//
// dic  - NWORDS fingerprints of NDIM values each.
// word - fingerprint to classify (NDIM values).
//
// Returns the index of the row with the smallest euclidean() value; when
// several rows tie, the lowest index wins. WORKSTATION builds also print
// every distance.
int lookup( float dic[][NDIM], float *word )
{
    int candidate;
    int best_index = 0;
    // Row 0 is the initial best guess.
    float best_distance = euclidean( dic[0], word, NDIM );
#ifdef WORKSTATION
    printf( "distance 0 = %g\n", best_distance );
#endif
    candidate = 1;
    while( candidate < NWORDS ) {
        float distance = euclidean( dic[candidate], word, NDIM );
#ifdef WORKSTATION
        printf( "distance %d = %g\n", candidate, distance );
#endif
        if( distance < best_distance ) {
            best_index = candidate;
            best_distance = distance;
        }
        candidate++;
    }
    return best_index;
}
// Returns the sum of squared element-wise differences between the first
// `length` elements of `a` and `b` (no square root is taken).
// A `length` of zero or less yields 0.
float euclidean( float *a, float *b, int length )
{
    const float *pa = a;
    const float *pb = b;
    float sum_sq = 0.0f;
    int remaining;
    for( remaining = length; remaining > 0; --remaining ) {
        float diff = *pa - *pb;
        sum_sq += diff*diff;
        ++pa;
        ++pb;
    }
    return sum_sq;
}
////////////////////////////////////////////////////////////////////////////////
// Reports one recognition result.
//
// command - dictionary index chosen by lookup().
// i       - index of the test vector that was classified.
//
// WORKSTATION builds print the pairing; GCCmod builds pass `command` to
// output_dsp(), which is declared outside this file.
void control( int command, int i )
{
#if defined( WORKSTATION )
    printf( "word %d was recognized as word %d\n", i, command );
#endif
#if defined( GCCmod )
    output_dsp( &command, 1, 1 );
#endif
}
// Coefficients for filter1. Section 1 uses A11/A21 (feedback) and
// B01/B11/B21 (feed-forward); section 2 uses A12/A22 and B02/B12/B22.
// G scales the final output.
#define A11 -1.42116311276060f
#define A21 0.51814706507531f
#define B01 0.05114107979340f
#define B11 -0.01658864464707f
#define B21 0.05114107979340f
#define A12 -1.70960095672886f
#define A22 0.80456561825031f
#define B02 2.51696834558782f
#define B12 -3.92574709726961f
#define B22 2.51696834558782f
#define G 0.09698395231471f
// Filters one sample through a cascade of two second-order IIR sections
// (direct form II) and returns the scaled output.
//
// The delay-line state is static: it persists across calls and this function
// never resets it, so successive calls are treated as one continuous stream.
float filter1( float x )
{
    // d0N / d1N: intermediate value of section N from one / two calls ago.
    static float d01 = 0.0f;
    static float d11 = 0.0f;
    static float d02 = 0.0f;
    static float d12 = 0.0f;
    float y1, y2, t0, t1;
    /* first 2nd-order filter stage */
    t0 = x - A11*d01 - A21*d11;
    y1 = B01*t0 + B11*d01 + B21*d11;
    d11 = d01;
    d01 = t0;
    /* second 2nd-order filter stage */
    t1 = y1 - A12*d02 - A22*d12;
    // The last term must use this section's own coefficient B22; the earlier
    // code used section 1's B21 here and left B22 unused.
    y2 = B02*t1 + B12*d02 + B22*d12;
    d12 = d02;
    d02 = t1;
    return G*y2;
}
#undef A11
#undef A21
#undef B01
#undef B11
#undef B21
#undef A12
#undef A22
#undef B02
#undef B12
#undef B22
#undef G
/******************************************************************************/
// Coefficients for filter2. Section 1 uses A11/A21 (feedback) and
// B01/B11/B21 (feed-forward); section 2 uses A12/A22 and B02/B12/B22.
// G scales the final output.
#define A11 -1.53842908421193f
#define A21 0.84855856514738f
#define B01 0.14854092486721f
#define B11 -0.18034827240976f
#define B21 0.14854092486721f
#define A12 -1.71544355837494f
#define A22 0.88466680917388f
#define B02 7.79767408119502f
#define B12 -15.05579486140652f
#define B22 7.79767408119503f
#define G 0.08332454239234f
// Filters one sample through a cascade of two second-order IIR sections
// (direct form II) and returns the scaled output.
//
// The delay-line state is static: it persists across calls and this function
// never resets it, so successive calls are treated as one continuous stream.
float filter2( float x )
{
    // d0N / d1N: intermediate value of section N from one / two calls ago.
    static float d01 = 0.0f;
    static float d11 = 0.0f;
    static float d02 = 0.0f;
    static float d12 = 0.0f;
    float y1, y2, t0, t1;
    /* first 2nd-order filter stage */
    t0 = x - A11*d01 - A21*d11;
    y1 = B01*t0 + B11*d01 + B21*d11;
    d11 = d01;
    d01 = t0;
    /* second 2nd-order filter stage */
    t1 = y1 - A12*d02 - A22*d12;
    // The last term must use this section's own coefficient B22; the earlier
    // code used section 1's B21 here and left B22 unused.
    y2 = B02*t1 + B12*d02 + B22*d12;
    d12 = d02;
    d02 = t1;
    return G*y2;
}
#undef A11
#undef A21
#undef B01
#undef B11
#undef B21
#undef A12
#undef A22
#undef B02
#undef B12
#undef B22
#undef G
/******************************************************************************/
// Coefficients for filter3. Section 1 uses A11/A21 (feedback) and
// B01/B11/B21 (feed-forward); section 2 uses A12/A22 and B02/B12/B22.
// G scales the final output.
#define A11 -0.68409535605940f
#define A21 0.81492279180742f
#define B01 0.27236347795640f
#define B11 -0.00976249855546f
#define B21 0.27236347795640f
#define A12 -1.03911439917059f
#define A22 0.83366555983124f
#define B02 2.13458312080822f
#define B12 -3.28873727029416f
#define B22 2.13458312080822f
#define G 0.17130780020174f
// Filters one sample through a cascade of two second-order IIR sections
// (direct form II) and returns the scaled output.
//
// The delay-line state is static: it persists across calls and this function
// never resets it, so successive calls are treated as one continuous stream.
float filter3( float x )
{
    // d0N / d1N: intermediate value of section N from one / two calls ago.
    static float d01 = 0.0f;
    static float d11 = 0.0f;
    static float d02 = 0.0f;
    static float d12 = 0.0f;
    float y1, y2, t0, t1;
    /* first 2nd-order filter stage */
    t0 = x - A11*d01 - A21*d11;
    y1 = B01*t0 + B11*d01 + B21*d11;
    d11 = d01;
    d01 = t0;
    /* second 2nd-order filter stage */
    t1 = y1 - A12*d02 - A22*d12;
    // The last term must use this section's own coefficient B22; the earlier
    // code used section 1's B21 here and left B22 unused.
    y2 = B02*t1 + B12*d02 + B22*d12;
    d12 = d02;
    d02 = t1;
    return G*y2;
}
#undef A11
#undef A21
#undef B01
#undef B11
#undef B21
#undef A12
#undef A22
#undef B02
#undef B12
#undef B22
#undef G
/******************************************************************************/
#define A11 0.10267001417836f
#define A21 0.82136833847176f
#define B01 0.35674871341912f
#define B11 -0.19766424653713f
#define B21 0.35674871341912f
#define A12 0.49110760091747f
#define A22 0.82712351283885f
#define B02 1.56380116360973f
#define B12 1.7159728855329