/**Calculates the mel-based cepstra coefficients for one frame of speech.
* Based on the original MFCC implementation described in:
* [1] Davis & Mermelstein - IEEE Transactions on ASSP, August 1980.
* Additional references are:
* [2] Joseph Picone, Proceedings of the IEEE, Sep. 1993.
* [3] Jankowski et al. IEEE Trans. on Speech and Audio Processing. July, 1995.
* [4] Cardin et al, ICASSP'93 - pp. II-243
*
* Notice that there are several different implementations of the mel filter
* bank. For example, the log is usually implemented after having the filter
* outputs calculated, but could be implemented before filtering. Besides, there are
* differences in the specification of the filter frequencies. [1]
* suggested linear scale until 1000 Hz and logarithm scale afterwards.
* This implementation uses the equation (10) in [2]:
* mel frequency = 2595 log(1 + (f/700)), where log is base 10
* to find the filter bank center frequencies.
*
* @author Aldebaro Klautau
* @version 2.0 - March 07, 2001
* @see MFCCPatternGenerator
*/
//if m_oisZeroThCepstralCoefficientCalculated is true,
//this class decrements m_nnumberOfParameters by 1 and
//adds the 0-th coefficient to complete a vector with
//the number of MFCC's specified by the user.
public class MFCC {
// parameter USEPOWER in HTK, where default is false
private static final boolean m_ousePowerInsteadOfMagnitude = false;
/**
 * Number of MFCCs per speech frame (the dimensionality of each frame's
 * feature vector). When the 0-th coefficient is requested, the constructor
 * stores the user-supplied count minus one here.
 */
private final int m_nnumberOfParameters;
/**
 * Sampling frequency (sampling rate) of the input signal.
 */
private final double m_dsamplingFrequency;
/**
 * Number of filters in the mel filter bank.
 */
private final int m_nnumberOfFilters;
/**
 * Number of FFT points used for each transform.
 */
private final int m_nFFTLength;
/**
 * Coefficient of filtering performing in cepstral domain (called
 * 'liftering' operation). It is not used if m_oisLifteringEnabled is false.
 */
private final int m_nlifteringCoefficient;
/**
 * True enables liftering (flag that switches the cepstral-domain
 * filtering on or off).
 */
private final boolean m_oisLifteringEnabled;
/**
 * Minimum value of filter output, otherwise the log is not calculated and
 * m_dlogFilterOutputFloor is adopted. ISIP implementation assumes
 * m_dminimumFilterOutput = 1 and this value is used here.
 */
private final double m_dminimumFilterOutput = 1.0;
/**
 * True if the zero'th MFCC (the log-energy-like term) should be calculated.
 */
private final boolean m_oisZeroThCepstralCoefficientCalculated;
/**
 * Floor value for filter output in log domain. ISIP implementation assumes
 * m_dlogFilterOutputFloor = 0 and this value is used here.
 */
private final double m_dlogFilterOutputFloor = 0.0;
// NOTE(review): presumably the DFT-bin boundaries of each mel filter, filled
// in by calculateMelBasedFilterBank() - that method is not visible here; confirm.
private int[][] m_nboundariesDFTBins;
// NOTE(review): presumably the per-filter triangular weights, filled in by
// calculateMelBasedFilterBank() - confirm against that method.
private double[][] m_dweights;
// FFT object created once in the constructor for m_nFFTLength points.
private FFT m_fft;
// DCT matrix of size [m_nnumberOfParameters][m_nnumberOfFilters], filled by
// initializeDCTMatrix().
private double[][] m_ddCTMatrix;
// Scratch buffer of length m_nnumberOfFilters, allocated once in the
// constructor so that it is not re-allocated for every frame.
private double[] m_dfilterOutput;
// Liftering weights 1 + (L / 2) * sin(pi * (i + 1) / L), where L is
// m_nlifteringCoefficient; null when liftering is disabled.
private final double[] m_nlifteringMultiplicationFactor;
// things to be calculated just once:
// sqrt(2 / m_nnumberOfFilters); kept to reproduce the numbers ISIP gets.
private final double m_dscalingFactor;
/**
 * Creates an MFCC calculator and precomputes everything that does not depend
 * on the speech frame: the mel filter bank, the FFT object, the DCT matrix,
 * the scaling factor and (if enabled) the liftering weights.
 * <p>
 * The 0-th coefficient is included in nnumberOfParameters. So, if one wants
 * 12 MFCC's and additionally the 0-th coefficient, one should call the
 * constructor with nnumberOfParameters = 13 and
 * oisZeroThCepstralCoefficientCalculated = true.
 *
 * @param nnumberOfParameters number of coefficients per frame requested by
 *        the caller, counting the 0-th coefficient when it is requested
 * @param dsamplingFrequency sampling frequency of the input signal
 * @param nnumberofFilters number of filters in the mel filter bank
 * @param nFFTLength number of FFT points
 * @param oisLifteringEnabled true to enable liftering
 * @param nlifteringCoefficient liftering coefficient; only meaningful when
 *        liftering is enabled
 * @param oisZeroThCepstralCoefficientCalculated true if the 0-th cepstral
 *        coefficient should be calculated
 * @throws IllegalArgumentException if liftering is enabled and the number of
 *         coefficients to lift exceeds the liftering coefficient
 */
public MFCC(int nnumberOfParameters, double dsamplingFrequency,
        int nnumberofFilters, int nFFTLength, boolean oisLifteringEnabled,
        int nlifteringCoefficient,
        boolean oisZeroThCepstralCoefficientCalculated) {
    m_oisZeroThCepstralCoefficientCalculated = oisZeroThCepstralCoefficientCalculated;
    // The caller's count includes the 0-th coefficient; internally only the
    // remaining coefficients are counted, so the user never notices the
    // decrement.
    m_nnumberOfParameters = oisZeroThCepstralCoefficientCalculated
            ? nnumberOfParameters - 1
            : nnumberOfParameters;
    m_dsamplingFrequency = dsamplingFrequency;
    m_nnumberOfFilters = nnumberofFilters;
    m_nFFTLength = nFFTLength;

    // The filter bank weights, the FFT object and the DCT matrix are
    // initialized once here to save computations later.
    calculateMelBasedFilterBank(dsamplingFrequency, nnumberofFilters,
            nFFTLength);
    m_fft = new FFT(m_nFFTLength);
    // Relies on m_nnumberOfParameters and m_nnumberOfFilters set above.
    initializeDCTMatrix();

    m_nlifteringCoefficient = nlifteringCoefficient;
    m_oisLifteringEnabled = oisLifteringEnabled;

    // Allocated once so the buffer is not re-created for every frame.
    m_dfilterOutput = new double[m_nnumberOfFilters];

    // Only a scaling factor, kept for the sake of getting the same numbers
    // ISIP gets.
    m_dscalingFactor = Math.sqrt(2.0 / m_nnumberOfFilters);

    if (m_oisLifteringEnabled) {
        // The number of coefficients to lift is m_nnumberOfParameters even
        // when the 0-th coefficient is calculated, because the 0-th
        // coefficient is not liftered. The original code built an Error
        // here but never threw it, so the check had no effect.
        if (m_nnumberOfParameters > m_nlifteringCoefficient) {
            throw new IllegalArgumentException(
                    "Liftering is enabled and the number "
                            + "of parameters = "
                            + m_nnumberOfParameters
                            + ", while "
                            + "the liftering coefficient is "
                            + m_nlifteringCoefficient
                            + ". In this case some cepstrum coefficients would be made "
                            + "equal to zero due to liftering, what does not make much "
                            + "sense in a speech recognition system. You may want to "
                            + "increase the liftering coefficient or decrease the number "
                            + "of MFCC parameters.");
        }
        // weight[i] = 1 + (L / 2) * sin(pi * (i + 1) / L)
        m_nlifteringMultiplicationFactor = new double[m_nlifteringCoefficient];
        double dfactor = m_nlifteringCoefficient / 2.0;
        double dfactor2 = Math.PI / m_nlifteringCoefficient;
        for (int i = 0; i < m_nlifteringCoefficient; i++) {
            m_nlifteringMultiplicationFactor[i] = 1.0 + dfactor
                    * Math.sin(dfactor2 * (i + 1));
        }
    } else {
        m_nlifteringMultiplicationFactor = null;
    }
}
/**
 * Initializes the DCT matrix.
 * <p>
 * Entry [i][j] is cos((i + 1) * (j + 0.5) * pi / numberOfFilters). The loop
 * indices run from 0 to count - 1 whereas the textbook formula is indexed
 * from 1, which is why one is added to both indices.
 */
private void initializeDCTMatrix() {
    final int coefficientCount = m_nnumberOfParameters;
    final int filterCount = m_nnumberOfFilters;
    final double angleStep = Math.PI / m_nnumberOfFilters;
    final double[][] dct = new double[coefficientCount][filterCount];
    for (int row = 0; row < coefficientCount; row++) {
        final double order = row + 1.0;
        final double[] basis = dct[row];
        for (int col = 0; col < filterCount; col++) {
            basis[col] = Math.cos(order * (col + 1.0 - 0.5) * angleStep);
        }
    }
    m_ddCTMatrix = dct;
}
/**
 * Converts frequencies in Hz to the mel scale using
 * mel = 2595 * log10(1 + f / 700), where f is the frequency in Hz.
 *
 * @param dhzFrequencies frequencies in Hz; the array is not modified
 * @param dsamplingFrequency not used by this method; it is part of the
 *        existing signature and is kept for callers
 * @return a new array of the same length holding the mel-scale values
 */
public static double[] convertHzToMel(double[] dhzFrequencies,
        double dsamplingFrequency) {
    // Natural log of 10, used to turn Math.log into a base-10 logarithm.
    final double naturalLogOfTen = Math.log(10);
    final double[] melValues = new double[dhzFrequencies.length];
    int index = 0;
    for (double hertz : dhzFrequencies) {
        final double naturalLog = Math.log(1.0 + (hertz / 700.0));
        melValues[index++] = 2595.0 * (naturalLog / naturalLogOfTen);
    }
    return melValues;
}
/**
* Calculates triangular filters. 三角带通滤波器(Triangular Bandpass
* Filters):将能量频谱能量乘以一组 20 个三角带通滤波器�