void CMFCC::init(float SampleF,char* mempool, int &mpidx,long DefFFTLen,long DefFrameLen,long DefSubBandNum, long DefCepstrumNum) { long M, SubBandNo, i, j; float ms,pi_factor, mfnorm, a, melk, t; MPidx = &mpidx; m_pMemPool = mempool; historyMPidx = mpidx; FrameLen = DefFrameLen; SubBandNum = DefSubBandNum; CepstrumNum = DefCepstrumNum; initFFT(m_pMemPool,mpidx,DefFFTLen); FFT_LEN = GetFFTAnalyseLen(); // std::cout <<"FFT_LEN = "<<FFT_LEN<<std::endl; if(((unsigned int)(m_pMemPool + mpidx))%4) mpidx += 4 - ((unsigned int)(m_pMemPool + mpidx))%4; cosTab = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * (SubBandNum+1)*(SubBandNum+1); //DCT变换系数 hamWin = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * FrameLen; //hamming 窗系数 cepWin = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * (SubBandNum+1); MelBandBoundary = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * (SubBandNum+2); SubBandWeight = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * FFT_LEN/2; SubBandIndex = (long*)(m_pMemPool + mpidx); mpidx += sizeof(long) * FFT_LEN/2; SubBandEnergy = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * (SubBandNum+2); //for accumulating the corresponding FFTFrame = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * (FFT_LEN); //for accumulating the corresponding Cepstrum = (float*)(m_pMemPool + mpidx); mpidx += sizeof(float) * (SubBandNum+1); //for accumulating the corresponding fres = SampleF/(FFT_LEN*700.0f); //caculating the mel scale sub-band boundary //由于子带0的系数(直流分量)不参与MFCC特征计算,所以子带个数要多一个 M = SubBandNum+1; ms = Mel(FFT_LEN/2); //计算1/2采样率所对应的MEL刻度 //Note that the sub-band 0 is not used for cepstrum caculating for ( SubBandNo = 0; SubBandNo <= M; SubBandNo++ ) { //计算每个子带的起始MEL刻度 MelBandBoundary[SubBandNo] = ( (float)SubBandNo/(float)M )*ms; } //mapping the FFT frequence component into the corresponding sub-band for ( i=0,SubBandNo=1; i< FFT_LEN/2; i++) { melk = Mel(i); while( MelBandBoundary[SubBandNo] < melk ) SubBandNo++; 
SubBandIndex[i] = SubBandNo-1; } //caculating the weighting coefficients for each FFT frequence components for(i=0; i< FFT_LEN/2; i++) { //以子带的起始MEL频率为中心,计算三角窗加权系数 SubBandNo = SubBandIndex[i]; SubBandWeight[i] = (MelBandBoundary[SubBandNo+1]-Mel(i))/(MelBandBoundary[SubBandNo+1]-MelBandBoundary[SubBandNo]); } pi_factor = (float)( asin(1.0)*2.0/(float)SubBandNum ); mfnorm = (float)sqrt(2.0f/(float)SubBandNum); for( i=1; i<= CepstrumNum; i++ ) { t = (float)i*pi_factor; for(j=1; j<=SubBandNum; j++) cosTab[i*(SubBandNum+1)+j] = (float)cos(t*(j-0.5f))*mfnorm; } a =(float)( asin(1.0)*4/(FrameLen-1) ); for(i=0;i<FrameLen;i++) hamWin[i] = 0.54f - 0.46f * (float)cos(a*i); for(i=1;i<=CepstrumNum;i++) cepWin[i-1] = (float)i * (float)exp(-(float)i*2.0/(float)CepstrumNum); }
/**
 * Construct an MFCC analyser whose work tables are allocated with new[].
 *
 * @param SampleF         sampling frequency; used to derive fres
 * @param DefFFTLen       FFT length handed to the CFFTanalyser base
 * @param DefFrameLen     frame length (number of Hamming window taps)
 * @param DefSubBandNum   number of mel sub-bands
 * @param DefCepstrumNum  number of cepstral coefficients
 *
 * The table sizes below require DefCepstrumNum <= DefSubBandNum, and the
 * Hamming step divides by (FrameLen-1).
 */
CMFCC::CMFCC(float SampleF,long DefFFTLen,long DefFrameLen,long DefSubBandNum, long DefCepstrumNum)
    : CFFTanalyser(DefFFTLen)
{
    FrameLen = DefFrameLen;
    SubBandNum = DefSubBandNum;
    CepstrumNum = DefCepstrumNum;
    FFT_LEN = GetFFTAnalyseLen();
    // std::cout <<"FFT_LEN = "<<FFT_LEN<<std::endl;

    // Work tables.
    cosTab = new float[(SubBandNum+1)*(SubBandNum+1)];  // DCT coefficients
    hamWin = new float[FrameLen];                       // Hamming window coefficients
    cepWin = new float[SubBandNum+1];                   // cepstral weighting window
    MelBandBoundary = new float[SubBandNum+2];          // mel value of each band boundary
    SubBandWeight = new float[FFT_LEN/2];               // weight for the energy of each FFT bin
    SubBandIndex = new long[FFT_LEN/2];                 // FFT bin -> sub-band number
    SubBandEnergy = new float[SubBandNum+2];            // per-band accumulation buffer
    FFTFrame = new float[FFT_LEN];
    Cepstrum = new float[SubBandNum+1];

    // Has to be in place before Mel() is first called.
    fres = SampleF/(FFT_LEN*700.0f);

    // Mel-scale band boundaries. Sub-band 0 (the DC component) is not used
    // for the cepstrum, hence one band more than SubBandNum.
    long bandTotal = SubBandNum+1;
    float melAtHalfRate = Mel(FFT_LEN/2);   // mel value at half the sampling rate
    for (long band = 0; band <= bandTotal; band++)
    {
        MelBandBoundary[band] = ( (float)band/(float)bandTotal )*melAtHalfRate;
    }

    // Assign each FFT bin to a sub-band; the band cursor only moves forward.
    long cursor = 1;
    for (long bin = 0; bin < FFT_LEN/2; bin++)
    {
        float melOfBin = Mel(bin);
        for (; MelBandBoundary[cursor] < melOfBin; cursor++)
        {
        }
        SubBandIndex[bin] = cursor-1;
    }

    // Triangular weight of each FFT bin, anchored at the starting mel
    // frequency of the bin's sub-band.
    for (long bin = 0; bin < FFT_LEN/2; bin++)
    {
        long lowerBand = SubBandIndex[bin];
        SubBandWeight[bin] = (MelBandBoundary[lowerBand+1]-Mel(bin))/(MelBandBoundary[lowerBand+1]-MelBandBoundary[lowerBand]);
    }

    // Cosine (DCT) table; asin(1.0)*2.0 is pi. Row 0 and column 0 stay
    // unwritten.
    float angleStep = (float)( asin(1.0)*2.0/(float)SubBandNum );
    float dctScale = (float)sqrt(2.0f/(float)SubBandNum);
    for (long row = 1; row <= CepstrumNum; row++)
    {
        float rowAngle = (float)row*angleStep;
        for (long col = 1; col <= SubBandNum; col++)
        {
            cosTab[row*(SubBandNum+1)+col] = (float)cos(rowAngle*(col-0.5f))*dctScale;
        }
    }

    // Hamming window; asin(1.0)*4 is 2*pi.
    float hamStep = (float)( asin(1.0)*4/(FrameLen-1) );
    for (long tap = 0; tap < FrameLen; tap++)
    {
        hamWin[tap] = 0.54f - 0.46f * (float)cos(hamStep*tap);
    }

    // Cepstral weighting window, stored 0-based.
    for (long order = 1; order <= CepstrumNum; order++)
    {
        cepWin[order-1] = (float)order * (float)exp(-(float)order*2.0/(float)CepstrumNum);
    }
}
/**
 * Build filterbank information and generate tables for MFCC computation.
 *
 * Fills in w->fb (FFT size, band limits, channel centre frequencies, the
 * FFT-bin -> lower-channel map, the lower-channel weights and the FFT work
 * buffers) and w->sqrt2var.
 *
 * @param w [i/o] MFCC calculation work area
 * @param para [in] configuration parameters
 *
 * @return TRUE on success, or FALSE when VTLN_recreate_fbank_cf() fails.
 * On that failure path w->fb.cf has already been allocated and is not
 * released here.
 */
boolean
InitFBank(MFCCWork *w, Value *para)
{
  float mlo, mhi, ms, melk;
  int k, chan, maxChan, nv2;

  /* Calculate FFT size: smallest power of two >= framesize (fftN == 2^n) */
  w->fb.fftN = 2;
  w->fb.n = 1;
  while (para->framesize > w->fb.fftN) {
    w->fb.fftN *= 2;
    w->fb.n++;
  }

  nv2 = w->fb.fftN / 2;
  w->fb.fres = 1.0E7 / (para->smp_period * w->fb.fftN * 700.0);
  maxChan = para->fbank_num + 1;

  /* Default pass band: FFT bins 2..nv2 */
  w->fb.klo = 2;
  w->fb.khi = nv2;
  mlo = 0;
  mhi = Mel(nv2 + 1, w->fb.fres);

  /* lo pass filter: lower cut-off, applied when lopass is non-negative */
  if (para->lopass >= 0) {
    mlo = 1127*log(1+(float)para->lopass/700.0);
    w->fb.klo = ((float)para->lopass * para->smp_period * 1.0e-7 * w->fb.fftN) + 2.5;
    if (w->fb.klo<2) w->fb.klo = 2;
  }
  /* hi pass filter: upper cut-off, applied when hipass is non-negative */
  if (para->hipass >= 0) {
    mhi = 1127*log(1+(float)para->hipass/700.0);
    w->fb.khi = ((float)para->hipass * para->smp_period * 1.0e-7 * w->fb.fftN) + 0.5;
    if (w->fb.khi>nv2) w->fb.khi = nv2;
  }

  /* Create vector of fbank centre frequencies (index 0 is left unset) */
  w->fb.cf = (float *)mymalloc((maxChan + 1) * sizeof(float));
  ms = mhi - mlo;
  for (chan = 1; chan <= maxChan; chan++)
    w->fb.cf[chan] = ((float)chan / maxChan)*ms + mlo;

  if (para->vtln_alpha != 1.0) {
    /* Modify fbank center frequencies for VTLN */
    if (VTLN_recreate_fbank_cf(w->fb.cf, para, mlo, mhi, maxChan) == FALSE) {
      return FALSE;
    }
  }

  /* Create loChan map, loChan[fftindex] -> lower channel index */
  w->fb.loChan = (short *)mymalloc((nv2 + 1) * sizeof(short));
  for (k = 1, chan = 1; k <= nv2; k++) {
    if (k < w->fb.klo || k > w->fb.khi) {
      w->fb.loChan[k] = -1;     /* bin lies outside the pass band */
    } else {
      melk = Mel(k, w->fb.fres);
      /* Check the bound BEFORE indexing: cf[] only has entries up to
         maxChan, so cf[maxChan + 1] must never be read. */
      while (chan <= maxChan && w->fb.cf[chan] < melk) ++chan;
      w->fb.loChan[k] = chan - 1;
    }
  }

  /* Create vector of lower channel weights */
  w->fb.loWt = (float *)mymalloc((nv2 + 1) * sizeof(float));
  for (k = 1; k <= nv2; k++) {
    chan = w->fb.loChan[k];
    if (k < w->fb.klo || k > w->fb.khi) {
      w->fb.loWt[k] = 0.0;
    } else {
      if (chan > 0)
        w->fb.loWt[k] = (w->fb.cf[chan + 1] - Mel(k, w->fb.fres)) / (w->fb.cf[chan + 1] - w->fb.cf[chan]);
      else
        /* below the first centre frequency: mlo acts as the lower edge */
        w->fb.loWt[k] = (w->fb.cf[1] - Mel(k, w->fb.fres)) / (w->fb.cf[1] - mlo);
    }
  }

  /* Create workspace for fft */
  w->fb.Re = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));
  w->fb.Im = (float *)mymalloc((w->fb.fftN + 1) * sizeof(float));

  w->sqrt2var = sqrt(2.0 / para->fbank_num);

  return TRUE;
}
/* EXPORT->InitFBank: Initialise an FBankInfo record */
/*
 * Builds and returns (by value) an FBankInfo holding the FFT size, the pass
 * band limits, the channel centre frequencies on the mel scale, the
 * FFT-bin -> lower-channel map, the lower-channel weights and an FFT work
 * vector. All vectors are created on heap x.
 *
 *   frameSize, sampPeriod, numChans   stored in the record and used to size the tables
 *   lopass, hipass                    cut-offs; each is applied only when >= 0.0
 *   usePower, takeLogs                stored in the record unchanged
 *   doubleFFT                         when set, the FFT size is doubled
 *   alpha, warpLowCut, warpUpCut      when alpha != 1.0 the centre frequencies
 *                                     are passed through WarpFreq()
 */
FBankInfo InitFBank(MemHeap *x, int frameSize, long sampPeriod, int numChans,
                    float lopass, float hipass, Boolean usePower, Boolean takeLogs,
                    Boolean doubleFFT,
                    float alpha, float warpLowCut, float warpUpCut)
{
   FBankInfo fb;
   float mlo,mhi,ms,melk;
   int k,chan,maxChan,Nby2;

   /* Save sizes to cross-check subsequent usage */
   fb.frameSize = frameSize; fb.numChans = numChans;
   fb.sampPeriod = sampPeriod;
   fb.usePower = usePower; fb.takeLogs = takeLogs;

   /* Calculate required FFT size: smallest power of two >= frameSize */
   fb.fftN = 2;
   while (frameSize>fb.fftN) fb.fftN *= 2;
   if (doubleFFT)
      fb.fftN *= 2;
   Nby2 = fb.fftN / 2;
   fb.fres = 1.0E7/(sampPeriod * fb.fftN * 700.0);
   maxChan = numChans+1;

   /* set lo and hi pass cut offs if any */
   fb.klo = 2; fb.khi = Nby2;       /* apply lo/hi pass filtering */
   mlo = 0; mhi = Mel(Nby2+1,fb.fres);
   if (lopass>=0.0) {
      mlo = 1127*log(1+lopass/700.0);
      fb.klo = (int) ((lopass * sampPeriod * 1.0e-7 * fb.fftN) + 2.5);
      if (fb.klo<2) fb.klo = 2;
   }
   if (hipass>=0.0) {
      mhi = 1127*log(1+hipass/700.0);
      fb.khi = (int) ((hipass * sampPeriod * 1.0e-7 * fb.fftN) + 0.5);
      if (fb.khi>Nby2) fb.khi = Nby2;
   }
   if (trace&T_MEL){
      printf("FFT passband %d to %d out of 1 to %d\n",fb.klo,fb.khi,Nby2);
      printf("Mel passband %f to %f\n",mlo,mhi);
   }

   /* Create vector of fbank centre frequencies */
   fb.cf = CreateVector(x,maxChan);
   ms = mhi - mlo;
   for (chan=1; chan <= maxChan; chan++) {
      if (alpha == 1.0) {
         /* no warping: centres equally spaced on the mel axis */
         fb.cf[chan] = ((float)chan/(float)maxChan)*ms + mlo;
      }
      else {
         /* scale assuming scaling starts at lopass */
         float minFreq = 700.0 * (exp (mlo / 1127.0) - 1.0 );
         float maxFreq = 700.0 * (exp (mhi / 1127.0) - 1.0 );
         float cf = ((float)chan / (float) maxChan) * ms + mlo;

         /* mel -> linear frequency, warp, then back to mel */
         cf = 700 * (exp (cf / 1127.0) - 1.0);

         fb.cf[chan] = 1127.0 * log (1.0 + WarpFreq (warpLowCut, warpUpCut, cf, minFreq, maxFreq, alpha) / 700.0);
      }
   }

   /* Create loChan map, loChan[fftindex] -> lower channel index */
   fb.loChan = CreateShortVec(x,Nby2);
   for (k=1,chan=1; k<=Nby2; k++){
      melk = Mel(k,fb.fres);
      if (k<fb.klo || k>fb.khi) fb.loChan[k]=-1;   /* bin outside the pass band */
      else {
         /* Check the bound BEFORE indexing: only cf[1..maxChan] was filled
            in above, so cf[maxChan+1] must never be read. */
         while (chan<=maxChan && fb.cf[chan] < melk) ++chan;
         fb.loChan[k] = chan-1;
      }
   }

   /* Create vector of lower channel weights */
   fb.loWt = CreateVector(x,Nby2);
   for (k=1; k<=Nby2; k++) {
      chan = fb.loChan[k];
      if (k<fb.klo || k>fb.khi) fb.loWt[k]=0.0;
      else {
         if (chan>0)
            fb.loWt[k] = ((fb.cf[chan+1] - Mel(k,fb.fres)) /
                          (fb.cf[chan+1] - fb.cf[chan]));
         else
            /* below the first centre frequency: mlo acts as the lower edge */
            fb.loWt[k] = (fb.cf[1]-Mel(k,fb.fres))/(fb.cf[1] - mlo);
      }
   }

   /* Create workspace for fft */
   fb.x = CreateVector(x,fb.fftN);
   return fb;
}