/**
 * Convert wave data to MFCC.  Also does spectral subtraction
 * if a noise spectrum (@a w->ssbuf) has been set.
 *
 * @param wave [in] waveform data
 * @param mfcc [out] buffer to store the resulting MFCC parameter
 *             vectors [t][0..veclen-1], should be already allocated
 * @param para [in] configuration parameters
 * @param nSamples [in] length of waveform data in samples
 * @param w [i/o] MFCC calculation work area
 * @param c [i/o] cepstral mean/variance normalization work area
 *
 * @return the number of processed frames, or FALSE (0) when the
 * pre-set noise spectrum length does not match the work buffer.
 */
int Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w, CMNWork *c)
{
  int i, k, t;
  int end = 0, start = 1;       /* 1-based window boundaries into wave[] */
  int frame_num;                /* number of output frames */

  /* set noise spectrum if any */
  if (w->ssbuf != NULL) {
    /* check ssbuf length */
    if (w->ssbuflen != w->bflen) {
      jlog("Error: mfcc-core: noise spectrum length not match\n");
      return FALSE;
    }
  }

  /* number of whole analysis windows that fit into the input */
  frame_num = (int)((nSamples - para->framesize) / para->frameshift) + 1;

  for(t = 0; t < frame_num; t++){
    /* after the first frame, step the window start forward by
       frameshift samples (derived from the previous frame's end) */
    if(end != 0) start = end - (para->framesize - para->frameshift) - 1;

    /* copy this frame's samples into the 1-based work buffer bf[] */
    k = 1;
    for(i = start; i <= start + para->framesize; i++){
      /* NOTE(review): this loop copies framesize+1 samples per frame
         (i runs inclusive of start+framesize); on the last frame
         wave[nSamples] may be read when (nSamples - framesize) is an
         exact multiple of frameshift — confirm bf/bflen sizing and
         intended window-length convention upstream */
      w->bf[k] = (float)wave[i - 1];
      k++;
    }
    end = i;

    /* Calculate base MFCC coefficients */
    WMP_calc(w, mfcc[t], para);
  }

  /* Normalise Log Energy */
  if (para->energy && para->enormal) NormaliseLogE(mfcc, frame_num, para);

  /* Delta (consider energy suppress) */
  if (para->delta) Delta(mfcc, frame_num, para);

  /* Acceleration */
  if (para->acc) Accel(mfcc, frame_num, para);

  /* Cepstrum Mean and/or Variance Normalization */
  if (para->cmn && ! para->cvn) CMN(mfcc, frame_num, para->mfcc_dim + (para->c0 ? 1 : 0), c);
  else if (para->cmn || para->cvn) MVN(mfcc, frame_num, para, c);

  return(frame_num);
}
/**
 * @brief Compute a parameter vector from a speech window.
 *
 * This function calculates an MFCC vector from speech data windowed from
 * input speech.  The obtained MFCC vector will be stored to mfcc->tmpmfcc.
 *
 * @param mfcc [i/o] MFCC calculation instance
 * @param window [in] speech input (windowed from input stream)
 * @param windowlen [in] length of @a window
 *
 * @return TRUE on success (a vector obtained).  Returns FALSE if no
 * parameter vector is obtained yet (due to delta delay).
 *
 * @callgraph
 * @callergraph
 *
 */
boolean RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen)
{
  int i;
  boolean ret;
  VECT *tmpmfcc;
  Value *para;

  tmpmfcc = mfcc->tmpmfcc;
  para = mfcc->para;

  /* calculate base MFCC from waveform (use recog->mfccwrk);
     bf[] is a 1-based work buffer, hence the i+1 offset */
  for (i=0; i < windowlen; i++) {
    mfcc->wrk->bf[i+1] = (float) window[i];
  }
  WMP_calc(mfcc->wrk, tmpmfcc, para);

  if (para->energy && para->enormal) {
    /* normalize log energy */
    /* Since the maximum power of the whole input utterance cannot be
       obtained at real-time input, the maximum of last input will be
       used to normalize. */
    tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para);
  }

  if (para->delta) {
    /* calc delta coefficients */
    ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc);
#ifdef RDEBUG
    printf("DeltaBuf: ret=%d, status=", ret);
    for(i=0;i<mfcc->db->len;i++) {
      printf("%d", mfcc->db->is_on[i]);
    }
    printf(", nextstore=%d\n", mfcc->db->store);
#endif
    /* if ret == FALSE, there is no available frame (still inside the
       delta delay), so skip recognition and just wait for next input */
    if (! ret) {
      return FALSE;
    }
    /* now db->vec holds the current base and full delta, so copy them
       to tmpmfcc */
    memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
  }

  if (para->acc) {
    /* calc acceleration coefficients */
    /* send the whole base+delta to the cycle buffer */
    ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc);
#ifdef RDEBUG
    printf("AccelBuf: ret=%d, status=", ret);
    for(i=0;i<mfcc->ab->len;i++) {
      printf("%d", mfcc->ab->is_on[i]);
    }
    printf(", nextstore=%d\n", mfcc->ab->store);
#endif
    /* if ret == FALSE, there is no available frame (still inside the
       acceleration delay), so skip recognition and wait for next input */
    if (! ret) {
      return FALSE;
    }
    /* now ab->vec holds the current (base+delta) and their delta coef.
       it holds a vector in the order of [base] [delta] [delta] [acc],
       so copy the [base], [delta] and [acc] to tmpmfcc. */
    memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2);
    memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen);
  }

#ifdef POWER_REJECT
  if (para->energy || para->c0) {
    /* accumulate the per-frame power term for power-based rejection */
    mfcc->avg_power += tmpmfcc[para->baselen-1];
  }
#endif

  if (para->delta && (para->energy || para->c0) && para->absesup) {
    /* suppress absolute power: remove the absolute energy/C0 term by
       shifting the remainder of the vector down one slot (source and
       destination overlap, hence memmove rather than memcpy) */
    memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen));
  }

  /* tmpmfcc[] now holds the latest parameter vector */

  /* perform CMN */
  if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc);

  return TRUE;
}