Ejemplo n.º 1
0
/**
 * Convert a whole waveform buffer to a sequence of MFCC vectors.
 *
 * The waveform is cut into frames of para->framesize samples, advancing
 * para->frameshift samples per frame.  Each frame is copied into the work
 * buffer w->bf[1..framesize] and handed to WMP_calc(), which stores the
 * result in mfcc[t].  After all frames are computed, log energy
 * normalization, delta, acceleration and CMN / MVN are applied to the whole
 * sequence according to the flags in @a para.
 *
 * If w->ssbuf is set, its length is validated against w->bflen before any
 * processing (the spectral subtraction itself is presumably applied inside
 * WMP_calc() -- not visible from this function).
 *
 * @param wave [in] waveform data
 * @param mfcc [out] buffer to store the resulting MFCC parameter vector [t][0..veclen-1], should be already allocated
 * @param para [in] configuration parameters
 * @param nSamples [in] length of waveform data
 * @param w [i/o] MFCC calculation work area
 * @param c [i/o] work area passed through to CMN() / MVN()
 *
 * @return the number of processed frames; 0 when the input is shorter than
 * one frame; FALSE when the noise spectrum length does not match.
 */
int
Wav2MFCC(SP16 *wave, float **mfcc, Value *para, int nSamples, MFCCWork *w, CMNWork *c)
{
  int i, t;
  int start;                        /* 0-based index of the first sample of the current frame */
  int frame_num;                    /* Number of frames to be computed */

  /* check noise spectrum length if any */
  if (w->ssbuf != NULL) {
    if (w->ssbuflen != w->bflen) {
      jlog("Error: mfcc-core: noise spectrum length not match\n");
      return FALSE;
    }
  }

  /* Integer division truncates toward zero, so a slightly-too-short input
     would otherwise yield one frame and read past the end of wave[].
     Reject it explicitly: no complete frame, nothing to process. */
  if (nSamples < para->framesize) {
    return 0;
  }

  frame_num = (int)((nSamples - para->framesize) / para->frameshift) + 1;

  for (t = 0; t < frame_num; t++) {
    /* Copy exactly framesize samples into the 1-based work buffer.  The last
       frame ends at index (frame_num-1)*frameshift + framesize - 1, which is
       always < nSamples. */
    start = t * para->frameshift;
    for (i = 0; i < para->framesize; i++) {
      w->bf[i + 1] = (float)wave[start + i];
    }

    /* Calculate base MFCC coefficients */
    WMP_calc(w, mfcc[t], para);
  }

  /* Normalise Log Energy */
  if (para->energy && para->enormal) NormaliseLogE(mfcc, frame_num, para);

  /* Delta (consider energy suppress) */
  if (para->delta) Delta(mfcc, frame_num, para);

  /* Acceleration */
  if (para->acc) Accel(mfcc, frame_num, para);

  /* Cepstrum Mean and/or Variance Normalization:
     mean-only goes to CMN(), anything involving variance goes to MVN() */
  if (para->cmn && ! para->cvn) CMN(mfcc, frame_num, para->mfcc_dim + (para->c0 ? 1 : 0), c);
  else if (para->cmn || para->cvn) MVN(mfcc, frame_num, para, c);

  return(frame_num);
}
Ejemplo n.º 2
0
/**
 * @brief  Compute a parameter vector from one speech window.
 *
 * The window samples are copied into the MFCC work buffer and the base
 * MFCC vector is computed with WMP_calc().  Depending on the flags in
 * mfcc->para, the vector then goes through log energy normalization, the
 * delta and acceleration cycle buffers, absolute power suppression, and
 * real-time CMN.  The resulting vector is left in mfcc->tmpmfcc.
 *
 * @param mfcc [i/o] MFCC calculation instance
 * @param window [in] speech input (windowed from input stream)
 * @param windowlen [in] length of @a window
 *
 * @return TRUE when a vector has been obtained.  Returns FALSE when the
 * delta or acceleration buffer cannot yet provide a frame (delta delay);
 * the caller should simply feed the next window.
 *
 * @callgraph
 * @callergraph
 *
 */
boolean
RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen)
{
  Value *cfg = mfcc->para;
  VECT *out = mfcc->tmpmfcc;
  boolean ready;
  int n;

  /* load the window into the 1-based work buffer, then compute base MFCC */
  for (n = 1; n <= windowlen; n++) {
    mfcc->wrk->bf[n] = (float) window[n - 1];
  }
  WMP_calc(mfcc->wrk, out, cfg);

  /* normalize log energy */
  /* The maximum power of the whole utterance is not available while the
     input is still streaming, so the normalizer works from the state kept
     in mfcc->ewrk (maximum of the previous input). */
  if (cfg->energy && cfg->enormal) {
    VECT *e = out + cfg->baselen - 1;
    *e = energy_max_normalize(&(mfcc->ewrk), *e, cfg);
  }

  /* delta coefficients */
  if (cfg->delta) {
    ready = WMP_deltabuf_proceed(mfcc->db, out);
#ifdef RDEBUG
    printf("DeltaBuf: ret=%d, status=", ready);
    for (n = 0; n < mfcc->db->len; n++) {
      printf("%d", mfcc->db->is_on[n]);
    }
    printf(", nextstore=%d\n", mfcc->db->store);
#endif
    /* no frame available yet: still inside the delta delay */
    if (! ready) return FALSE;

    /* db->vec now holds [base] [delta]; bring both back into the output */
    memcpy(out, mfcc->db->vec, sizeof(*out) * cfg->baselen * 2);
  }

  /* acceleration coefficients */
  if (cfg->acc) {
    /* the whole base+delta vector is pushed into the cycle buffer */
    ready = WMP_deltabuf_proceed(mfcc->ab, out);
#ifdef RDEBUG
    printf("AccelBuf: ret=%d, status=", ready);
    for (n = 0; n < mfcc->ab->len; n++) {
      printf("%d", mfcc->ab->is_on[n]);
    }
    printf(", nextstore=%d\n", mfcc->ab->store);
#endif
    /* no frame available yet: still inside the acceleration delay */
    if (! ready) return FALSE;

    /* ab->vec is laid out as [base] [delta] [delta] [acc];
       keep [base] [delta] and append [acc], skipping the duplicated delta */
    memcpy(out, mfcc->ab->vec, sizeof(*out) * cfg->baselen * 2);
    memcpy(out + cfg->baselen*2, mfcc->ab->vec + cfg->baselen*3, sizeof(*out) * cfg->baselen);
  }

#ifdef POWER_REJECT
  /* accumulate the power term (last element of the base part) */
  if (cfg->c0 || cfg->energy) {
    mfcc->avg_power += out[cfg->baselen - 1];
  }
#endif

  /* suppress absolute power: drop element [baselen-1] by shifting the
     remainder of the vector down by one (regions overlap, hence memmove) */
  if (cfg->delta && cfg->absesup) {
    if (cfg->energy || cfg->c0) {
      memmove(out + cfg->baselen - 1, out + cfg->baselen, sizeof(*out) * (cfg->vecbuflen - cfg->baselen));
    }
  }

  /* out[] now holds the latest parameter vector */

  /* perform CMN */
  if (cfg->cmn || cfg->cvn) {
    CMN_realtime(mfcc->cmn.wrk, out);
  }

  return TRUE;
}