Beispiel #1
0
/**
 * Read one feature vector from the feature-input plugin into the frame
 * slot mfcc->f of mfcc->param.
 *
 * The parameter storage is first expanded so that frame index mfcc->f
 * exists, then the plugin's fv_read() callback fills that vector.
 *
 * @param mfcc [i/o] MFCC calculation instance
 * @param new_t [out] set to mfcc->f + 1 when a vector was stored, or to
 * mfcc->f on end of input or on a segmentation request.  It is left
 * untouched when an error is returned.
 *
 * @return 0 when a vector was stored, -1 on end of input, -2 on error
 * (allocation failure or plugin error), -3 when the plugin requests
 * segmentation of the current recognition.
 */
int
mfc_module_read(MFCCCalc *mfcc, int *new_t)
{
  int ret;

  /* expand area if needed */
  if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
    jlog("ERROR: FEATURE_INPUT plugin: failed to allocate memory\n");
    return -2;
  }

  /* get data */
  ret = mfcc->func.fv_read(mfcc->param->parvec[mfcc->f], mfcc->param->veclen);

  if (ret == -3) {
    /* function requests segmentation of the current recognition */
    mfcc->segmented_by_input = TRUE;
    *new_t = mfcc->f;
    return -3;
  }
  if (ret == -1) {
    /* end of input */
    mfcc->segmented_by_input = FALSE;
    *new_t = mfcc->f;
    return -1;
  }
  if (ret == -2) {
    /* error */
    jlog("ERROR: FEATURE_INPUT plugin: fvin_read() returns error (-2)\n");
    return -2;
  }

  /* any other value is treated as "one vector stored" */
  *new_t = mfcc->f + 1;

  return 0;
}
Beispiel #2
0
/**
 * @brief  Split the input parameter at a segmentation point.
 *
 * A new parameter holder is created in mfcc->rest_param and the frames
 * [start ... param->samplenum - 1] of mfcc->param are copied into it.
 * The original param is then truncated by setting its length to @a end,
 * so frames [0 ... end - 1] remain there.  The vector storage of the
 * original param is not released or reallocated.
 *
 * @param mfcc [i/o] MFCC calculation instance
 * @param start [in] first frame to copy to rest_param
 * @param end [in] new length (in frames) of the original param
 *
 * @callgraph
 * @callergraph
 */
void
mfcc_copy_to_rest_and_shrink(MFCCCalc *mfcc, int start, int end)
{
  int n;

  /* build the holder for the remaining frames, inheriting the header */
  mfcc->rest_param = new_param();
  memcpy(&(mfcc->rest_param->header), &(mfcc->param->header),
         sizeof(HTK_Param_Header));
  mfcc->rest_param->samplenum = mfcc->param->samplenum - start;
  mfcc->rest_param->header.samplenum = mfcc->rest_param->samplenum;
  mfcc->rest_param->veclen = mfcc->param->veclen;

  if (param_alloc(mfcc->rest_param,
                  mfcc->rest_param->samplenum,
                  mfcc->rest_param->veclen) == FALSE) {
    j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n");
  }

  /* copy frame (start + n) of the original to frame n of the rest */
  for (n = 0; start + n < mfcc->param->samplenum; n++) {
    memcpy(mfcc->rest_param->parvec[n],
           mfcc->param->parvec[start + n],
           sizeof(VECT) * mfcc->rest_param->veclen);
  }

  /* shrink the original: only its length is changed */
  mfcc->param->samplenum = end;
}
Beispiel #3
0
/** 
 * Read in a HTK parameter file from @a fp .
 *
 * The header (sample count, window shift, sample size, sample type) is
 * read first.  The input is assumed to be big endian; when the frame
 * count looks implausibly large (>= 60000) the file is assumed to be
 * little endian and all further reads use the opposite byte order.
 * Compressed files (F_COMPRESS) are expanded to float vectors using the
 * coefficient arrays stored in front of the data.
 *
 * Every read is checked: a truncated file makes the function fail
 * instead of returning uninitialized vectors.  The only exception is the
 * trailing checksum word, which is read and ignored.
 * 
 * @param fp [in] file pointer
 * @param pinfo [out] parameter data to store the read information
 * 
 * @return TRUE on success, FALSE on failure.  On failure after the
 * vector area has been allocated, that area is left in @a pinfo.
 */
static boolean
read_param(FILE *fp, HTK_Param *pinfo)
{
  unsigned int i;
  int v;
  float *a = NULL, *b = NULL;   /* compression coefficient arrays */
  char *buf = NULL;             /* one raw frame as stored in the file */
  char *p;
  float d;
  unsigned short c;
  HTK_Param_Header *hd;
  boolean compressed;
  boolean ok = FALSE;

  hd = &(pinfo->header);

  /* endian check once */
  /* assume input as BIG ENDIAN */
#ifdef WORDS_BIGENDIAN
  needswap = FALSE;
#else  /* LITTLE ENDIAN */
  needswap = TRUE;
#endif
  
  /* read in headers */
  if(!myread((char *)&(hd->samplenum), sizeof(unsigned int), 1, fp)) return(FALSE);
  /* try to detect wav file */
  if (hd->samplenum == 1380533830) { /* read string "RIFF" as an integer */
    jlog("Error: rdparam: input file is WAV file, not a parameter file\n");
    return FALSE;
  }
    
  /* try to detect and read little-endian parameters from wav2mfcc... */
  if (hd->samplenum >= 60000) {	/* more than 10 minutes! */
    jlog("Warning: rdparam: header says it has %d frames (more than 10 minutes)\n", hd->samplenum);
    jlog("Warning: rdparam: it may be a little endian MFCC\n");
    jlog("Warning: rdparam: now try reading with endian conversion\n");
    swap_bytes((char *)&(hd->samplenum), sizeof(unsigned int), 1);
    needswap = ! needswap;
  }
    
  /* rest of the header: fail on a short file instead of using garbage */
  if (!myread((char *)&(hd->wshift), sizeof(unsigned int), 1, fp)) return FALSE;
  if (!myread((char *)&(hd->sampsize), sizeof(unsigned short), 1, fp)) return FALSE;
  if (!myread((char *)&(hd->samptype), sizeof(short), 1, fp)) return FALSE;

  compressed = (hd->samptype & F_COMPRESS) ? TRUE : FALSE;
  if (compressed) {
    pinfo->veclen = hd->sampsize / sizeof(short);
  } else {
    pinfo->veclen = hd->sampsize / sizeof(float);
  }

  if (compressed) {
    /* the frame count of a compressed file includes the room taken by
       the two coefficient arrays; refuse a count too small to hold them
       rather than letting the subtraction wrap around */
    if (hd->samplenum < sizeof(float)) {
      jlog("Error: rdparam: compressed parameter file is too short to hold compression coefficients\n");
      return FALSE;
    }
    hd->samplenum -= sizeof(float); /* (-_-) */
    /* read in compression coefficient arrays */
    a = (float *)mymalloc(sizeof(float) * pinfo->veclen);
    b = (float *)mymalloc(sizeof(float) * pinfo->veclen);
    if (!myread((char *)a, sizeof(float), pinfo->veclen, fp)) goto cleanup;
    if (!myread((char *)b, sizeof(float), pinfo->veclen, fp)) goto cleanup;
  }
  pinfo->samplenum = hd->samplenum;

  buf = (char *)mymalloc(hd->sampsize);

  /* allocate memory for vectors */
  if (param_alloc(pinfo, pinfo->samplenum, pinfo->veclen) == FALSE) {
    jlog("Error: rdparam: failed to allocate memory for reading MFCC\n");
    goto cleanup;
  }

  /* read in parameter vector */
  /* needs conversion of integerized */
  for (i=0;i<pinfo->samplenum;i++) {
    if (compressed) {
      if (!myread(buf, sizeof(short), hd->sampsize / sizeof(short), fp)) goto cleanup;
      p = buf;
      /* uncompress: (short(2byte) -> float(4byte)) * veclen*/
      for (v=0;v<pinfo->veclen;v++) {
        d = *(short *)p;
        pinfo->parvec[i][v] = (d + b[v]) / a[v];
        p += sizeof(short);
      }
    } else {
      if (!myread(buf, sizeof(float), hd->sampsize / sizeof(float), fp)) goto cleanup;
      p = buf;
      for (v=0;v<pinfo->veclen;v++) {
        d = *(float *)p;
        pinfo->parvec[i][v] = d;
        p += sizeof(float);
      }
    }
  }

  if (hd->samptype & F_CHECKSUM) {
    /* CRC check (2byte) */
    /* the value is only skipped, never verified, so a failure to read
       it is deliberately not treated as an error */
    (void)myread((char *)&c, sizeof(unsigned short), 1, fp);
  }

  /*put_param(stdout, pinfo);*/

  ok = TRUE;

 cleanup:
  /* work buffers are released on every path; free(NULL) is a no-op */
  free(buf);
  free(b);
  free(a);

  return(ok);

}
/** 
 * @brief  Main function of the on-the-fly 1st pass decoding
 *
 * This function performs successive MFCC calculation and 1st pass decoding.
 * The given input data are windowed to a certain length, then converted
 * to MFCC, and decoding for the input frame will be performed in one
 * process cycle.  The loop cycle will continue with window shift, until
 * the whole given input has been processed.
 *
 * In case of input segment request from decoding process (in
 * decode_proceed()), this function keeps the rest un-processed speech
 * to a buffer and tell the caller to stop input and end the 1st pass.
 *
 * When back-end VAD such as SPSEGMENT_NAIST or GMM_VAD is defined,  Decoder-based
 * VAD is enabled and its decoding control will be managed here.
 * In decoder-based VAD mode, the recognition will be processed but
 * no output will be done at the first un-triggering input area.
 * when speech input start is detected, this function will rewind the
 * already obtained MFCC sequence to a certain frames, and re-start
 * normal recognition at that point.  When multiple recognition process
 * instance is running, their segmentation will be synchronized.
 * 
 * This function will be called each time a new speech sample comes as
 * as callback from A/D-in routine.
 * 
 * @param Speech [in] pointer to the speech sample segments
 * @param nowlen [in] length of above
 * @param recog [i/o] engine instance
 * 
 * @return 0 when the given input was fully consumed (the caller may call
 * again with the next segment), -1 when memory for a new MFCC vector
 * could not be allocated, and 1 when the frame count of any MFCC
 * instance has reached the maximum frame length.  Any other non-zero
 * value returned by proceed_one_frame() is passed through unchanged;
 * when that value is 1 and segmentation is enabled, the unprocessed
 * samples are saved in the rest buffer beforehand.
 *
 * @callgraph
 * @callergraph
 * 
 */
int
RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = input */
{
  int i, now, ret;
  MFCCCalc *mfcc;
  RealBeam *r;

  r = &(recog->real);

#ifdef DEBUG_VTLN_ALPHA_TEST
  /* store speech */
  adin_cut_callback_store_buffer(Speech, nowlen, recog);
#endif

  /* window[0..windownum-1] are speech data left from previous call */

  /* initialize pointer for local processing */
  now = 0;
  
  /* reset flag which indicates whether the input has ended with segmentation request */
  r->last_is_segmented = FALSE;

#ifdef RDEBUG
  printf("got %d samples\n", nowlen);
#endif

  while (now < nowlen) {	/* till whole input is processed */
    /* if input length reaches maximum buffer size, terminate 1st pass here */
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      if (mfcc->f >= r->maxframelen) return(1);
    }
    /* fill window buffer as many as possible */
    for(i = min(r->windowlen - r->windownum, nowlen - now); i > 0 ; i--)
      r->window[r->windownum++] = (float) Speech[now++];
    /* if window buffer was not filled, end processing here, keeping the
       rest samples (window[0..windownum-1]) in the window buffer. */
    if (r->windownum < r->windowlen) break;

    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      mfcc->valid = FALSE;
      /* calculate a parameter vector from current waveform windows
         and store to mfcc->tmpmfcc */
      if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) {
#ifdef ENABLE_PLUGIN
        /* call post-process plugin if exist */
        plugin_exec_vector_postprocess(mfcc->tmpmfcc, mfcc->param->veclen, mfcc->f);
#endif
        /* an MFCC vector has been completed for this instance */
        mfcc->valid = TRUE;
        /* now get the MFCC vector of current frame, now store it to param */
        if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
          jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
          return -1;
        }
        memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
#ifdef RDEBUG
        printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f);
#endif
      }
    }

    /* proceed one frame */
    ret = proceed_one_frame(recog);

    if (ret == 1 && recog->jconf->decodeopt.segment) {
      /* short pause segmentation: there is some data left in buffer, so
         we should keep them for next processing */
      r->rest_len = nowlen - now;
      if (r->rest_len > 0) {
        /* copy rest samples to rest_Speech, growing the buffer if needed */
        if (r->rest_Speech == NULL) {
          r->rest_alloc_len = r->rest_len;
          r->rest_Speech = (SP16 *)mymalloc(sizeof(SP16)*r->rest_alloc_len);
        } else if (r->rest_alloc_len < r->rest_len) {
          r->rest_alloc_len = r->rest_len;
          r->rest_Speech = (SP16 *)myrealloc(r->rest_Speech, sizeof(SP16)*r->rest_alloc_len);
        }
        memcpy(r->rest_Speech, &(Speech[now]), sizeof(SP16) * r->rest_len);
      }
    }
    if (ret != 0) return ret;

    /* proceed frame pointer of the instances that produced a vector */
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      if (!mfcc->valid) continue;
      mfcc->f++;
    }

    /* shift window by one frame shift.  The element size is taken from
       the window buffer itself so that the byte count always matches the
       buffer's element type. */
    memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(r->window[0]) * (r->windowlen - recog->jconf->input.frameshift));
    r->windownum -= recog->jconf->input.frameshift;
  }

  /* input segment is fully processed
     tell the caller to continue input */
  return(0);			
}
/** 
 * @brief  Preparation for the on-the-fly 1st pass decoding.
 *
 * Variables are reset and data are prepared for the next input recognition:
 * the window buffer is emptied, each MFCC instance gets its parameter
 * holder initialized and its frame counter reset, the parameter type is
 * optionally checked against each acoustic model, and the work areas for
 * MFCC and acoustic likelihood computation are prepared.
 *
 * This function will be called before starting each input (segment).
 *
 * @param recog [i/o] engine instance
 *
 * @return TRUE on success. FALSE on failure.
 *
 * @callgraph
 * @callergraph
 * 
 */
boolean
RealTimePipeLinePrepare(Recog *recog)
{
  RealBeam *r;
  PROCESS_AM *am;
  MFCCCalc *mfcc;

  r = &(recog->real);

  /* initialize variables for computation */
  r->windownum = 0;
  /* set up the parameter holder of every MFCC instance */
  for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
    /* parameter initialization */
    if (recog->jconf->input.speech_input == SP_MFCMODULE) {
      if (mfc_module_set_header(mfcc, recog) == FALSE) return FALSE;
    } else {
      init_param(mfcc);
    }
    /* allocate room for the first frame vector; the area is expanded
       later as frames arrive */
    if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) {
      j_internal_error("ERROR: RealTimePipeLinePrepare: failed to allocate memory for parameter vectors\n");
    }
    /* reset frame count */
    mfcc->f = 0;
  }
  /* check type coherence between param and hmminfo here */
  if (recog->jconf->input.paramtype_check_flag) {
    for(am=recog->amlist;am;am=am->next) {
      if (!check_param_coherence(am->hmminfo, am->mfcc->param)) {
        jlog("ERROR: input parameter type does not match AM\n");
        return FALSE;
      }
    }
  }

  /* prepare work area for calculation */
  if (recog->jconf->input.type == INPUT_WAVEFORM) {
    reset_mfcc(recog);
  }
  /* prepare cache area for acoustic computation of HMM states and mixtures */
  for(am=recog->amlist;am;am=am->next) {
    outprob_prepare(&(am->hmmwrk), r->maxframelen);
  }

#ifdef BACKEND_VAD
  if (recog->jconf->decodeopt.segment) {
    /* initialize segmentation parameters */
    spsegment_init(recog);
  }
#else
  recog->triggered = FALSE;
#endif

#ifdef DEBUG_VTLN_ALPHA_TEST
  /* store speech */
  recog->speechlen = 0;
#endif

  return TRUE;
}
/** 
 * @brief  Finalize the 1st pass on-the-fly decoding.
 *
 * This function will be called after the 1st pass processing ends.
 * It fix the input length of parameter vector sequence, call
 * decode_end() (or decode_end_segmented() when last input was ended
 * by segmentation) to finalize the 1st pass.
 *
 * If the last input was ended by end-of-stream (in case input reached
 * EOF in file input etc.), process the rest samples remaining in the
 * delta buffers.
 *
 * @param recog [i/o] engine instance
 * 
 * @return TRUE on success, or FALSE on error (memory allocation failure
 * or an error reported by decode_proceed()).
 */
boolean
RealTimeParam(Recog *recog)
{
  boolean ret1, ret2;
  RealBeam *r;
  int ret;
  int maxf;
  boolean ok_p;
  MFCCCalc *mfcc;
  Value *para;
#ifdef RDEBUG
  int i;
#endif

  r = &(recog->real);

  if (r->last_is_segmented) {

    /* When input segmented by recognition process in RealTimePipeLine(),
       we have to keep the whole current status of MFCC computation to the
       next call.  So here we only output the 1st pass result. */
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      mfcc->param->header.samplenum = mfcc->f + 1;/* len = lastid + 1 */
      mfcc->param->samplenum = mfcc->f + 1;
    }
    decode_end_segmented(recog);

    /* return obtained parameter for 2nd pass */
    return(TRUE);
  }

  if (recog->jconf->input.type == INPUT_VECTOR) {
    /* finalize real-time 1st pass */
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      mfcc->param->header.samplenum = mfcc->f;
      mfcc->param->samplenum = mfcc->f;
    }
    /* process the last frame, output the result and finish */
    decode_end(recog);
    return TRUE;
  }

  /* finish MFCC computation for the last delayed frames:
     only instances using delta or accel coefficients have anything
     left to flush */
  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
    if (mfcc->para->delta || mfcc->para->acc) {
      mfcc->valid = TRUE;
    } else {
      mfcc->valid = FALSE;
    }
  }

  /* loop until all data has been flushed */
  while (1) {

    /* if all mfcc became invalid, exit loop here */
    ok_p = FALSE;
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      if (mfcc->valid) {
        ok_p = TRUE;
        break;
      }
    }
    if (!ok_p) break;

    /* try to get 1 frame for all mfcc instances */
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      
      para = mfcc->para;
      
      if (! mfcc->valid) continue;
      
      /* check if there is data in cycle buffer of delta */
      ret1 = WMP_deltabuf_flush(mfcc->db);
#ifdef RDEBUG
      printf("DeltaBufLast: ret=%d, status=", ret1);
      for(i=0;i<mfcc->db->len;i++) {
        printf("%d", mfcc->db->is_on[i]);
      }
      printf(", nextstore=%d\n", mfcc->db->store);
#endif
      if (ret1) {
        /* uncomputed delta has flushed, compute it with tmpmfcc */
        if (para->energy && para->absesup) {
          /* copy the vector while skipping the element at index
             (baselen - 1) of the delta buffer output */
          memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * (para->baselen - 1));
          memcpy(&(mfcc->tmpmfcc[para->baselen-1]), &(mfcc->db->vec[para->baselen]), sizeof(VECT) * para->baselen);
        } else {
          memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2);
        }
        if (para->acc) {
          /* this new delta should be given to the accel cycle buffer */
          ret2 = WMP_deltabuf_proceed(mfcc->ab, mfcc->tmpmfcc);
#ifdef RDEBUG
          printf("AccelBuf: ret=%d, status=", ret2);
          for(i=0;i<mfcc->ab->len;i++) {
            printf("%d", mfcc->ab->is_on[i]);
          }
          printf(", nextstore=%d\n", mfcc->ab->store);
#endif
          if (ret2) {
            /* uncomputed accel was given, compute it with tmpmfcc */
            memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
            memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
          } else {
            /* still no input is given: */
            /* in case of very short input: go on to the next input */
            continue;
          }
        }
        
      } else {
      
        /* no data left in the delta buffer */
        if (para->acc) {
          /* no new data, just flush the accel buffer */
          ret2 = WMP_deltabuf_flush(mfcc->ab);
#ifdef RDEBUG
          printf("AccelBuf: ret=%d, status=", ret2);
          for(i=0;i<mfcc->ab->len;i++) {
            printf("%d", mfcc->ab->is_on[i]);
          }
          printf(", nextstore=%d\n", mfcc->ab->store);
#endif
          if (ret2) {
            /* uncomputed data has flushed, compute it with tmpmfcc */
            memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen));
            memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen);
          } else {
            /* actually no data exists in both delta and accel */
            mfcc->valid = FALSE; /* deactivate this instance */
            continue;		/* go on to the next instance */
          }
        } else {
          /* only delta: input fully flushed */
          mfcc->valid = FALSE; /* deactivate this instance */
          continue;		/* go on to the next instance */
        }
      }
      /* a new frame has been obtained from delta buffer to tmpmfcc */
      if(para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc);
      if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) {
        jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n");
        return FALSE;
      }
      /* store to mfcc->f */
      memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen);
#ifdef ENABLE_PLUGIN
      /* call postprocess plugin if any */
      plugin_exec_vector_postprocess(mfcc->param->parvec[mfcc->f], mfcc->param->veclen, mfcc->f);
#endif
    }

    /* decide whether the recognition start callbacks must be fired:
       some valid instance is at frame 0 and none has advanced yet */
    ok_p = FALSE;
    maxf = 0;
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      if (!mfcc->valid) continue;
      if (maxf < mfcc->f) maxf = mfcc->f;
      if (mfcc->f == 0) {
        ok_p = TRUE;
      }
    }

    if (ok_p && maxf == 0) {
      /* call callback when at least one of MFCC has initial frame */
      if (recog->jconf->decodeopt.segment) {
#ifdef BACKEND_VAD
        /* not exec pass1 begin callback here */
#else
        if (!recog->process_segment) {
          callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
        }
        callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
        callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
        recog->triggered = TRUE;
#endif
      } else {
        callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
        callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
        recog->triggered = TRUE;
      }
    }

    /* proceed for the current frame */
    ret = decode_proceed(recog);
    if (ret == -1) {		/* error */
      /* this function returns boolean: -1 would be non-zero and thus
         be taken as success by the caller, so report FALSE */
      return FALSE;
    } else if (ret == 1) {	/* segmented */
      /* loop out */
      break;
    } /* else no event occurred */

#ifdef BACKEND_VAD
    /* check up trigger in case of VAD segmentation */
    if (recog->jconf->decodeopt.segment) {
      if (recog->triggered == FALSE) {
        if (spsegment_trigger_sync(recog)) {
          if (!recog->process_segment) {
            callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog);
          }
          callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog);
          callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog);
          recog->triggered = TRUE;
        }
      }
    }
#endif

    /* call frame-wise callback */
    callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog);

    /* move to next */
    for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
      if (! mfcc->valid) continue;
      mfcc->f++;
      if (mfcc->f > r->maxframelen) mfcc->valid = FALSE;
    }
  }

  /* finalize real-time 1st pass */
  for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) {
    mfcc->param->header.samplenum = mfcc->f;
    mfcc->param->samplenum = mfcc->f;
  }
  /* process the last frame, output the result and finish */
  decode_end(recog);

  return(TRUE);
}
Beispiel #7
0
/** 
 * Extract MFCC parameters with sentence CMN from given waveform.
 * Parameters will be computed for each MFCC calculation instance
 * in the engine instance, and stored in mfcc->param for each.
 *
 * Input shorter than one analysis frame is rejected.  Noise spectrum
 * buffers computed per input (frontend.sscalc) are released before
 * returning, on success and on every error path; a noise spectrum
 * loaded from file is kept for later calls.
 * 
 * @param speech [in] buffer of speech waveform
 * @param speechlen [in] length of @a speech in samples
 * @param recog [in] engine instance
 * 
 * @return TRUE on success, FALSE on error.
 *
 * @callgraph
 * @callergraph
 */
boolean
wav2mfcc(SP16 speech[], int speechlen, Recog *recog)
{
  int framenum;
  int len;
  Value *para;
  MFCCCalc *mfcc;
  int veclen;
  int t, i;

  /* Integer division truncates toward zero, so the formula below would
     yield 1 frame for an input slightly shorter than one frame.  Reject
     such input explicitly. */
  if (speechlen < recog->jconf->input.framesize) {
    jlog("WARNING: input too short (%d samples), ignored\n", speechlen);
    return FALSE;
  }

  /* calculate frame length from speech length, frame size and frame shift */
  framenum = (int)((speechlen - recog->jconf->input.framesize) / recog->jconf->input.frameshift) + 1;
  if (framenum < 1) {
    jlog("WARNING: input too short (%d samples), ignored\n", speechlen);
    return FALSE;
  }

  for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {

    if (mfcc->frontend.ssload_filename) {
      /* setup for spectral subtraction using file */
      if (mfcc->frontend.ssbuf == NULL) {
        /* load noise spectrum for spectral subtraction from file (once) */
        if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) {
          jlog("ERROR: wav2mfcc: failed to read noise spectrum from file \"%s\"\n", mfcc->frontend.ssload_filename);
          goto error;
        }
      }
    }

    if (mfcc->frontend.sscalc) {
      /* compute noise spectrum from head silence for each input */
      len = mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000;
      if (len > speechlen) len = speechlen;
#ifdef SSDEBUG
      jlog("DEBUG: [%d]\n", len);
#endif
      mfcc->frontend.ssbuf = new_SS_calculate(speech, len, &(mfcc->frontend.sslen), mfcc->frontend.mfccwrk_ss, mfcc->para);
    }

  }

  /* compute mfcc from speech file for each mfcc instances */
  for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {

    para = mfcc->para;
    veclen = para->veclen * mfcc->splice;

    /* splicing consumes (splice - 1) frames at the tail */
    if (framenum - (mfcc->splice - 1) < 1) {
      jlog("WARNING: input too short (%d samples), ignored\n", speechlen);
      goto error;
    }

    /* malloc new param */
    param_init_content(mfcc->param);
    if (param_alloc(mfcc->param, framenum, veclen) == FALSE) {
      jlog("ERROR: failed to allocate memory for converted parameter vectors\n");
      goto error;
    }

    if (mfcc->frontend.ssload_filename || mfcc->frontend.sscalc) {
      /* make link from mfccs to this buffer */
      mfcc->wrk->ssbuf = mfcc->frontend.ssbuf;
      mfcc->wrk->ssbuflen = mfcc->frontend.sslen;
      mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha;
      mfcc->wrk->ss_floor = mfcc->frontend.ss_floor;
    }
  
    /* make MFCC from speech data */
    if (Wav2MFCC(speech, mfcc->param->parvec, para, speechlen, mfcc->wrk, mfcc->cmn.wrk) == FALSE) {
      jlog("ERROR: failed to compute features from input speech\n");
      goto error;
    }

    /* splicing: append the following (splice - 1) frames to each frame */
    if (mfcc->splice > 1) {
      for (t = 0; t < framenum - (mfcc->splice - 1); t++) {
        for (i = 1; i < mfcc->splice; i++) {
          memcpy(&(mfcc->param->parvec[t][para->veclen * i]), &(mfcc->param->parvec[t + i][0]), sizeof(VECT) * para->veclen);
        }
      }
    }

    /* set miscellaneous parameters */
    mfcc->param->header.samplenum = framenum - (mfcc->splice - 1);
    mfcc->param->header.wshift = para->smp_period * para->frameshift;
    mfcc->param->header.sampsize = veclen * sizeof(VECT); /* not compressed */
    mfcc->param->header.samptype = para->basetype;
    if (para->delta) mfcc->param->header.samptype |= F_DELTA;
    if (para->acc) mfcc->param->header.samptype |= F_ACCL;
    if (para->energy) mfcc->param->header.samptype |= F_ENERGY;
    if (para->c0) mfcc->param->header.samptype |= F_ZEROTH;
    if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP;
    if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM;
    mfcc->param->veclen = veclen;
    mfcc->param->samplenum = framenum - (mfcc->splice - 1);

    /* the per-input noise spectrum is no longer needed for this instance */
    if (mfcc->frontend.sscalc) {
      free(mfcc->frontend.ssbuf);
      mfcc->frontend.ssbuf = NULL;
    }
  }

  return TRUE;

 error:
  /* release the per-input noise spectrum of every instance so that a
     failure does not leak the buffers computed above; instances already
     finished have a NULL pointer here and free(NULL) is a no-op */
  for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) {
    if (mfcc->frontend.sscalc) {
      free(mfcc->frontend.ssbuf);
      mfcc->frontend.ssbuf = NULL;
    }
  }
  return FALSE;
}