예제 #1
0
/** 
* <JA>
* 最後の1単語の前向きトレリスを計算して,文仮説の前向き尤度を更新する. 
* 
* @param now [i/o] 文仮説
* @param param [in] 入力パラメータ列
* @param r [in] 認識処理インスタンス
* 
* </JA>
* <EN>
* Compute the forward viterbi for the last word to update forward scores
* and ready for word connection.
* 
* @param now [i/o] hypothesis
* @param param [in] input parameter vectors
* @param r [in] recognition process instance
* 
* </EN>
* @callgraph
* @callergraph
*/
void
scan_word(NODE *now, HTK_Param *param, RecogProcess *r)
{
  /*
   * Outline of this function:
   *   1. build the HMM to scan: the last word of the hypothesis, plus the
   *      inherited phone now->last_ph when cross-word context dependency
   *      is enabled ("backscan"),
   *   2. run Viterbi from the start frame toward frame 0, writing the new
   *      forward scores to now->g[] (and now->g_prev[] when ccd_flag is set),
   *   3. store the data needed by the next call (final_g on multipath,
   *      g_prev[], last_ph, last_ph_sp_attached).
   */
  int   i,t;
  int   j = 0;		/* best source arc of the last maximization; initialized
			   so that the token copies under
			   GRAPHOUT_PRECISE_BOUNDARY never index with an
			   indeterminate value when no arc was selected */
  HMM *whmm;
  A_CELL *ac;
  WORD_ID word;
  LOGPROB tmpmax, tmptmp, score1;
  int startt = 0, endt = 0;
  int wordhmmnum;
  LOGPROB tmpmax_store, store_point_maxarc; /* multipath */
  LOGPROB tmpmax2 = LOG_ZERO;
  int phmmlen;
  HMM_Logical *ret, *wend;
  int store_point;
  int crossword_point = 0;
  boolean back_rescan = FALSE;
  boolean node_exist_p;
  boolean scan_started = FALSE; /* TRUE once a valid start frame was found */
  int tn=0;		       ///< Temporal pointer to current buffer
  int tl=0;		       ///< Temporal pointer to previous buffer

  /* store global values to local for rapid access */
  WORD_INFO *winfo;
  HTK_HMM_INFO *hmminfo;
  LOGPROB *framemaxscore;
  int peseqlen;
  boolean ccd_flag;
  boolean enable_iwsp;
#ifdef SCAN_BEAM
  LOGPROB scan_beam_thres;
#endif
  StackDecode *dwrk;

  winfo = r->lm->winfo;
  hmminfo = r->am->hmminfo;
  dwrk = &(r->pass2);
  peseqlen = r->peseqlen;
  framemaxscore = r->pass2.framemaxscore;
  ccd_flag = r->ccd_flag;
  enable_iwsp = r->lm->config->enable_iwsp; /* multipath */
#ifdef SCAN_BEAM
  scan_beam_thres = r->config->pass2.scan_beam_thres;
#endif

  /* default store point; overwritten below when ccd_flag is set */
  if (hmminfo->multipath) {
    store_point = -1;
  } else {
    store_point = 0;
  }

  /* ----------------------- prepare HMM ----------------------- */

  if (ccd_flag) {
    /* if there is an inherited last phone, enable backscan so that the
       scan goes back over that phone as well */
    if (now->last_ph == NULL) {
      /* initial score: now->g[] */
      /* scan range: phones in now->seq[now->seqnum-1] */
      back_rescan = FALSE;
    } else {
      /* initial score: now->g_prev[] (1-phone before)*/
      /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */
      back_rescan = TRUE;
    }
  }
#ifdef TCD
  if (now->last_ph != NULL) {
    jlog("DEBUG: inherited last_ph: %s\n", (now->last_ph)->name);
    if (now->last_ph_sp_attached) jlog("DEBUG: (sp attached)\n"); /* multipath */
  } else {
    jlog("DEBUG: no last_ph inherited\n");
  }
#endif

  /* prepare HMM of the scan range */
  word = now->seq[now->seqnum-1];

  if (ccd_flag) {

    if (back_rescan) {

      /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */

      phmmlen = winfo->wlen[word] + 1;
      if (phmmlen > dwrk->phmmlen_max) {
        j_internal_error("scan_word: num of phonemes in a word exceed phmmlenmax (%d) ?\n", dwrk->phmmlen_max);
      }
      /* copy all phones of the word except its last one; the last phone
         and last_ph are filled in below with context-dependent models */
      for (i=0;i<phmmlen - 2;i++) dwrk->phmmseq[i] = winfo->wseq[word][i];
      if (enable_iwsp && hmminfo->multipath) {
        for (i=0;i<phmmlen - 2;i++) dwrk->has_sp[i] = FALSE;
      }

      /* consider cross-word context dependency between the last word and now->last_ph */
      wend = winfo->wseq[word][winfo->wlen[word]-1];
      ret = get_right_context_HMM(wend, now->last_ph->name, hmminfo);
      if (ret == NULL) {	/* triphone not found */
        /* fallback to the original bi/mono-phone */
        /* error if the original is pseudo phone (not explicitly defined
        in hmmdefs/hmmlist) */
        /* exception: word with 1 phone (triphone may exist in the next expansion */
        if (winfo->wlen[word] > 1 && wend->is_pseudo) {
          error_missing_right_triphone(wend, now->last_ph->name);
        }
        dwrk->phmmseq[phmmlen-2] = wend;
      } else {
        dwrk->phmmseq[phmmlen-2] = ret;
      }
      ret = get_left_context_HMM(now->last_ph, wend->name, hmminfo);
      if (ret == NULL) {
        /* fallback to the original bi/mono-phone */
        /* error if the original is pseudo phone (not explicitly defined
        in hmmdefs/hmmlist) */
        if (now->last_ph->is_pseudo) {
          error_missing_left_triphone(now->last_ph, wend->name);
        }
        dwrk->phmmseq[phmmlen-1] = now->last_ph;
      } else {
        dwrk->phmmseq[phmmlen-1] = ret;
      }

      if (enable_iwsp && hmminfo->multipath) {
        /* short pause after the word-end phone, and after last_ph if it
           had one attached in the previous scan */
        dwrk->has_sp[phmmlen-2] = TRUE;
        dwrk->has_sp[phmmlen-1] = now->last_ph_sp_attached;
      }

#ifdef TCD
      jlog("DEBUG: w=");
      for(i=0;i<winfo->wlen[word];i++) {
        jlog(" %s",(winfo->wseq[word][i])->name);
        if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)");
      }
      jlog(" | %s\n", (now->last_ph)->name);
      if (hmminfo->multipath && now->last_ph_sp_attached) jlog("DEBUG:   (sp)\n");
      jlog("DEBUG: scan for:");

      for (i=0;i<phmmlen;i++) {
        jlog(" %s", dwrk->phmmseq[i]->name);
        if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)");
      }
      jlog("\n");
#endif

      /* make word HMM */
      whmm = new_make_word_hmm(hmminfo, dwrk->phmmseq, phmmlen, (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
      if (whmm == NULL) {
        j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
      }

      /* As backscan enabled, the initial forward score g[] is set by
      now->g_prev[] */
      for (t=0;t<peseqlen;t++) {
        dwrk->g[t]=now->g_prev[t];

      }

      /* set where to store scores as new g_prev[] for the next backscan
      in the HMM */
      if (hmminfo->multipath) {
        store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2;
        store_point_maxarc = max_out_arc(dwrk->phmmseq[0]);
        if (enable_iwsp && dwrk->has_sp[0]) {
          store_point += hmm_logical_state_num(hmminfo->sp) - 2;
          if (store_point_maxarc < max_out_arc(hmminfo->sp)) {
            store_point_maxarc = max_out_arc(hmminfo->sp);
          }
        }
      } else {
        store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2 - 1;
      }
      /* set where is the connection point of the last word in the HMM */
      if (hmminfo->multipath) {
        crossword_point = whmm->len - hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]);
        if (enable_iwsp && dwrk->has_sp[phmmlen-1]) {
          crossword_point -= hmm_logical_state_num(hmminfo->sp) - 2;
        }
      } else {
        crossword_point = whmm->len - (hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]) - 2) - 1;
      }

    } else {			/* not backscan mode */

      /* scan range: phones in now->seq[now->seqnum-1] */

#ifdef TCD
      jlog("DEBUG: scan(org):");
      for (i=0;i<winfo->wlen[word];i++) {
        jlog(" %s", (winfo->wseq[word][i])->name);
      }
      jlog("\n");
#endif

      if (enable_iwsp && hmminfo->multipath) {
        /* mark the short-pause position: only after the last phone */
        for(i=0;i<winfo->wlen[word];i++) {
          dwrk->has_sp[i] = FALSE;
        }
        dwrk->has_sp[winfo->wlen[word]-1] = TRUE;
      }

      /* make word HMM */
      whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
      if (whmm == NULL) {
        j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
      }

      /* the initial forward score g[] is set by now->g[] */
      for (t=0;t<peseqlen;t++) {
        dwrk->g[t]=now->g[t];
      }

      /* set where to store scores as new g_prev[] for the next backscan
      in the HMM */
      if (hmminfo->multipath) {
        store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2;
        store_point_maxarc = max_out_arc(winfo->wseq[word][0]);
        if (enable_iwsp && dwrk->has_sp[0]) {
          store_point += hmm_logical_state_num(hmminfo->sp) - 2;
          if (store_point_maxarc < max_out_arc(hmminfo->sp)) {
            store_point_maxarc = max_out_arc(hmminfo->sp);
          }
        }
      } else {
        store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2 - 1;
      }

      /* there is no connection point of the last word in this HMM */
      crossword_point = -1;
    }

  } else {			/* ccd_flag == FALSE */

    if (enable_iwsp && hmminfo->multipath) {
      /* mark the short-pause position: only after the last phone */
      for(i=0;i<winfo->wlen[word];i++) {
        dwrk->has_sp[i] = FALSE;
      }
      dwrk->has_sp[winfo->wlen[word]-1] = TRUE;
    }

    /* for monophone: simple make HMM for the last word */
    whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
    if (whmm == NULL) {
      j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
    }

    /* the initial forward score g[] is set by now->g[] */
    for (t=0;t<peseqlen;t++) {
      dwrk->g[t]=now->g[t];
    }

  }

#ifdef TCD
  jlog("DEBUG: whmm len	  = %d\n",whmm->len);
  jlog("DEBUG: crossword_point = %d\n", crossword_point);
  jlog("DEBUG: g[] store point = %d\n", store_point);
#endif

  wordhmmnum = whmm->len;
  if (wordhmmnum >= winfo->maxwn + 10) {
    j_internal_error("scan_word: word too long (>%d)\n", winfo->maxwn + 10);
  }

#ifndef GRAPHOUT_PRECISE_BOUNDARY
  if (r->graphout) {
    if (ccd_flag) {
      /* remember the score at bestt before now->g[] is overwritten */
      now->tail_g_score = now->g[now->bestt];
    }
  }
#endif

  /* ----------------------- do scan ----------------------- */

  /* search for the start frame -> set to startt:
     the last frame whose initial score is valid (and inside the beam) */
  for(t = peseqlen-1; t >=0 ; t--) {
    if (
#ifdef SCAN_BEAM
      dwrk->g[t] > framemaxscore[t] - scan_beam_thres &&
#endif
      dwrk->g[t] > LOG_ZERO) {
        break;
    }
  }
  if (t < 0) {			/* no node has score > LOG_ZERO */
    /* nothing to scan: invalidate all output scores and skip Viterbi */
    for(t=0;t<peseqlen;t++) {
      if (ccd_flag) now->g_prev[t] = LOG_ZERO;
      now->g[t] = LOG_ZERO;
    }
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      for(t=0;t<peseqlen;t++) {
        now->wordend_frame[t] = -1;
        now->wordend_gscore[t] = LOG_ZERO;
      }
    }
#endif
    goto end_of_scan;
  }
  startt = t;
  scan_started = TRUE;

  /* clear [startt+1..peseqlen-1] */
  for(t=peseqlen-1;t>startt;t--) {
    if (ccd_flag) now->g_prev[t] = LOG_ZERO;
    now->g[t] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      now->wordend_frame[t] = -1;
      now->wordend_gscore[t] = LOG_ZERO;
    }
#endif
  }

  /* initialize the swap buffer pointers */
  tn = 0; tl = 1;

#ifdef GRAPHOUT_PRECISE_BOUNDARY
  if (r->graphout) {
    for(i=0;i<wordhmmnum;i++) {
      dwrk->wend_token_frame[tn][i] = -1;
      dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
    }
  }
#endif

  if (! hmminfo->multipath) {
    /* Below initialization is not needed on multipath version, since
    the actual viterbi will begin at frame 0 in multipath mode in main loop */

    /* initialize scores on frame [startt] */
    for(i=0;i<wordhmmnum-1;i++) dwrk->wordtrellis[tn][i] = LOG_ZERO;
    dwrk->wordtrellis[tn][wordhmmnum-1] = dwrk->g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param);
    if (ccd_flag) {
      now->g_prev[startt] = dwrk->wordtrellis[tn][store_point];
    }
    now->g[startt] = dwrk->wordtrellis[tn][0];

#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      if (ccd_flag) {
        if (back_rescan) {
          if (wordhmmnum-1 == crossword_point) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
          } else {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
          }
        } else {
          dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
          dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
        }
      } else {
        dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
        dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
      }
      now->wordend_frame[startt] = dwrk->wend_token_frame[tn][0];
      now->wordend_gscore[startt] = dwrk->wend_token_gscore[tn][0];
    }
#endif
  } /* end of ! hmminfo->multipath */

  endt = startt;

  /* main loop: start from [startt], and compute Viterbi toward [0]
     (multipath starts at startt itself, since frame startt was not
     initialized above in that mode) */
  for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) {

    /* swap the work areas of wordtrellis: tn = current, tl = previous */
    i = tn; tn = tl; tl = i;

    node_exist_p = FALSE;	/* TRUE if there is at least 1 survived node in this frame */

    if (hmminfo->multipath) {

      /* the edge node [t][wordhmmnum-1] is equal to g[] */

      /* expand trellis for node [t][wordhmmnum-2..0] */
      tmpmax_store = LOG_ZERO;

    } else {

      /* the edge node [t][wordhmmnum-1] is either internal transition or g[] */
      tmptmp = LOG_ZERO;
      for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) {
        score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
        if (tmptmp < score1) {
          j = ac->arc;
          tmptmp = score1;
        }
      }
      if (dwrk->g[t] > tmptmp) {
        tmpmax = dwrk->g[t];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          if (!back_rescan || wordhmmnum-1 == crossword_point) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = t;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[t];
          } else {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
          }
        }
#endif
      } else {
        tmpmax = tmptmp;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
          dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
        }
#endif
      }

      /* check if the edge node is within score envelope; prune if not */
      if (
#ifdef SCAN_BEAM
        tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
        tmpmax <= LOG_ZERO
        ) {
          dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
          }
#endif
      } else {
        node_exist_p = TRUE;
        dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param);
      }

    } /* end of ~multipath */

    /* expand trellis for node [t][wordhmmnum-2..0] */
    for(i=wordhmmnum-2;i>=0;i--) {

      if (ccd_flag) {

        /* find most likely path and the max score 'tmpmax' */
        /* 'tmpmax2' is max score excluding self transition, for next g_prev[] */
        if (! hmminfo->multipath) {
          if (i == store_point) {
            tmpmax2 = LOG_ZERO;
          }
        }
        tmpmax = LOG_ZERO;
        for (ac=whmm->state[i].ac;ac;ac=ac->next) {
          if (hmminfo->multipath) {
            /* an arc from the final state takes the initial score g[t];
               no previous-frame trellis exists yet at the first frame */
            if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t];
            else if (t + 1 > startt) score1 = LOG_ZERO;
            else score1 = dwrk->wordtrellis[tl][ac->arc];
            score1 += ac->a;
          } else {
            score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
          }
          if (i <= crossword_point && ac->arc > crossword_point) {
            /* this is a transition across word (when backscan is enabled) */
            score1 += now->lscore; /* add LM score */
          }

          if (hmminfo->multipath) {
            /* track the best score crossing the store point */
            if (i <= store_point && ac->arc > store_point) {
              if (tmpmax_store < score1) tmpmax_store = score1;
            }
          } else {
            if (i == store_point && i != ac->arc) {
              if (tmpmax2 < score1) tmpmax2 = score1;
            }
          }

          if (tmpmax < score1) {
            tmpmax = score1;
            j = ac->arc;
          }
        }

        /* check if score of this node is within the score envelope */
        if (
#ifdef SCAN_BEAM
          tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
          tmpmax <= LOG_ZERO
          ) {  /* invalid node */
            dwrk->wordtrellis[tn][i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
            if (r->graphout) {
              dwrk->wend_token_frame[tn][i] = -1;
              dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
            }
#endif
            if (! hmminfo->multipath) {
              if (i == store_point) now->g_prev[t] = LOG_ZERO;
            }
        } else { /* survived node */
          if (! hmminfo->multipath) {
            if (i == store_point) now->g_prev[t] = tmpmax2;
          }
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {

            if (hmminfo->multipath) {
              if ((back_rescan && i <= crossword_point && j > crossword_point)
                || j == wordhmmnum-1) {
                  dwrk->wend_token_frame[tn][i] = t;
                  dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            } else {
              if (i <= crossword_point && j > crossword_point) {
                dwrk->wend_token_frame[tn][i] = t;
                dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            }
          }
#endif
          node_exist_p = TRUE;	/* at least one node survive in this frame */

          dwrk->wordtrellis[tn][i] = tmpmax;
          if (! hmminfo->multipath || i > 0) {
            /* compute output probability
               (skipped for state 0 in multipath mode) */
            dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
          }
        }

      } else {			/* not triphone */

        /* no backscan: store_point, crossword_point ignored */
        tmpmax = LOG_ZERO;
        if (hmminfo->multipath) {
          for (ac=whmm->state[i].ac;ac;ac=ac->next) {
            if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t];
            else if (t + 1 > startt) score1 = LOG_ZERO;
            else score1 = dwrk->wordtrellis[tl][ac->arc];
            score1 += ac->a;
            if (tmpmax < score1) {
              tmpmax = score1;
              j = ac->arc;
            }
          }
        } else {
          for (ac=whmm->state[i].ac;ac;ac=ac->next) {
            score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
            if (tmpmax < score1) {
              tmpmax = score1;
              j = ac->arc;
            }
          }
        }

        /* check if score of this node is within the score envelope */
        if (
#ifdef SCAN_BEAM
          tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
          tmpmax <= LOG_ZERO
          ) {
            /* invalid node */
            dwrk->wordtrellis[tn][i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
            if (r->graphout) {
              dwrk->wend_token_frame[tn][i] = -1;
              dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
            }
#endif
        } else {
          /* survived node */
          node_exist_p = TRUE;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {
            if (hmminfo->multipath) {
              if (j == wordhmmnum-1) {
                dwrk->wend_token_frame[tn][i] = t;
                dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            } else {
              dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
              dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
            }
          }
#endif
          /* score of node [t][i] has been determined here */
          dwrk->wordtrellis[tn][i] = tmpmax;
          if (! hmminfo->multipath || i > 0) {
            dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
          }
        }

      }
    } /* end of node loop */

    /* Viterbi end for frame [t].  the forward score is the score of word
    beginning scanned */
    now->g[t] = dwrk->wordtrellis[tn][0];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      now->wordend_frame[t] = dwrk->wend_token_frame[tn][0];
      now->wordend_gscore[t] = dwrk->wend_token_gscore[tn][0];
    }
#endif

    if (hmminfo->multipath) {
      /* store the scores crossing the store_point to g_prev, for next scan */
      if (ccd_flag) {
        /* the max arc crossing the store_point always selected as tmpmax_score */ 
        tmpmax_store -= store_point_maxarc;
        if (tmpmax_store < LOG_ZERO) tmpmax_store = LOG_ZERO;
        now->g_prev[t] = tmpmax_store;
      }
    }

    /* store the number of last computed frame */
    if (node_exist_p) endt = t;

    /* if frame 't' already reached the beginning frame of scanned word
    in 1st pass and no node was survived in this frame (all nodes pruned
    by score envelope), terminate computation at this frame and
    do not computer further frame ([0..t-1]). */
    if (t < now->estimated_next_t && (!node_exist_p)) {
      /* clear the rest scores */
      for (i=t-1;i>=0;i--) {
        now->g[i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          now->wordend_frame[i] = -1;
          now->wordend_gscore[i] = LOG_ZERO;
        }
#endif
        if (ccd_flag) now->g_prev[i] = LOG_ZERO;
      }
      /* terminate loop */
      break;
    }

  } /* end of time loop */

  if (debug2_flag) jlog("DEBUG: scanned: [%3d-%3d]\n", endt, startt);

end_of_scan:

  if (hmminfo->multipath) {
    /* compute the total forward score (transition from state 0 to frame 0).
       This is done only when a scan was actually performed and a node
       survived down to frame 0; when no start frame was found the trellis
       buffer was never written by this call, so the score is LOG_ZERO. */
    if (scan_started && endt == 0) {
      tmpmax = LOG_ZERO;
      for(ac=whmm->state[0].ac;ac;ac=ac->next) {
        score1 = dwrk->wordtrellis[tn][ac->arc] + ac->a;
        if (tmpmax < score1) tmpmax = score1;
      }
      /* the result is the best score over all arcs, not the score of
         whichever arc happened to be examined last */
      now->final_g = tmpmax;
    } else {
      now->final_g = LOG_ZERO;
    }
  }

  /* store data for next backscan */
  if (ccd_flag) {
    if (store_point == (hmminfo->multipath ? wordhmmnum - 2 : wordhmmnum - 1)) {
      /* if there was no 'last_ph' and the scanned word consists of only
      1 phone, the whole word should be re-computed in the future scan_word().
      So the next 'g_prev[]' should be the initial forward scores
      before we begin Viterbi (= g[t]). */
      for (t = startt; t>=0; t--) {
        now->g_prev[t] = dwrk->g[t];
      }
    }
#ifndef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      if (now->tail_g_score != LOG_ZERO) {
        if (now->prevgraph != NULL) {
          (now->prevgraph)->leftscore = now->tail_g_score;
        }
      }
    }
#endif
    /* update 'now->last_ph' for future scan_word() */
    if (back_rescan) {
      now->last_ph = dwrk->phmmseq[0];
    } else {
      now->last_ph = winfo->wseq[word][0];
    }
    if (enable_iwsp && hmminfo->multipath) {
      now->last_ph_sp_attached = dwrk->has_sp[0];
    }
  }

#ifdef GRAPHOUT_PRECISE_BOUNDARY
  if (! hmminfo->multipath) {
    if (r->graphout) {
      /* proceed word boundary for one step for next_word */
      now->wordend_frame[peseqlen-1] = now->wordend_frame[0];
      now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0];
      for (t=0;t<peseqlen-1;t++) {
        now->wordend_frame[t] = now->wordend_frame[t+1];
        now->wordend_gscore[t] = now->wordend_gscore[t+1];
      }
    }
  }
#endif

  /* free work area */
  free_hmm(whmm);
#ifdef TCD
  if (hmminfo->multipath) {
    if (ccd_flag) {
      jlog("DEBUG: last_ph = %s", (now->last_ph)->name);
      if (now->last_ph_sp_attached) jlog(" (sp attached)");
      jlog("\n");
    }
  } else {
    jlog("DEBUG: last_ph = %s\n", (now->last_ph)->name);
  }
#endif
}
예제 #2
0
/** 
 * <JA>
 * 与えられた音素の並びに対して Viterbi 計算を行い,前向きスコアを
 * 更新する汎用関数. 
 * 
 * @param g [in] 現在の時間ごとの前向きスコア
 * @param g_new [out] 更新後の新たな前向きスコアを格納するバッファ
 * @param phmmseq [in] 音素HMMの並び
 * @param has_sp [in] short-pause location
 * @param phmmlen [in] @a phmmseq の長さ
 * @param param [in] 入力パラメータ
 * @param framelen [in] 入力フレーム長
 * @param least_frame [in] ビーム設定時,このフレーム数以上は Viterbi計算する
 * @param final_g [in] final g scores
 * @param wordend_frame_src [in] 現在の単語終端フレームトークン
 * @param wordend_frame_dst [out] 更新後の新たな単語終端フレームトークン
 * @param wordend_gscore_src [in] 現在の単語終端スコアトークン
 * @param wordend_gscore_dst [out] 更新後の新たな単語終端スコアトークン
 * @param r [in] recognition process instance
 * </JA>
 * <EN>
 * Generic function to perform Viterbi path updates for given phoneme
 * sequence.
 * 
 * @param g [in] current forward scores at each input frame
 * @param g_new [out] buffer to save the resulting score updates
 * @param phmmseq [in] phoneme sequence to perform Viterbi
 * @param has_sp [in] short-pause location
 * @param phmmlen [in] length of @a phmmseq.
 * @param param [in] input parameter vector
 * @param framelen [in] input frame length to compute
 * @param least_frame [in] Least frame length to force viterbi even with beam
 * @param final_g [in] final g scores
 * @param wordend_frame_src [in] current word-end frame tokens
 * @param wordend_frame_dst [out] buffer to store updated word-end frame tokens
 * @param wordend_gscore_src [in] current word-end score tokens
 * @param wordend_gscore_dst [out] buffer to store updated word-end score tokens
 * @param r [in] recognition process instance
 * 
 * </EN>
 */
static void
do_viterbi(LOGPROB *g, LOGPROB *g_new, HMM_Logical **phmmseq, boolean *has_sp, int phmmlen, HTK_Param *param, int framelen, int least_frame, LOGPROB *final_g, short *wordend_frame_src, short *wordend_frame_dst, LOGPROB *wordend_gscore_src, LOGPROB *wordend_gscore_dst, RecogProcess *r) /* has_sp and final_g is for multipath only */
{
  HMM *whmm;			/* HMM */
  int wordhmmnum;		/* length of above */
  int startt;			/* scan start frame */
  LOGPROB tmpmax,tmpscore;	/* variables for Viterbi process */
  A_CELL *ac;
  int t,i,j;
  boolean node_exist_p;
  int tn;		       ///< Temporal pointer to current buffer
  int tl;		       ///< Temporal pointer to previous buffer

  /* store global values to local for rapid access */
  StackDecode *dwrk;
  WORD_INFO *winfo;
  HTK_HMM_INFO *hmminfo;
  LOGPROB *framemaxscore;
#ifdef SCAN_BEAM
  LOGPROB scan_beam_thres;
#endif

  dwrk = &(r->pass2);
  winfo = r->lm->winfo;
  hmminfo = r->am->hmminfo;
  framemaxscore = r->pass2.framemaxscore;
#ifdef SCAN_BEAM
  scan_beam_thres = r->config->pass2.scan_beam_thres;
#endif


#ifdef TCD
  jlog("DEBUG: scan for:");
  for (i=0;i<phmmlen;i++) {
    jlog(" %s", phmmseq[i]->name);
  }
  jlog("\n");
#endif
  
  /* 単語HMMを作る */
  /* make word HMM */
  whmm = new_make_word_hmm(hmminfo, phmmseq, phmmlen, has_sp);
  if (whmm == NULL) {
    j_internal_error("Error: failed to make word hmm\n");
  }
  wordhmmnum = whmm->len;
  if (wordhmmnum >= winfo->maxwn + 10) {
    j_internal_error("do_viterbi: word too long (>%d)\n", winfo->maxwn + 10);
  }

  /* scan開始点を検索 -> starttへ*/
  /* search for the start frame -> set to startt */
  for(t = framelen-1; t >=0 ; t--) {
    if (
#ifdef SCAN_BEAM
	g[t] > framemaxscore[t] - scan_beam_thres &&
#endif
	g[t] > LOG_ZERO) {
      break;
    }
  }
  if (t < 0) {			/* no node has score > LOG_ZERO */
    /* reset all scores and end */
    for(t=0;t<framelen;t++) {
      g_new[t] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
      if (r->graphout) {
	wordend_frame_dst[t] = -1;
	wordend_gscore_dst[t] = LOG_ZERO;
      }
#endif
    }
    free_hmm(whmm);
    return;
  }
  startt = t;
  
  /* 開始点以降[startt+1..framelen-1] の g_new[] をリセット */
  /* clear g_new[] for [startt+1..framelen-1] */
  for(t=framelen-1;t>startt;t--) {
    g_new[t] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      wordend_frame_dst[t] = -1;
      wordend_gscore_dst[t] = LOG_ZERO;
    }
#endif
  }

  /*****************/
  /* viterbi start */
  /*****************/

  /* set initial swap buffer */
  tn = 0; tl = 1;

#ifdef GRAPHOUT_PRECISE_BOUNDARY
  if (r->graphout) {
    for(i=0;i<wordhmmnum;i++) {
      dwrk->wend_token_frame[tn][i] = -1;
      dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
    }
  }
#endif

  if (! hmminfo->multipath) {
    /* 時間 [startt] 上の値を初期化 */
    /* initialize scores on frame [startt] */
    for(i=0;i<wordhmmnum-1;i++) dwrk->wordtrellis[tn][i] = LOG_ZERO;
    dwrk->wordtrellis[tn][wordhmmnum-1] = g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param);
    g_new[startt] = dwrk->wordtrellis[tn][0];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      dwrk->wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[startt];
      dwrk->wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[startt];
      wordend_frame_dst[startt] = dwrk->wend_token_frame[tn][0];
      wordend_gscore_dst[startt] = dwrk->wend_token_gscore[tn][0];
    }
#endif
  }
  
  /* メインループ: startt から始まり 0 に向かって Viterbi 計算 */
  /* main loop: start from [startt], and compute Viterbi toward [0] */
  for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) {
    
    /* wordtrellisのワークエリアをスワップ */
    /* swap workarea of wordtrellis */
    i = tn; tn = tl; tl = i;

    node_exist_p = FALSE;	/* TRUE if there is at least 1 survived node in this frame */

    if (! hmminfo->multipath) {
    
      /* 端のノード [t][wordhmmnum-1]は,内部遷移 か g[]の高い方になる */
      /* the edge node [t][wordhmmnum-1] is either internal transitin or g[] */
      tmpscore = LOG_ZERO;
      for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) {
	if (tmpscore < dwrk->wordtrellis[tl][ac->arc] + ac->a) {
	  j = ac->arc;
	  tmpscore = dwrk->wordtrellis[tl][ac->arc] + ac->a;
	}
      }
      if (g[t] > tmpscore) {
	tmpmax = g[t];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
	if (r->graphout) {
	  dwrk->wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[t];
	  dwrk->wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[t];
	}
#endif
      } else {
	tmpmax = tmpscore;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
	if (r->graphout) {
	  dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
	  dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
	}
#endif
      }
      
      /* 端のノードのスコアエンベロープチェック: 一定幅外なら落とす */
      /* check if the edge node is within score envelope */
      if (
#ifdef SCAN_BEAM
	  tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
	  tmpmax <= LOG_ZERO
	  ) {
	dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
	if (r->graphout) {
	  dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
	  dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
	}
#endif
      } else {
	node_exist_p = TRUE;
	dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param);
      }

    }

    /* node[wordhmmnum-2..0]についてトレリスを展開 */
    /* expand trellis for node [t][wordhmmnum-2..0] */
    for(i=wordhmmnum-2;i>=0;i--) {
      
      /* 最尤パスと最尤スコア tmpmax を見つける */
      /* find most likely path and the max score 'tmpmax' */
      tmpmax = LOG_ZERO;
      for (ac=whmm->state[i].ac;ac;ac=ac->next) {
	if (hmminfo->multipath) {
	  if (ac->arc == wordhmmnum-1) tmpscore = g[t];
	  else if (t + 1 > startt) tmpscore = LOG_ZERO;
	  else tmpscore = dwrk->wordtrellis[tl][ac->arc];
	  tmpscore += ac->a;
	} else {
	  tmpscore = dwrk->wordtrellis[tl][ac->arc] + ac->a;
	}
	if (tmpmax < tmpscore) {
	  tmpmax = tmpscore;
	  j = ac->arc;
	}
      }
      
      /* スコアエンベロープチェック: 一定幅外なら落とす */
      /* check if score of this node is within the score envelope */
      if (
#ifdef SCAN_BEAM
	  tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
	  tmpmax <= LOG_ZERO
	  ) {
	/* invalid node */
	dwrk->wordtrellis[tn][i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
	if (r->graphout) {
	  dwrk->wend_token_frame[tn][i] = -1;
	  dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
	}
#endif
      } else {
	/* survived node */
	node_exist_p = TRUE;
 	dwrk->wordtrellis[tn][i] = tmpmax;
	if (! hmminfo->multipath || i > 0) {
	  dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
	}
#ifdef GRAPHOUT_PRECISE_BOUNDARY
	if (r->graphout) {
	  if (hmminfo->multipath) {
	    if (j == wordhmmnum-1) {
	      dwrk->wend_token_frame[tn][i] = wordend_frame_src[t];
	      dwrk->wend_token_gscore[tn][i] = wordend_gscore_src[t];
	    } else {
	      dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
	      dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
	    }
	  } else {
	    dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
	    dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
	  }
	}
#endif
      }
    } /* end of node loop */

    /* 時間 t のViterbi計算終了. 新たな前向きスコア g_new[t] をセット */
    /* Viterbi end for frame [t].  set the new forward score g_new[t] */
    g_new[t] = dwrk->wordtrellis[tn][0];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
    /* new wordend */
      wordend_frame_dst[t] = dwrk->wend_token_frame[tn][0];
      wordend_gscore_dst[t] = dwrk->wend_token_gscore[tn][0];
    }
#endif
    /* 指定された least_frame より先まで t が進んでおり,かつこの t において
       スコアエンベロープによって生き残ったノードが一つも無かった場合,
       このフレームで計算を打ち切りそれ以上先([0..t-1])は計算しない */
    /* if frame 't' already reached the 'least_frame' and no node was
       survived in this frame (all nodes pruned by score envelope),
       terminate computation at this frame and do not computer further
       frame ([0..t-1]). */
    if (t < least_frame && (!node_exist_p)) {
      /* crear the rest scores */
      for (i=t-1;i>=0;i--) {
	g_new[i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
	if (r->graphout) {
	  wordend_frame_dst[i] = -1;
	  wordend_gscore_dst[i] = LOG_ZERO;
	}
#endif
      }
      /* terminate loop */
      break;
    }
    
  } /* end of time loop */

  if (hmminfo->multipath) {
    /* 前向きスコアの最終値を計算 (状態 0 から時間 0 への遷移) */
    /* compute the total forward score (transition from state 0 to frame 0 */
    if (t < 0) {			/* computed till the end */
      tmpmax = LOG_ZERO;
      for(ac=whmm->state[0].ac;ac;ac=ac->next) {
	tmpscore = dwrk->wordtrellis[tn][ac->arc] + ac->a;
	if (tmpmax < tmpscore) tmpmax = tmpscore;
      }
      *final_g = tmpmax;
    } else {
      *final_g = LOG_ZERO;
    }
  }

  /* free work area */
  free_hmm(whmm);
}
/** 
 * @brief  Perform Viterbi alignment.
 *
 * This function performs viterbi alignment for the given sentence %HMM,
 * input parameter and unit definition.  Any segmentatino unit (word, phoneme
 * state, etc.) is allowed: the segmentation unit should be specified by
 * specifying a list of state id which are the end of each unit.
 * For example, if you want to obtain phoneme alignment, the list of state
 * number that exist at the end of phones should be specified by @a endstates.
 * 
 * @param hmm [in] sentence HMM to be matched
 * @param param [in] input parameter data
 * @param wrk [i/o] HMM computation work area
 * @param multipath [in] TRUE if need multi-path handling
 * @param endstates [in] list of state id that corrsponds to the ends of units
 * @param ulen [in] total number of units in the @a hmm
 * @param id_ret [out] Pointer to store the newly allocated array of the resulting id sequence of units on the best path.
 * @param seg_ret [out] Pointer to store the newly allocated array of the resulting end frame of each unit on the best path.
 * @param uscore_ret [out] Pointer to store the newly allocated array of the resulting score at the end frame of each unit on the best path.
 * @param slen_ret [out] Pointer to store the total number of units on the best path.
 * 
 * @return the total acoustic score for the whole input.
 */
LOGPROB
viterbi_segment(HMM *hmm, HTK_Param *param, HMMWork *wrk, boolean multipath, int *endstates, int ulen, int **id_ret, int **seg_ret, LOGPROB **uscore_ret, int *slen_ret)
{
  /* for viterbi */
  LOGPROB *nodescore[2];	/* node buffer */
  SEGTOKEN **tokenp[2];		/* propagating token which holds segment info */
  int startt, endt;
  int *from_node;
  int *u_end, *u_start;	/* the node is an end of the word, or -1 for non-multipath mode*/
  int i, n;
  unsigned int t;
  int tl,tn;
  LOGPROB tmpsum;
  A_CELL *ac;
  SEGTOKEN *newtoken, *token, *tmptoken, *root;
  LOGPROB result_score;
  LOGPROB maxscore, minscore;	/* for debug */
  int maxnode;			/* for debug */
  int *id, *seg, slen;
  LOGPROB *uscore;

  /* assume more than 1 units */
  if (ulen < 1) {
    jlog("Error: vsegment: no unit?\n");
    return LOG_ZERO;
  }

  if (!multipath) {
    /* initialize unit start/end marker */
    u_start = (int *)mymalloc(hmm->len * sizeof(int));
    u_end   = (int *)mymalloc(hmm->len * sizeof(int));
    for (n = 0; n < hmm->len; n++) {
      u_start[n] = -1;
      u_end[n] = -1;
    }
    u_start[0] = 0;
    u_end[endstates[0]] = 0;
    for (i=1;i<ulen;i++) {
      u_start[endstates[i-1]+1] = i;
      u_end[endstates[i]] = i;
    }
#if 0
    for (i=0;i<hmm->len;i++) {
      printf("unit %d: start=%d, end=%d\n", i, u_start[i], u_end[i]);
    }
#endif
  }

  /* initialize node buffers */
  tn = 0;
  tl = 1;
  root = NULL;
  for (i=0;i<2;i++){
    nodescore[i] = (LOGPROB *)mymalloc(hmm->len * sizeof(LOGPROB));
    tokenp[i] = (SEGTOKEN **)mymalloc(hmm->len * sizeof(SEGTOKEN *));
    for (n = 0; n < hmm->len; n++) {
      tokenp[i][n] = NULL;
    }
  }
  for (n = 0; n < hmm->len; n++) {
    nodescore[tn][n] = LOG_ZERO;
    newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
    newtoken->last_id = -1;
    newtoken->last_end_frame = -1;
    newtoken->last_end_score = 0.0;
    newtoken->list = root;
    root = newtoken;
    newtoken->next = NULL;
    tokenp[tn][n] = newtoken;
  }
  from_node = (int *)mymalloc(sizeof(int) * hmm->len);
  
  /* first frame: only set initial score */
  /*if (hmm->state[0].is_pseudo_state) {
    jlog("Warning: state %d: pseudo state?\n", 0);
    }*/
  if (multipath) {
    nodescore[tn][0] = 0.0;
  } else {
    nodescore[tn][0] = outprob(wrk, 0, &(hmm->state[0]), param);
  }

  /* do viterbi for rest frame */
  if (multipath) {
    startt = 0;  endt = param->samplenum;
  } else {
    startt = 1;  endt = param->samplenum - 1;
  }
  for (t = startt; t <= endt; t++) {
    i = tl;
    tl = tn;
    tn = i;
    maxscore = LOG_ZERO;
    minscore = 0.0;

    /* clear next scores */
    for (i=0;i<hmm->len;i++) {
      nodescore[tn][i] = LOG_ZERO;
      from_node[i] = -1;
    }

    /* select viterbi path for each node */
    for (n = 0; n < hmm->len; n++) {
      if (nodescore[tl][n] <= LOG_ZERO) continue;
      for (ac = hmm->state[n].ac; ac; ac = ac->next) {
        tmpsum = nodescore[tl][n] + ac->a;
        if (nodescore[tn][ac->arc] < tmpsum) {
          nodescore[tn][ac->arc] = tmpsum;
	  from_node[ac->arc] = n;
	}
      }
    }
    /* propagate token, appending new if path was selected between units */
    if (multipath) {
      for (n = 0; n < hmm->len; n++) {
	if (from_node[n] == -1 || nodescore[tn][n] <= LOG_ZERO) {
	  /*tokenp[tn][n] = NULL;*/
	} else {
	  i=0;
	  while (from_node[n] > endstates[i]) i++;
	  if (n > endstates[i]) {
	    newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
	    newtoken->last_id = i;
	    newtoken->last_end_frame = t-1;
	    newtoken->last_end_score = nodescore[tl][from_node[n]];
	    newtoken->list = root;
	    root = newtoken;
	    newtoken->next = tokenp[tl][from_node[n]];
	    tokenp[tn][n] = newtoken;
	  } else {
	    tokenp[tn][n] = tokenp[tl][from_node[n]];
	  }
	}
      }
    } else {			/* not multipath */
      for (n = 0; n < hmm->len; n++) {
	if (from_node[n] == -1) {
	  tokenp[tn][n] = NULL;
	} else if (nodescore[tn][n] <= LOG_ZERO) {
	  tokenp[tn][n] = tokenp[tl][from_node[n]];
	} else {
	  if (u_end[from_node[n]] != -1 && u_start[n] != -1
	      && from_node[n] !=  n) {
	    newtoken = (SEGTOKEN *)mymalloc(sizeof(SEGTOKEN));
	    newtoken->last_id = u_end[from_node[n]];
	    newtoken->last_end_frame = t-1;
	    newtoken->last_end_score = nodescore[tl][from_node[n]];
	    newtoken->list = root;
	    root = newtoken;
	    newtoken->next = tokenp[tl][from_node[n]];
	    tokenp[tn][n] = newtoken;
	  } else {
	    tokenp[tn][n] = tokenp[tl][from_node[n]];
	  }
	}
      }
    }

    if (multipath) {
      /* if this is next of last frame, loop ends here */
      if (t == param->samplenum) break;
    }
	
    /* calc outprob to new nodes */
    for (n = 0; n < hmm->len; n++) {
      if (multipath) {
	if (hmm->state[n].out.state == NULL) continue;
      }
      if (nodescore[tn][n] > LOG_ZERO) {
	if (hmm->state[n].is_pseudo_state) {
	  jlog("Warning: vsegment: state %d: pseudo state?\n", n);
	}
	nodescore[tn][n] += outprob(wrk, t, &(hmm->state[n]), param);
      }
      if (nodescore[tn][n] > maxscore) { /* for debug */
	maxscore = nodescore[tn][n];
	maxnode = n;
      }
    }
    
#if 0
    for (i=0;i<ulen;i++) {
      printf("%d: unit %d(%d-%d): begin_frame = %d\n", t - 1, i,
	     (i > 0) ? endstates[i-1]+1 : 0, endstates[i],
	     (multipath && tokenp[tl][endstates[i]] == NULL) ? -1 : tokenp[tl][endstates[i]]->last_end_frame + 1);
    }
#endif

    /* printf("t=%3d max=%f n=%d\n",t,maxscore, maxnode); */
    
  }

  result_score = nodescore[tn][hmm->len-1];

  /* parse back the last token to see the trail of best viterbi path */
  /* and store the informations to returning buffer */
  slen = 0;
  if (!multipath) slen++;
  for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
    if (token->last_end_frame == -1) break;
    slen++;
  }
  id = (int *)mymalloc(sizeof(int)*slen);
  seg = (int *)mymalloc(sizeof(int)*slen);
  uscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*slen);

  if (multipath) {
    i = slen - 1;
  } else {
    id[slen-1] = ulen - 1;
    seg[slen-1] = t - 1;
    uscore[slen-1] = result_score;
    i = slen - 2;
  }
  for(token = tokenp[tn][hmm->len-1]; token; token = token->next) {
    if (i < 0 || token->last_end_frame == -1) break;
    id[i] = token->last_id;
    seg[i] = token->last_end_frame;
    uscore[i] = token->last_end_score;
    i--;
  }

  /* normalize scores by frame */
  for (i=slen-1;i>0;i--) {
    uscore[i] = (uscore[i] - uscore[i-1]) / (seg[i] - seg[i-1]);
  }
  uscore[0] = uscore[0] / (seg[0] + 1);

  /* set return value */
  *id_ret = id;
  *seg_ret = seg;
  *uscore_ret = uscore;
  *slen_ret = slen;

  /* free memory */
  if (!multipath) {
    free(u_start);
    free(u_end);
  }
  free(from_node);
  token = root;
  while(token) {
    tmptoken = token->list;
    free(token);
    token = tmptoken;
  }
  for (i=0;i<2;i++) {
    free(nodescore[i]);
    free(tokenp[i]);
  }

  return(result_score);

}