/**
 * Compute the forward Viterbi for the last word of a hypothesis to update
 * its forward scores and make it ready for word connection.
 *
 * The function builds a word HMM for the last word in @a now
 * (now->seq[now->seqnum-1]).  When context dependency handling is enabled
 * (r->ccd_flag) and a previous phone (now->last_ph) has been inherited,
 * that phone is appended to the scan range ("backscan") and the scan starts
 * from now->g_prev[] instead of now->g[].  The trellis is then expanded
 * from the latest frame that still has a valid score toward frame 0.
 *
 * Values written by this function:
 *  - now->g[]        forward scores at the head state of the scanned HMM
 *  - now->g_prev[]   scores at the store point, kept for the next backscan
 *                    (only when r->ccd_flag is set)
 *  - now->final_g    best score of the arcs out of state 0 when the scan
 *                    reached frame 0, else LOG_ZERO (multipath mode only)
 *  - now->last_ph, now->last_ph_sp_attached (only when r->ccd_flag is set)
 *  - now->wordend_frame[], now->wordend_gscore[] or now->tail_g_score /
 *    now->prevgraph->leftscore, depending on GRAPHOUT_PRECISE_BOUNDARY,
 *    when r->graphout is set
 *
 * The word HMM allocated here is released with free_hmm() before return.
 *
 * @param now [i/o] hypothesis
 * @param param [in] input parameter vectors
 * @param r [in] recognition process instance
 *
 * @callgraph
 * @callergraph
 */
void
scan_word(NODE *now, HTK_Param *param, RecogProcess *r)
{
  /* 'j' holds the source state of the best arc; it is initialized so that
     it is never read as an indeterminate value when no arc has been
     selected yet (possible in the GRAPHOUT_PRECISE_BOUNDARY bookkeeping) */
  int i, t, j = 0;
  HMM *whmm;
  A_CELL *ac;
  WORD_ID word;
  LOGPROB tmpmax, tmptmp, score1;
  int startt = 0, endt = 0;
  int wordhmmnum;
  LOGPROB tmpmax_store, store_point_maxarc; /* multipath */
  LOGPROB tmpmax2 = LOG_ZERO;
  int phmmlen;
  HMM_Logical *ret, *wend;
  int store_point;
  int crossword_point = 0;
  boolean back_rescan = FALSE;
  boolean node_exist_p;
  int tn=0;                     ///< Temporal pointer to current buffer
  int tl=0;                     ///< Temporal pointer to previous buffer

  /* store global values to local for rapid access */
  WORD_INFO *winfo;
  HTK_HMM_INFO *hmminfo;
  LOGPROB *framemaxscore;
  int peseqlen;
  boolean ccd_flag;
  boolean enable_iwsp;
#ifdef SCAN_BEAM
  LOGPROB scan_beam_thres;
#endif
  StackDecode *dwrk;

  winfo = r->lm->winfo;
  hmminfo = r->am->hmminfo;
  dwrk = &(r->pass2);
  peseqlen = r->peseqlen;
  framemaxscore = r->pass2.framemaxscore;
  ccd_flag = r->ccd_flag;
  enable_iwsp = r->lm->config->enable_iwsp; /* multipath */
#ifdef SCAN_BEAM
  scan_beam_thres = r->config->pass2.scan_beam_thres;
#endif

  if (hmminfo->multipath) {
    store_point = -1;
  } else {
    store_point = 0;
  }

  /* ----------------------- prepare HMM ----------------------- */

  if (ccd_flag) {
    /* if there are any last phone, enable backscan */
    if (now->last_ph == NULL) {
      /* initial score: now->g[] */
      /* scan range: phones in now->seq[now->seqnum-1] */
      back_rescan = FALSE;
    } else {
      /* initial score: now->g_prev[] (1-phone before)*/
      /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */
      back_rescan = TRUE;
    }
  }
#ifdef TCD
  if (now->last_ph != NULL) {
    jlog("DEBUG: inherited last_ph: %s\n", (now->last_ph)->name);
    if (now->last_ph_sp_attached) jlog("DEBUG: (sp attached)\n"); /* multipath */
  } else {
    jlog("DEBUG: no last_ph inherited\n");
  }
#endif

  /* prepare HMM of the scan range */
  word = now->seq[now->seqnum-1];

  if (ccd_flag) {

    if (back_rescan) {

      /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */

      phmmlen = winfo->wlen[word] + 1;
      if (phmmlen > dwrk->phmmlen_max) {
        j_internal_error("scan_word: num of phonemes in a word exceed phmmlenmax (%d) ?\n", dwrk->phmmlen_max);
      }
      /* copy all phones but the last one of the word as they are */
      for (i=0;i<phmmlen - 2;i++) dwrk->phmmseq[i] = winfo->wseq[word][i];
      if (enable_iwsp && hmminfo->multipath) {
        for (i=0;i<phmmlen - 2;i++) dwrk->has_sp[i] = FALSE;
      }

      /* consider cross-word context dependency between the last word
         and now->last_ph */
      wend = winfo->wseq[word][winfo->wlen[word]-1];
      ret = get_right_context_HMM(wend, now->last_ph->name, hmminfo);
      if (ret == NULL) {        /* triphone not found */
        /* fallback to the original bi/mono-phone */
        /* error if the original is pseudo phone (not explicitly defined
           in hmmdefs/hmmlist) */
        /* exception: word with 1 phone (triphone may exist in the next
           expansion) */
        if (winfo->wlen[word] > 1 && wend->is_pseudo) {
          error_missing_right_triphone(wend, now->last_ph->name);
        }
        dwrk->phmmseq[phmmlen-2] = wend;
      } else {
        dwrk->phmmseq[phmmlen-2] = ret;
      }
      ret = get_left_context_HMM(now->last_ph, wend->name, hmminfo);
      if (ret == NULL) {
        /* fallback to the original bi/mono-phone */
        /* error if the original is pseudo phone (not explicitly defined
           in hmmdefs/hmmlist) */
        if (now->last_ph->is_pseudo) {
          error_missing_left_triphone(now->last_ph, wend->name);
        }
        dwrk->phmmseq[phmmlen-1] = now->last_ph;
      } else {
        dwrk->phmmseq[phmmlen-1] = ret;
      }

      if (enable_iwsp && hmminfo->multipath) {
        dwrk->has_sp[phmmlen-2] = TRUE;
        dwrk->has_sp[phmmlen-1] = now->last_ph_sp_attached;
      }

#ifdef TCD
      jlog("DEBUG: w=");
      for(i=0;i<winfo->wlen[word];i++) {
        jlog(" %s",(winfo->wseq[word][i])->name);
        if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)");
      }
      jlog(" | %s\n", (now->last_ph)->name);
      if (hmminfo->multipath && now->last_ph_sp_attached) jlog("DEBUG: (sp)\n");
      jlog("DEBUG: scan for:");
      for (i=0;i<phmmlen;i++) {
        jlog(" %s", dwrk->phmmseq[i]->name);
        if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)");
      }
      jlog("\n");
#endif

      /* make word HMM */
      whmm = new_make_word_hmm(hmminfo, dwrk->phmmseq, phmmlen, (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
      if (whmm == NULL) {
        j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
      }

      /* As backscan enabled, the initial forward score g[] is set by
         now->g_prev[] */
      for (t=0;t<peseqlen;t++) {
        dwrk->g[t]=now->g_prev[t];
      }

      /* set where to store scores as new g_prev[] for the next backscan
         in the HMM */
      if (hmminfo->multipath) {
        store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2;
        store_point_maxarc = max_out_arc(dwrk->phmmseq[0]);
        if (enable_iwsp && dwrk->has_sp[0]) {
          store_point += hmm_logical_state_num(hmminfo->sp) - 2;
          if (store_point_maxarc < max_out_arc(hmminfo->sp)) {
            store_point_maxarc = max_out_arc(hmminfo->sp);
          }
        }
      } else {
        store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2 - 1;
      }
      /* set where is the connection point of the last word in the HMM */
      if (hmminfo->multipath) {
        crossword_point = whmm->len - hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]);
        if (enable_iwsp && dwrk->has_sp[phmmlen-1]) {
          crossword_point -= hmm_logical_state_num(hmminfo->sp) - 2;
        }
      } else {
        crossword_point = whmm->len - (hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]) - 2) - 1;
      }

    } else {                    /* not backscan mode */

      /* scan range: phones in now->seq[now->seqnum-1] */

#ifdef TCD
      jlog("DEBUG: scan(org):");
      for (i=0;i<winfo->wlen[word];i++) {
        jlog(" %s", (winfo->wseq[word][i])->name);
      }
      jlog("\n");
#endif

      if (enable_iwsp && hmminfo->multipath) {
        /* mark where a short pause is to be inserted: only after the
           last phone of the word */
        for(i=0;i<winfo->wlen[word];i++) {
          dwrk->has_sp[i] = FALSE;
        }
        dwrk->has_sp[winfo->wlen[word]-1] = TRUE;
      }

      /* make word HMM */
      whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
      if (whmm == NULL) {
        j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
      }

      /* the initial forward score g[] is set by now->g[] */
      for (t=0;t<peseqlen;t++) {
        dwrk->g[t]=now->g[t];
      }

      /* set where to store scores as new g_prev[] for the next backscan
         in the HMM */
      if (hmminfo->multipath) {
        store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2;
        store_point_maxarc = max_out_arc(winfo->wseq[word][0]);
        if (enable_iwsp && dwrk->has_sp[0]) {
          store_point += hmm_logical_state_num(hmminfo->sp) - 2;
          if (store_point_maxarc < max_out_arc(hmminfo->sp)) {
            store_point_maxarc = max_out_arc(hmminfo->sp);
          }
        }
      } else {
        store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2 - 1;
      }

      /* there is no connection point of the last word in this HMM */
      crossword_point = -1;
    }

  } else {                      /* ccd_flag == FALSE */

    if (enable_iwsp && hmminfo->multipath) {
      /* mark where a short pause is to be inserted: only after the
         last phone of the word */
      for(i=0;i<winfo->wlen[word];i++) {
        dwrk->has_sp[i] = FALSE;
      }
      dwrk->has_sp[winfo->wlen[word]-1] = TRUE;
    }

    /* for monophone: simple make HMM for the last word */
    whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
    if (whmm == NULL) {
      j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
    }

    /* the initial forward score g[] is set by now->g[] */
    for (t=0;t<peseqlen;t++) {
      dwrk->g[t]=now->g[t];
    }

  }

#ifdef TCD
  jlog("DEBUG: whmm len = %d\n",whmm->len);
  jlog("DEBUG: crossword_point = %d\n", crossword_point);
  jlog("DEBUG: g[] store point = %d\n", store_point);
#endif

  wordhmmnum = whmm->len;
  if (wordhmmnum >= winfo->maxwn + 10) {
    j_internal_error("scan_word: word too long (>%d)\n", winfo->maxwn + 10);
  }

#ifndef GRAPHOUT_PRECISE_BOUNDARY
  if (r->graphout) {
    if (ccd_flag) {
      now->tail_g_score = now->g[now->bestt];
    }
  }
#endif

  /* ----------------------- do scan ----------------------- */

  /* search for the start frame -> set to startt */
  for(t = peseqlen-1; t >=0 ; t--) {
    if (
#ifdef SCAN_BEAM
        dwrk->g[t] > framemaxscore[t] - scan_beam_thres &&
#endif
        dwrk->g[t] > LOG_ZERO) {
      break;
    }
  }
  if (t < 0) {                  /* no node has score > LOG_ZERO */
    for(t=0;t<peseqlen;t++) {
      if (ccd_flag) now->g_prev[t] = LOG_ZERO;
      now->g[t] = LOG_ZERO;
    }
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      for(t=0;t<peseqlen;t++) {
        now->wordend_frame[t] = -1;
        now->wordend_gscore[t] = LOG_ZERO;
      }
    }
#endif
    goto end_of_scan;
  }
  startt = t;

  /* clear [startt+1..peseqlen-1] */
  for(t=peseqlen-1;t>startt;t--) {
    if (ccd_flag) now->g_prev[t] = LOG_ZERO;
    now->g[t] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      now->wordend_frame[t] = -1;
      now->wordend_gscore[t] = LOG_ZERO;
    }
#endif
  }

  /* initialize the double-buffer pointers */
  tn = 0; tl = 1;

#ifdef GRAPHOUT_PRECISE_BOUNDARY
  if (r->graphout) {
    for(i=0;i<wordhmmnum;i++) {
      dwrk->wend_token_frame[tn][i] = -1;
      dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
    }
  }
#endif

  if (! hmminfo->multipath) {
    /* Below initialization is not needed on multipath version, since
       the actual viterbi will begin at frame 0 in multipath mode in
       main loop */

    /* initialize scores on frame [startt] */
    for(i=0;i<wordhmmnum-1;i++) dwrk->wordtrellis[tn][i] = LOG_ZERO;
    dwrk->wordtrellis[tn][wordhmmnum-1] = dwrk->g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param);
    if (ccd_flag) {
      now->g_prev[startt] = dwrk->wordtrellis[tn][store_point];
    }
    now->g[startt] = dwrk->wordtrellis[tn][0];

#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      if (ccd_flag) {
        if (back_rescan) {
          if (wordhmmnum-1 == crossword_point) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
          } else {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
          }
        } else {
          dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
          dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
        }
      } else {
        dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
        dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
      }
      now->wordend_frame[startt] = dwrk->wend_token_frame[tn][0];
      now->wordend_gscore[startt] = dwrk->wend_token_gscore[tn][0];
    }
#endif
  } /* end of hmminfo->multipath */

  endt = startt;

  /* main loop: start from [startt], and compute Viterbi toward [0] */
  for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) {

    /* swap the work areas of wordtrellis */
    i = tn; tn = tl; tl = i;

    node_exist_p = FALSE;       /* TRUE if there is at least 1 survived node in this frame */

    if (hmminfo->multipath) {

      /* the edge node [t][wordhmmnum-1] is equal to g[] */

      /* expand trellis for node [t][wordhmmnum-2..0] */
      tmpmax_store = LOG_ZERO;

    } else {

      /* the edge node [t][wordhmmnum-1] is either internal transition
         or g[], whichever is higher */
      tmptmp = LOG_ZERO;
      for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) {
        score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
        if (tmptmp < score1) {
          j = ac->arc;
          tmptmp = score1;
        }
      }
      if (dwrk->g[t] > tmptmp) {
        tmpmax = dwrk->g[t];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          if (!back_rescan || wordhmmnum-1 == crossword_point) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = t;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[t];
          } else {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
          }
        }
#endif
      } else {
        tmpmax = tmptmp;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
          dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
        }
#endif
      }

      /* check if the edge node is within score envelope; drop it if not */
      if (
#ifdef SCAN_BEAM
          tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
          tmpmax <= LOG_ZERO
          ) {
        dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
          dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
        }
#endif
      } else {
        node_exist_p = TRUE;
        dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param);
      }

    } /* end of ~multipath */

    /* expand trellis for node [t][wordhmmnum-2..0] */
    for(i=wordhmmnum-2;i>=0;i--) {

      if (ccd_flag) {

        /* find most likely path and the max score 'tmpmax' */
        /* 'tmpmax2' is max score excluding self transition, for next
           g_prev[] */
        if (! hmminfo->multipath) {
          if (i == store_point) {
            tmpmax2 = LOG_ZERO;
          }
        }
        tmpmax = LOG_ZERO;
        for (ac=whmm->state[i].ac;ac;ac=ac->next) {
          if (hmminfo->multipath) {
            if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t];
            else if (t + 1 > startt) score1 = LOG_ZERO;
            else score1 = dwrk->wordtrellis[tl][ac->arc];
            score1 += ac->a;
          } else {
            score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
          }
          if (i <= crossword_point && ac->arc > crossword_point) {
            /* this is a transition across word (when backscan is
               enabled) */
            score1 += now->lscore; /* add LM score */
          }

          if (hmminfo->multipath) {
            if (i <= store_point && ac->arc > store_point) {
              if (tmpmax_store < score1) tmpmax_store = score1;
            }
          } else {
            if (i == store_point && i != ac->arc) {
              if (tmpmax2 < score1) tmpmax2 = score1;
            }
          }

          if (tmpmax < score1) {
            tmpmax = score1;
            j = ac->arc;
          }
        }

        /* check if score of this node is within the score envelope;
           drop it if not */
        if (
#ifdef SCAN_BEAM
            tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
            tmpmax <= LOG_ZERO
            ) {
          /* invalid node */
          dwrk->wordtrellis[tn][i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {
            dwrk->wend_token_frame[tn][i] = -1;
            dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
          }
#endif
          if (! hmminfo->multipath) {
            if (i == store_point) now->g_prev[t] = LOG_ZERO;
          }
        } else {
          /* survived node */
          if (! hmminfo->multipath) {
            if (i == store_point) now->g_prev[t] = tmpmax2;
          }
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {

            if (hmminfo->multipath) {
              if ((back_rescan && i <= crossword_point && j > crossword_point)
                  || j == wordhmmnum-1) {
                dwrk->wend_token_frame[tn][i] = t;
                dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            } else {
              if (i <= crossword_point && j > crossword_point) {
                dwrk->wend_token_frame[tn][i] = t;
                dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            }
          }
#endif
          node_exist_p = TRUE;  /* at least one node survive in this frame */

          dwrk->wordtrellis[tn][i] = tmpmax;
          if (! hmminfo->multipath || i > 0) {
            /* compute output probability */
            dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
          }
        }

      } else {                  /* not triphone */

        /* no backscan: store_point, crossword_point ignored */
        tmpmax = LOG_ZERO;
        if (hmminfo->multipath) {
          for (ac=whmm->state[i].ac;ac;ac=ac->next) {
            if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t];
            else if (t + 1 > startt) score1 = LOG_ZERO;
            else score1 = dwrk->wordtrellis[tl][ac->arc];
            score1 += ac->a;
            if (tmpmax < score1) {
              tmpmax = score1;
              j = ac->arc;
            }
          }
        } else {
          for (ac=whmm->state[i].ac;ac;ac=ac->next) {
            score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
            if (tmpmax < score1) {
              tmpmax = score1;
              j = ac->arc;
            }
          }
        }

        /* check if score of this node is within the score envelope;
           drop it if not */
        if (
#ifdef SCAN_BEAM
            tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
            tmpmax <= LOG_ZERO
            ) {
          /* invalid node */
          dwrk->wordtrellis[tn][i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {
            dwrk->wend_token_frame[tn][i] = -1;
            dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
          }
#endif
        } else {
          /* survived node */
          node_exist_p = TRUE;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {
            if (hmminfo->multipath) {
              if (j == wordhmmnum-1) {
                dwrk->wend_token_frame[tn][i] = t;
                dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            } else {
              dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
              dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
            }
          }
#endif
          /* score of node [t][i] has been determined here */
          dwrk->wordtrellis[tn][i] = tmpmax;
          if (! hmminfo->multipath || i > 0) {
            dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
          }
        }

      }
    } /* end of node loop */

    /* Viterbi end for frame [t].  the forward score is the score of word
       beginning scanned */
    now->g[t] = dwrk->wordtrellis[tn][0];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      now->wordend_frame[t] = dwrk->wend_token_frame[tn][0];
      now->wordend_gscore[t] = dwrk->wend_token_gscore[tn][0];
    }
#endif

    if (hmminfo->multipath) {
      /* store the scores crossing the store_point to g_prev, for next
         scan */
      if (ccd_flag) {
        /* the max arc crossing the store_point always selected as
           tmpmax_score */
        tmpmax_store -= store_point_maxarc;
        if (tmpmax_store < LOG_ZERO) tmpmax_store = LOG_ZERO;
        now->g_prev[t] = tmpmax_store;
      }
    }

    /* store the number of last computed frame */
    if (node_exist_p) endt = t;

    /* if frame 't' already reached the beginning frame of scanned word
       in 1st pass and no node was survived in this frame (all nodes
       pruned by score envelope), terminate computation at this frame
       and do not compute further frames ([0..t-1]). */
    if (t < now->estimated_next_t && (!node_exist_p)) {
      /* clear the rest scores */
      for (i=t-1;i>=0;i--) {
        now->g[i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          now->wordend_frame[i] = -1;
          now->wordend_gscore[i] = LOG_ZERO;
        }
#endif
        if (ccd_flag) now->g_prev[i] = LOG_ZERO;
      }
      /* terminate loop */
      break;
    }

  } /* end of time loop */

  if (debug2_flag) jlog("DEBUG: scanned: [%3d-%3d]\n", endt, startt);

 end_of_scan:

  if (hmminfo->multipath) {
    /* compute the total forward score (transition from state 0 to
       frame 0) */
    /* NOTE(review): when we arrive here through 'goto end_of_scan',
       endt still has its initial value 0 and wordtrellis[tn] was not
       written by this call -- confirm whether final_g should rather be
       LOG_ZERO on that path. */
    if (endt == 0) {
      tmpmax = LOG_ZERO;
      for(ac=whmm->state[0].ac;ac;ac=ac->next) {
        score1 = dwrk->wordtrellis[tn][ac->arc] + ac->a;
        if (tmpmax < score1) tmpmax = score1;
      }
      /* store the best score over all arcs, not the score of whichever
         arc happened to be visited last (which is also undefined when
         state 0 has no arc) */
      now->final_g = tmpmax;
    } else {
      now->final_g = LOG_ZERO;
    }
  }

  /* store data for next backscan */
  if (ccd_flag) {
    if (store_point == (hmminfo->multipath ? wordhmmnum - 2 : wordhmmnum - 1)) {
      /* if there was no 'last_ph' and the scanned word consists of only
         1 phone, the whole word should be re-computed in the future
         scan_word().  So the next 'g_prev[]' should be the initial
         forward scores before we begin Viterbi (= g[t]). */
      for (t = startt; t>=0; t--) {
        now->g_prev[t] = dwrk->g[t];
      }
    }
#ifndef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      if (now->tail_g_score != LOG_ZERO) {
        if (now->prevgraph != NULL) {
          (now->prevgraph)->leftscore = now->tail_g_score;
        }
      }
    }
#endif
    /* update 'now->last_ph' for future scan_word() */
    if (back_rescan) {
      now->last_ph = dwrk->phmmseq[0];
    } else {
      now->last_ph = winfo->wseq[word][0];
    }
    if (enable_iwsp && hmminfo->multipath) {
      now->last_ph_sp_attached = dwrk->has_sp[0];
    }
  }

#ifdef GRAPHOUT_PRECISE_BOUNDARY
  if (! hmminfo->multipath) {
    if (r->graphout) {
      /* proceed word boundary for one step for next_word */
      now->wordend_frame[peseqlen-1] = now->wordend_frame[0];
      now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0];
      for (t=0;t<peseqlen-1;t++) {
        now->wordend_frame[t] = now->wordend_frame[t+1];
        now->wordend_gscore[t] = now->wordend_gscore[t+1];
      }
    }
  }
#endif

  /* free work area */
  free_hmm(whmm);
#ifdef TCD
  if (hmminfo->multipath) {
    if (ccd_flag) {
      jlog("DEBUG: last_ph = %s", (now->last_ph)->name);
      if (now->last_ph_sp_attached) jlog(" (sp attached)");
      jlog("\n");
    }
  } else {
    jlog("DEBUG: last_ph = %s\n", (now->last_ph)->name);
  }
#endif
}
/**
 * Calculate output probability on a tree lexicon node.  This function
 * calculates log output probability of an input vector on time frame @a t
 * in input parameter @a param at a node on tree lexicon.
 *
 * When PASS1_IWCD is not defined the state attached to the node is simply
 * evaluated.  Otherwise the computation depends on wchmm->outstyle[node]:
 *  - AS_STATE: evaluate the state itself
 *  - AS_LSET:  evaluate the state set attached to the node
 *  - AS_RSET:  select the model for the left context given by the last
 *              phone of @a last_wid, cache it in the node's RC_INFO, and
 *              evaluate it
 *  - AS_LRSET: look up the context-dependent state set for the left
 *              context given by @a last_wid, cache it in the node's
 *              LRC_INFO, and evaluate it
 * The cached selection is reused while @a last_wid is unchanged.
 *
 * @param wchmm [in] tree lexicon structure
 * @param node [in] node ID to compute the output probability
 * @param last_wid [in] word ID of last word hypothesis (used when the node is
 * within the word beginning phone and triphone is used), or WORD_INVALID
 * when there is no last word.
 * @param t [in] time frame of input vector in @a param to compute.
 * @param param [in] input parameter structure
 *
 * @return the computed log probability.
 *
 * @callgraph
 * @callergraph
 */
LOGPROB
outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
{
  char rbuf[MAX_HMMNAME_LEN];   ///< Local workarea for HMM name conversion

#ifndef PASS1_IWCD

  /* if cross-word triphone handling is disabled, we simply compute the
     output prob of the state */
  return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param));

#else  /* PASS1_IWCD */

  /* state type and context cache is considered */
  HMM_Logical *ohmm, *rhmm;
  RC_INFO *rset;
  LRC_INFO *lrset;
  CD_Set *lcd;
  WORD_INFO *winfo = wchmm->winfo;
  HTK_HMM_INFO *hmminfo = wchmm->hmminfo;

  /* the actual computation is different according to their context
     dependency handling */
  switch(wchmm->outstyle[node]) {
  case AS_STATE:                /* normal state (word-internal or context-independent) */
    /* compute as usual */
    return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param));
  case AS_LSET:                 /* node in word end phone */
    /* compute approximated value using the state set in pseudo phone */
    return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param));
  case AS_RSET:                 /* node in the beginning phone of word */
    /* depends on the last word hypothesis to compute the actual triphone */
    rset = wchmm->state[node].out.rset;
    /* consult cache */
    if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) {
      /* cache miss...calculate */
      /* rset contains either defined biphone or pseudo biphone */
      if (last_wid != WORD_INVALID) {
        /* lookup triphone with left-context (= last phoneme) */
        if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
          rhmm = ohmm;
        } else {
          /* if triphone not found, try to use the bi-phone itself */
          rhmm = rset->hmm;
          /* If the bi-phone is explicitly specified in hmmdefs/HMMList,
             use it.  if both triphone and biphone not found in user-given
             hmmdefs/HMMList, use "pseudo" phone, as same as the end of
             word */
          if (debug2_flag) {
            if (rhmm->is_pseudo) {
              error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
            }
          }
        }
      } else {
        /* if last word is WORD_INVALID try to use the bi-phone itself */
        rhmm = rset->hmm;
        /* If the bi-phone is explicitly specified in hmmdefs/HMMList,
           use it.  if not, use "pseudo" phone, as same as the end of
           word.
           No missing-triphone diagnostic is issued here: there is no
           last word, so there is no left-context name to report, and
           indexing winfo->wseq[] / winfo->wlen[] with WORD_INVALID would
           read outside the dictionary arrays. */
      }
      /* rhmm may be a pseudo phone */
      /* store to cache */
      if (rhmm->is_pseudo) {
        rset->last_is_lset = TRUE;
        rset->cache.lset = &(rhmm->body.pseudo->stateset[rset->state_loc]);
      } else {
        rset->last_is_lset = FALSE;
        rset->cache.state = rhmm->body.defined->s[rset->state_loc];
      }
      rset->lastwid_cache = last_wid;
    }
    /* calculate outprob and return */
    if (rset->last_is_lset) {
      return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param));
    } else {
      return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param));
    }
  case AS_LRSET:                /* node in word with only one phoneme --- both beginning and end */
    lrset = wchmm->state[node].out.lrset;
    if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) {
      /* cache miss...calculate */
      rhmm = lrset->hmm;
      /* lookup cdset for given left context (= last phoneme) */
      strcpy(rbuf, rhmm->name);
      if (last_wid != WORD_INVALID) {
        add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
      }
      if (wchmm->category_tree) {
#ifdef USE_OLD_IWCD
        lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
#else
        /* use category-indexed cdset */
        if (last_wid != WORD_INVALID &&
            (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
          lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category);
        } else {
          lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category);
        }
#endif
      } else {
        lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
      }
      if (lcd != NULL) {        /* found, set to cache */
        lrset->last_is_lset = TRUE;
        lrset->cache.lset = &(lcd->stateset[lrset->state_loc]);
        lrset->lastwid_cache = last_wid;
      } else {
        /* no relating lcdset found, falling to normal state */
        if (rhmm->is_pseudo) {
          lrset->last_is_lset = TRUE;
          lrset->cache.lset = &(rhmm->body.pseudo->stateset[lrset->state_loc]);
          lrset->lastwid_cache = last_wid;
        } else {
          lrset->last_is_lset = FALSE;
          lrset->cache.state = rhmm->body.defined->s[lrset->state_loc];
          lrset->lastwid_cache = last_wid;
        }
      }
    }
    /* calculate outprob and return */
    if (lrset->last_is_lset) {
      return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param));
    } else {
      return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param));
    }
  default:
    /* should not happen */
    j_internal_error("outprob_style: no outprob style??\n");
    return(LOG_ZERO);
  }

#endif /* PASS1_IWCD */
}