示例#1
0
/** 
 * <EN>
 * Generate all possible category-indexed pseudo phone sets that can
 * appear at the end of a word, for grammar recognition.
 *
 * Two passes are made over every word in the dictionary:
 *  -# the last phone of each word is registered with the word's category;
 *  -# for each one-phone word, every word that belongs to a category
 *     accepted by dfa_cp() as a predecessor category supplies its last
 *     phone as left context, and the resulting context-dependent phone
 *     (when it exists and is not a pseudo phone) is registered as well.
 * 
 * @param wchmm [i/o] tree lexicon
 * </EN>
 * @callgraph
 * @callergraph
 */
void
lcdset_register_with_category_all(WCHMM_INFO *wchmm)
{
  WORD_INFO *lex = wchmm->winfo;
  WORD_ID wid, cat, prev;
  HMM_Logical *tail, *ctx;
  int n;

  /* pass 1: the word-end phone of every word */
  for (wid = 0; wid < lex->num; wid++) {
    tail = lex->wseq[wid][lex->wlen[wid] - 1];
    lcdset_register_with_category(wchmm, tail, lex->wton[wid]);
  }

  /* pass 2: for one-phoneme words, the possible left contexts (= last
     phones of the words that may precede them) must be considered too */
  for (wid = 0; wid < lex->num; wid++) {
    if (lex->wlen[wid] <= 1) {
      for (cat = 0; cat < wchmm->dfa->term_num; cat++) {
        if (dfa_cp(wchmm->dfa, cat, lex->wton[wid])) {
          for (n = 0; n < wchmm->dfa->term.wnum[cat]; n++) {
            prev = wchmm->dfa->term.tw[cat][n];
            tail = lex->wseq[prev][lex->wlen[prev] - 1];
            ctx = get_left_context_HMM(lex->wseq[wid][0], tail->name, wchmm->hmminfo);
            /* NULL: no such context-dependent phone; the set for the
               single phone itself was already created in pass 1.
               pseudo phone: no lcd_set is needed for it. */
            if (ctx != NULL && !ctx->is_pseudo) {
              lcdset_register_with_category(wchmm, ctx, lex->wton[wid]);
            }
          }
        }
      }
    }
  }
}
示例#2
0
/** 
 * <EN>
 * Compute the forward Viterbi for the last word of a hypothesis to update
 * its forward scores and make it ready for word connection.
 *
 * The word HMM for now->seq[now->seqnum-1] is built and scanned from the
 * last frame that still has a valid score toward frame 0.  When cross-word
 * context dependency is enabled (r->ccd_flag) and the hypothesis carries a
 * previously scanned phone (now->last_ph), that phone is appended to the
 * scan range and the scan restarts from now->g_prev[] ("backscan").
 *
 * On return now->g[] holds the updated forward scores; with ccd_flag set,
 * now->g_prev[], now->last_ph and (when inter-word short pause is enabled
 * on a multipath model) now->last_ph_sp_attached are updated for the next
 * call; on a multipath model now->final_g is set as well.
 * 
 * @param now [i/o] hypothesis
 * @param param [in] input parameter vectors
 * @param r [in] recognition process instance
 * 
 * </EN>
 * @callgraph
 * @callergraph
 */
void
scan_word(NODE *now, HTK_Param *param, RecogProcess *r)
{
  /* j is initialised so that the token bookkeeping below can never use an
     indeterminate value as an array index when no incoming arc is chosen */
  int   i,t, j = 0;
  HMM *whmm;
  A_CELL *ac;
  WORD_ID word;
  LOGPROB tmpmax, tmptmp, score1;
  int startt = 0, endt = 0;
  int wordhmmnum;
  LOGPROB tmpmax_store, store_point_maxarc; /* multipath */
  LOGPROB tmpmax2 = LOG_ZERO;
  int phmmlen;
  HMM_Logical *ret, *wend;
  int store_point;
  int crossword_point = 0;
  boolean back_rescan = FALSE;
  boolean node_exist_p;
  boolean scanned = FALSE;     /* TRUE once the trellis has really been computed in this call */
  int tn=0;		       ///< Temporal pointer to current buffer
  int tl=0;		       ///< Temporal pointer to previous buffer

  /* store global values to local for rapid access */
  WORD_INFO *winfo;
  HTK_HMM_INFO *hmminfo;
  LOGPROB *framemaxscore;
  int peseqlen;
  boolean ccd_flag;
  boolean enable_iwsp;
#ifdef SCAN_BEAM
  LOGPROB scan_beam_thres;
#endif
  StackDecode *dwrk;

  winfo = r->lm->winfo;
  hmminfo = r->am->hmminfo;
  dwrk = &(r->pass2);
  peseqlen = r->peseqlen;
  framemaxscore = r->pass2.framemaxscore;
  ccd_flag = r->ccd_flag;
  enable_iwsp = r->lm->config->enable_iwsp; /* multipath */
#ifdef SCAN_BEAM
  scan_beam_thres = r->config->pass2.scan_beam_thres;
#endif

  if (hmminfo->multipath) {
    store_point = -1;
  } else {
    store_point = 0;
  }

  /* ----------------------- prepare HMM ----------------------- */

  if (ccd_flag) {
    /* if there is a last phone, go back to it and scan from there (backscan) */
    if (now->last_ph == NULL) {
      /* initial score: now->g[] */
      /* scan range: phones in now->seq[now->seqnum-1] */
      back_rescan = FALSE;
    } else {
      /* initial score: now->g_prev[] (1-phone before)*/
      /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */
      back_rescan = TRUE;
    }
  }
#ifdef TCD
  if (now->last_ph != NULL) {
    jlog("DEBUG: inherited last_ph: %s\n", (now->last_ph)->name);
    if (now->last_ph_sp_attached) jlog("DEBUG: (sp attached)\n"); /* multipath */
  } else {
    jlog("DEBUG: no last_ph inherited\n");
  }
#endif

  /* prepare HMM of the scan range */
  word = now->seq[now->seqnum-1];

  if (ccd_flag) {

    if (back_rescan) {

      /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */

      phmmlen = winfo->wlen[word] + 1;
      if (phmmlen > dwrk->phmmlen_max) {
        j_internal_error("scan_word: num of phonemes in a word exceed phmmlenmax (%d) ?\n", dwrk->phmmlen_max);
      }
      for (i=0;i<phmmlen - 2;i++) dwrk->phmmseq[i] = winfo->wseq[word][i];
      if (enable_iwsp && hmminfo->multipath) {
        for (i=0;i<phmmlen - 2;i++) dwrk->has_sp[i] = FALSE;
      }

      /* consider cross-word context dependency between the last word and now->last_ph */
      wend = winfo->wseq[word][winfo->wlen[word]-1];
      ret = get_right_context_HMM(wend, now->last_ph->name, hmminfo);
      if (ret == NULL) {	/* triphone not found */
        /* fallback to the original bi/mono-phone */
        /* error if the original is pseudo phone (not explicitly defined
        in hmmdefs/hmmlist) */
        /* exception: word with 1 phone (triphone may exist in the next expansion */
        if (winfo->wlen[word] > 1 && wend->is_pseudo) {
          error_missing_right_triphone(wend, now->last_ph->name);
        }
        dwrk->phmmseq[phmmlen-2] = wend;
      } else {
        dwrk->phmmseq[phmmlen-2] = ret;
      }
      ret = get_left_context_HMM(now->last_ph, wend->name, hmminfo);
      if (ret == NULL) {
        /* fallback to the original bi/mono-phone */
        /* error if the original is pseudo phone (not explicitly defined
        in hmmdefs/hmmlist) */
        if (now->last_ph->is_pseudo) {
          error_missing_left_triphone(now->last_ph, wend->name);
        }
        dwrk->phmmseq[phmmlen-1] = now->last_ph;
      } else {
        dwrk->phmmseq[phmmlen-1] = ret;
      }

      if (enable_iwsp && hmminfo->multipath) {
        dwrk->has_sp[phmmlen-2] = TRUE;
        dwrk->has_sp[phmmlen-1] = now->last_ph_sp_attached;
      }

#ifdef TCD
      jlog("DEBUG: w=");
      for(i=0;i<winfo->wlen[word];i++) {
        jlog(" %s",(winfo->wseq[word][i])->name);
        if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)");
      }
      jlog(" | %s\n", (now->last_ph)->name);
      if (hmminfo->multipath && now->last_ph_sp_attached) jlog("DEBUG:   (sp)\n");
      jlog("DEBUG: scan for:");

      for (i=0;i<phmmlen;i++) {
        jlog(" %s", dwrk->phmmseq[i]->name);
        if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)");
      }
      jlog("\n");
#endif

      /* make word HMM */
      whmm = new_make_word_hmm(hmminfo, dwrk->phmmseq, phmmlen, (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
      if (whmm == NULL) {
        j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
      }

      /* As backscan enabled, the initial forward score g[] is set by
      now->g_prev[] */
      for (t=0;t<peseqlen;t++) {
        dwrk->g[t]=now->g_prev[t];

      }

      /* set where to store scores as new g_prev[] for the next backscan
      in the HMM */
      if (hmminfo->multipath) {
        store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2;
        store_point_maxarc = max_out_arc(dwrk->phmmseq[0]);
        if (enable_iwsp && dwrk->has_sp[0]) {
          store_point += hmm_logical_state_num(hmminfo->sp) - 2;
          if (store_point_maxarc < max_out_arc(hmminfo->sp)) {
            store_point_maxarc = max_out_arc(hmminfo->sp);
          }
        }
      } else {
        store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2 - 1;
      }
      /* set where is the connection point of the last word in the HMM */
      if (hmminfo->multipath) {
        crossword_point = whmm->len - hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]);
        if (enable_iwsp && dwrk->has_sp[phmmlen-1]) {
          crossword_point -= hmm_logical_state_num(hmminfo->sp) - 2;
        }
      } else {
        crossword_point = whmm->len - (hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]) - 2) - 1;
      }

    } else {			/* not backscan mode */

      /* scan range: phones in now->seq[now->seqnum-1] */

#ifdef TCD
      jlog("DEBUG: scan(org):");
      for (i=0;i<winfo->wlen[word];i++) {
        jlog(" %s", (winfo->wseq[word][i])->name);
      }
      jlog("\n");
#endif

      if (enable_iwsp && hmminfo->multipath) {
        /* mark where a short pause should be inserted: only after the
           last phone of the word */
        for(i=0;i<winfo->wlen[word];i++) {
          dwrk->has_sp[i] = FALSE;
        }
        dwrk->has_sp[winfo->wlen[word]-1] = TRUE;
      }

      /* make word HMM */
      whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
      if (whmm == NULL) {
        j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
      }

      /* the initial forward score g[] is set by now->g[] */
      for (t=0;t<peseqlen;t++) {
        dwrk->g[t]=now->g[t];
      }

      /* set where to store scores as new g_prev[] for the next backscan
      in the HMM */
      if (hmminfo->multipath) {
        store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2;
        store_point_maxarc = max_out_arc(winfo->wseq[word][0]);
        if (enable_iwsp && dwrk->has_sp[0]) {
          store_point += hmm_logical_state_num(hmminfo->sp) - 2;
          if (store_point_maxarc < max_out_arc(hmminfo->sp)) {
            store_point_maxarc = max_out_arc(hmminfo->sp);
          }
        }
      } else {
        store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2 - 1;
      }

      /* the connection point of the last word is not exist in the HMM */
      crossword_point = -1;
    }

  } else {			/* ccd_flag == FALSE */

    if (enable_iwsp && hmminfo->multipath) {
      /* mark where a short pause should be inserted: only after the
         last phone of the word */
      for(i=0;i<winfo->wlen[word];i++) {
        dwrk->has_sp[i] = FALSE;
      }
      dwrk->has_sp[winfo->wlen[word]-1] = TRUE;
    }

    /* for monophone: simple make HMM for the last word */
    whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL);
    if (whmm == NULL) {
      j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]);
    }

    /* the initial forward score g[] is set by now->g[] */
    for (t=0;t<peseqlen;t++) {
      dwrk->g[t]=now->g[t];
    }

  }

#ifdef TCD
  jlog("DEBUG: whmm len	  = %d\n",whmm->len);
  jlog("DEBUG: crossword_point = %d\n", crossword_point);
  jlog("DEBUG: g[] store point = %d\n", store_point);
#endif

  wordhmmnum = whmm->len;
  if (wordhmmnum >= winfo->maxwn + 10) {
    j_internal_error("scan_word: word too long (>%d)\n", winfo->maxwn + 10);
  }

#ifndef GRAPHOUT_PRECISE_BOUNDARY
  if (r->graphout) {
    if (ccd_flag) {
      now->tail_g_score = now->g[now->bestt];
    }
  }
#endif

  /* ----------------------- do scan ----------------------- */

  /* search for the start frame -> set to startt */
  for(t = peseqlen-1; t >=0 ; t--) {
    if (
#ifdef SCAN_BEAM
      dwrk->g[t] > framemaxscore[t] - scan_beam_thres &&
#endif
      dwrk->g[t] > LOG_ZERO) {
        break;
    }
  }
  if (t < 0) {			/* no node has score > LOG_ZERO */
    /* reset all scores and skip the scan; 'scanned' stays FALSE */
    for(t=0;t<peseqlen;t++) {
      if (ccd_flag) now->g_prev[t] = LOG_ZERO;
      now->g[t] = LOG_ZERO;
    }
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      for(t=0;t<peseqlen;t++) {
        now->wordend_frame[t] = -1;
        now->wordend_gscore[t] = LOG_ZERO;
      }
    }
#endif
    goto end_of_scan;
  }
  startt = t;
  scanned = TRUE;

  /* clear [startt+1..peseqlen-1] */
  for(t=peseqlen-1;t>startt;t--) {
    if (ccd_flag) now->g_prev[t] = LOG_ZERO;
    now->g[t] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      now->wordend_frame[t] = -1;
      now->wordend_gscore[t] = LOG_ZERO;
    }
#endif
  }

  /* initialize the buffer pointers */
  tn = 0; tl = 1;

#ifdef GRAPHOUT_PRECISE_BOUNDARY
  if (r->graphout) {
    for(i=0;i<wordhmmnum;i++) {
      dwrk->wend_token_frame[tn][i] = -1;
      dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
    }
  }
#endif

  if (! hmminfo->multipath) {
    /* Below initialization is not needed on multipath version, since
    the actual viterbi will begin at frame 0 in multipath mode in main loop */

    /* initialize scores on frame [startt] */
    for(i=0;i<wordhmmnum-1;i++) dwrk->wordtrellis[tn][i] = LOG_ZERO;
    dwrk->wordtrellis[tn][wordhmmnum-1] = dwrk->g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param);
    if (ccd_flag) {
      now->g_prev[startt] = dwrk->wordtrellis[tn][store_point];
    }
    now->g[startt] = dwrk->wordtrellis[tn][0];

#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      if (ccd_flag) {
        if (back_rescan) {
          if (wordhmmnum-1 == crossword_point) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
          } else {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
          }
        } else {
          dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
          dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
        }
      } else {
        dwrk->wend_token_frame[tn][wordhmmnum-1] = startt;
        dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt];
      }
      now->wordend_frame[startt] = dwrk->wend_token_frame[tn][0];
      now->wordend_gscore[startt] = dwrk->wend_token_gscore[tn][0];
    }
#endif
  } /* end of hmminfo->multipath */

  endt = startt;

  /* main loop: start from [startt], and compute Viterbi toward [0] */
  for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) {

    /* swap the work areas of wordtrellis */
    i = tn; tn = tl; tl = i;

    node_exist_p = FALSE;	/* TRUE if there is at least 1 survived node in this frame */

    if (hmminfo->multipath) {

      /* the edge node [t][wordhmmnum-1] is equal to g[] */

      /* expand trellis for node [t][wordhmmnum-2..0] */
      tmpmax_store = LOG_ZERO;

    } else {

      /* the edge node [t][wordhmmnum-1] is either internal transition or g[],
         whichever is higher */
      tmptmp = LOG_ZERO;
      for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) {
        score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
        if (tmptmp < score1) {
          j = ac->arc;
          tmptmp = score1;
        }
      }
      if (dwrk->g[t] > tmptmp) {
        tmpmax = dwrk->g[t];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          if (!back_rescan || wordhmmnum-1 == crossword_point) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = t;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[t];
          } else {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
          }
        }
#endif
      } else {
        tmpmax = tmptmp;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j];
          dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j];
        }
#endif
      }

      /* check if the edge node is within score envelope; drop it if not */
      if (
#ifdef SCAN_BEAM
        tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
        tmpmax <= LOG_ZERO
        ) {
          dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {
            dwrk->wend_token_frame[tn][wordhmmnum-1] = -1;
            dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO;
          }
#endif
      } else {
        node_exist_p = TRUE;
        dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param);
      }

    } /* end of ~multipath */

    /* expand trellis for node [t][wordhmmnum-2..0] */
    for(i=wordhmmnum-2;i>=0;i--) {

      if (ccd_flag) {

        /* find most likely path and the max score 'tmpmax' */
        /* 'tmpmax2' is max score excluding self transition, for next g_prev[] */
        if (! hmminfo->multipath) {
          if (i == store_point) {
            tmpmax2 = LOG_ZERO;
          }
        }
        tmpmax = LOG_ZERO;
        for (ac=whmm->state[i].ac;ac;ac=ac->next) {
          if (hmminfo->multipath) {
            if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t];
            else if (t + 1 > startt) score1 = LOG_ZERO;
            else score1 = dwrk->wordtrellis[tl][ac->arc];
            score1 += ac->a;
          } else {
            score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
          }
          if (i <= crossword_point && ac->arc > crossword_point) {
            /* this is a transition across word (when backscan is enabled) */
            score1 += now->lscore; /* add LM score */
          }

          if (hmminfo->multipath) {
            if (i <= store_point && ac->arc > store_point) {
              if (tmpmax_store < score1) tmpmax_store = score1;
            }
          } else {
            if (i == store_point && i != ac->arc) {
              if (tmpmax2 < score1) tmpmax2 = score1;
            }
          }

          if (tmpmax < score1) {
            tmpmax = score1;
            j = ac->arc;
          }
        }

        /* check if score of this node is within the score envelope;
           drop it if not */
        if (
#ifdef SCAN_BEAM
          tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
          tmpmax <= LOG_ZERO
          ) {  /* invalid node */
            dwrk->wordtrellis[tn][i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
            if (r->graphout) {
              dwrk->wend_token_frame[tn][i] = -1;
              dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
            }
#endif
            if (! hmminfo->multipath) {
              if (i == store_point) now->g_prev[t] = LOG_ZERO;
            }
        } else { /* survived node */
          if (! hmminfo->multipath) {
            if (i == store_point) now->g_prev[t] = tmpmax2;
          }
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {

            if (hmminfo->multipath) {
              if ((back_rescan && i <= crossword_point && j > crossword_point)
                || j == wordhmmnum-1) {
                  dwrk->wend_token_frame[tn][i] = t;
                  dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            } else {
              if (i <= crossword_point && j > crossword_point) {
                dwrk->wend_token_frame[tn][i] = t;
                dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            }
          }
#endif
          node_exist_p = TRUE;	/* at least one node survive in this frame */

          dwrk->wordtrellis[tn][i] = tmpmax;
          if (! hmminfo->multipath || i > 0) {
            /* compute output probability */
            dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
          }
        }

      } else {			/* not triphone */

        /* no backscan: store_point, crossword_point ignored */
        tmpmax = LOG_ZERO;
        if (hmminfo->multipath) {
          for (ac=whmm->state[i].ac;ac;ac=ac->next) {
            if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t];
            else if (t + 1 > startt) score1 = LOG_ZERO;
            else score1 = dwrk->wordtrellis[tl][ac->arc];
            score1 += ac->a;
            if (tmpmax < score1) {
              tmpmax = score1;
              j = ac->arc;
            }
          }
        } else {
          for (ac=whmm->state[i].ac;ac;ac=ac->next) {
            score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a;
            if (tmpmax < score1) {
              tmpmax = score1;
              j = ac->arc;
            }
          }
        }

        /* check if score of this node is within the score envelope;
           drop it if not */
        if (
#ifdef SCAN_BEAM
          tmpmax <= framemaxscore[t] - scan_beam_thres ||
#endif
          tmpmax <= LOG_ZERO
          ) {
            /* invalid node */
            dwrk->wordtrellis[tn][i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
            if (r->graphout) {
              dwrk->wend_token_frame[tn][i] = -1;
              dwrk->wend_token_gscore[tn][i] = LOG_ZERO;
            }
#endif
        } else {
          /* survived node */
          node_exist_p = TRUE;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
          if (r->graphout) {
            if (hmminfo->multipath) {
              if (j == wordhmmnum-1) {
                dwrk->wend_token_frame[tn][i] = t;
                dwrk->wend_token_gscore[tn][i] = tmpmax;
              } else {
                dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
                dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
              }
            } else {
              dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j];
              dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j];
            }
          }
#endif
          /* score of node [t][i] has been determined here */
          dwrk->wordtrellis[tn][i] = tmpmax;
          if (! hmminfo->multipath || i > 0) {
            dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param);
          }
        }

      }
    } /* end of node loop */

    /* Viterbi end for frame [t].  the forward score is the score of word
    beginning scanned */
    now->g[t] = dwrk->wordtrellis[tn][0];
#ifdef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      now->wordend_frame[t] = dwrk->wend_token_frame[tn][0];
      now->wordend_gscore[t] = dwrk->wend_token_gscore[tn][0];
    }
#endif

    if (hmminfo->multipath) {
      /* store the scores crossing the store_point to g_prev, for next scan */
      if (ccd_flag) {
        /* the max arc crossing the store_point always selected as tmpmax_score */ 
        tmpmax_store -= store_point_maxarc;
        if (tmpmax_store < LOG_ZERO) tmpmax_store = LOG_ZERO;
        now->g_prev[t] = tmpmax_store;
      }
    }

    /* store the number of last computed frame */
    if (node_exist_p) endt = t;

    /* if frame 't' already reached the beginning frame of scanned word
    in 1st pass and no node was survived in this frame (all nodes pruned
    by score envelope), terminate computation at this frame and
    do not computer further frame ([0..t-1]). */
    if (t < now->estimated_next_t && (!node_exist_p)) {
      /* clear the rest scores */
      for (i=t-1;i>=0;i--) {
        now->g[i] = LOG_ZERO;
#ifdef GRAPHOUT_PRECISE_BOUNDARY
        if (r->graphout) {
          now->wordend_frame[i] = -1;
          now->wordend_gscore[i] = LOG_ZERO;
        }
#endif
        if (ccd_flag) now->g_prev[i] = LOG_ZERO;
      }
      /* terminate loop */
      break;
    }

  } /* end of time loop */

  if (debug2_flag) jlog("DEBUG: scanned: [%3d-%3d]\n", endt, startt);

end_of_scan:

  if (hmminfo->multipath) {
    /* compute the total forward score (transition from state 0 to frame 0 */
    /* 'scanned' guards the path that jumped here without computing the
       trellis: endt still has its initial value 0 there, and
       wordtrellis[tn] was not written by this call */
    if (scanned && endt == 0) {
      tmpmax = LOG_ZERO;
      for(ac=whmm->state[0].ac;ac;ac=ac->next) {
        score1 = dwrk->wordtrellis[tn][ac->arc] + ac->a;
        if (tmpmax < score1) tmpmax = score1;
      }
      /* use the best arc, not whichever arc happened to come last */
      now->final_g = tmpmax;
    } else {
      now->final_g = LOG_ZERO;
    }
  }

  /* store data for next backscan */
  if (ccd_flag) {
    if (store_point == (hmminfo->multipath ? wordhmmnum - 2 : wordhmmnum - 1)) {
      /* if there was no 'last_ph' and the scanned word consists of only
      1 phone, the whole word should be re-computed in the future scan_word().
      So the next 'g_prev[]' should be the initial forward scores
      before we begin Viterbi (= g[t]). */
      for (t = startt; t>=0; t--) {
        now->g_prev[t] = dwrk->g[t];
      }
    }
#ifndef GRAPHOUT_PRECISE_BOUNDARY
    if (r->graphout) {
      if (now->tail_g_score != LOG_ZERO) {
        if (now->prevgraph != NULL) {
          (now->prevgraph)->leftscore = now->tail_g_score;
        }
      }
    }
#endif
    /* update 'now->last_ph' for future scan_word() */
    if (back_rescan) {
      now->last_ph = dwrk->phmmseq[0];
    } else {
      now->last_ph = winfo->wseq[word][0];
    }
    if (enable_iwsp && hmminfo->multipath) {
      now->last_ph_sp_attached = dwrk->has_sp[0];
    }
  }

#ifdef GRAPHOUT_PRECISE_BOUNDARY
  if (! hmminfo->multipath) {
    if (r->graphout) {
      /* proceed word boundary for one step for next_word */
      now->wordend_frame[peseqlen-1] = now->wordend_frame[0];
      now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0];
      for (t=0;t<peseqlen-1;t++) {
        now->wordend_frame[t] = now->wordend_frame[t+1];
        now->wordend_gscore[t] = now->wordend_gscore[t+1];
      }
    }
  }
#endif

  /* free work area */
  free_hmm(whmm);
#ifdef TCD
  if (hmminfo->multipath) {
    if (ccd_flag) {
      jlog("DEBUG: last_ph = %s", (now->last_ph)->name);
      if (now->last_ph_sp_attached) jlog(" (sp attached)");
      jlog("\n");
    }
  } else {
    jlog("DEBUG: last_ph = %s\n", (now->last_ph)->name);
  }
#endif
}
示例#3
0
/** 
 * <EN>
 * Calculate output probability on a tree lexicon node.  This function
 * calculates log output probability of an input vector on time frame @a t
 * in input parameter @a param at a node on tree lexicon.
 *
 * When PASS1_IWCD is not defined the state attached to the node is
 * evaluated directly.  Otherwise the computation depends on
 * wchmm->outstyle[node]: a plain state (AS_STATE), a state set of a pseudo
 * phone (AS_LSET), a word-beginning phone whose left context is taken from
 * @a last_wid (AS_RSET), or a one-phone word that is both word beginning
 * and word end (AS_LRSET).  For the last two, the resolved state or state
 * set is cached in the node together with @a last_wid and reused while
 * @a last_wid does not change.
 * 
 * @param wchmm [in] tree lexicon structure
 * @param node [in] node ID to compute the output probability
 * @param last_wid [in] word ID of last word hypothesis (used when the node is
 * within the word beginning phone and triphone is used), or WORD_INVALID
 * when there is no last word.
 * @param t [in] time frame of input vector in @a param to compute.
 * @param param [in] input parameter structure
 * 
 * @return the computed log probability.
 * </EN>
 * @callgraph
 * @callergraph
 */
LOGPROB
outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param)
{
#ifndef PASS1_IWCD
  
  /* if cross-word triphone handling is disabled, we simply compute the
     output prob of the state */
  return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param));
  
#else  /* PASS1_IWCD */

  /* state type and context cache is considered */
  char rbuf[MAX_HMMNAME_LEN]; ///< Local workarea for HMM name conversion
  HMM_Logical *ohmm, *rhmm;
  RC_INFO *rset;
  LRC_INFO *lrset;
  CD_Set *lcd;
  WORD_INFO *winfo = wchmm->winfo;
  HTK_HMM_INFO *hmminfo = wchmm->hmminfo;

  /* the actual computation is different according to their context dependency
     handling */
  switch(wchmm->outstyle[node]) {
  case AS_STATE:
    /* normal state (word-internal or context-independent )*/
    /* compute as usual */
    return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param));
  case AS_LSET:
    /* node in word end phone */
    /* compute approximated value using the state set in pseudo phone */
    return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param));
  case AS_RSET:
    /* node in the beginning phone of word */
    /* depends on the last word hypothesis to compute the actual triphone */
    rset = wchmm->state[node].out.rset;
    /* consult cache */
    if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) {
      /* cache miss...calculate */
      /* rset contains either defined biphone or pseudo biphone */
      if (last_wid != WORD_INVALID) {
	/* lookup triphone with left-context (= last phoneme) */
	if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
	  rhmm = ohmm;
	} else {
	  /* if triphone not found, try to use the bi-phone itself */
	  rhmm = rset->hmm;
	  /* If the bi-phone is explicitly specified in hmmdefs/HMMList,
	     use it.  if both triphone and biphone not found in user-given
	     hmmdefs/HMMList, use "pseudo" phone, as same as the end of word */
	  if (debug2_flag) {
	    if (rhmm->is_pseudo) {
	      error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
	    }
	  }
	}
      } else {
	/* if last word is WORD_INVALID try to use the bi-phone itself */
	/* If the bi-phone is explicitly specified in hmmdefs/HMMList,
	   use it.  if not, use "pseudo" phone, as same as the end of word */
	/* No missing-triphone report is issued here: there is no left
	   context to name, and indexing winfo->wseq[] / winfo->wlen[] with
	   WORD_INVALID would read outside the dictionary arrays. */
	rhmm = rset->hmm;
      }
      /* rhmm may be a pseudo phone */
      /* store to cache */
      if (rhmm->is_pseudo) {
	rset->last_is_lset  = TRUE;
	rset->cache.lset    = &(rhmm->body.pseudo->stateset[rset->state_loc]);
      } else {
	rset->last_is_lset  = FALSE;
	rset->cache.state   = rhmm->body.defined->s[rset->state_loc];
      }
      rset->lastwid_cache = last_wid;
    }
    /* calculate outprob and return */
    if (rset->last_is_lset) {
      return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param));
    } else {
      return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param));
    }
  case AS_LRSET:
    /* node in word with only one phoneme --- both beginning and end */
    lrset = wchmm->state[node].out.lrset;
    if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) {
      /* cache miss...calculate */
      rhmm = lrset->hmm;
      /* lookup cdset for given left context (= last phoneme) */
      /* NOTE(review): strcpy()/add_left_context() write into rbuf without
	 a visible length check; presumably HMM names are guaranteed to fit
	 in MAX_HMMNAME_LEN -- confirm against the loader. */
      strcpy(rbuf, rhmm->name);
      if (last_wid != WORD_INVALID) {
	add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name);
      }
      if (wchmm->category_tree) {
#ifdef USE_OLD_IWCD
	lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
#else
	/* use category-indexed cdset */
	if (last_wid != WORD_INVALID &&
	    (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) {
	  lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category);
	} else {
	  lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category);
	}
#endif
      } else {
	lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf);
      }
      if (lcd != NULL) {	/* found, set to cache */
	lrset->last_is_lset  = TRUE;
	lrset->cache.lset    = &(lcd->stateset[lrset->state_loc]);
      } else if (rhmm->is_pseudo) {
	/* no relating lcdset found: fall back to the state set of the
	   pseudo phone itself */
	lrset->last_is_lset  = TRUE;
	lrset->cache.lset    = &(rhmm->body.pseudo->stateset[lrset->state_loc]);
      } else {
	/* no relating lcdset found, falling to normal state */
	lrset->last_is_lset  = FALSE;
	lrset->cache.state   = rhmm->body.defined->s[lrset->state_loc];
      }
      /* remember for which last word the cache entry is valid */
      lrset->lastwid_cache = last_wid;
    }
    /* calculate outprob and return */
    if (lrset->last_is_lset) {
      return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param));
    } else {
      return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param));
    }
  default:
    /* should not happen */
    j_internal_error("outprob_style: no outprob style??\n");
    return(LOG_ZERO);
  }

#endif  /* PASS1_IWCD */

}