/**
 * Perform phone-to-phone transitions for the phone loop.
 *
 * Every phone whose frame stamp equals frame_idx + 1 is treated as a
 * transition source.  Its exit score plus the phone insertion penalty
 * (pls->pip) is compared against the transition beam
 * (pls->best_score + pls->pbeam); if it is inside the beam, the score is
 * offered to every phone in the loop (including the source itself).
 *
 * @param pls       Phone loop search state.
 * @param frame_idx Index of the frame that has just been evaluated.
 */
static void
phone_transition(phone_loop_search_t *pls, int frame_idx)
{
    int32 thresh = pls->best_score + pls->pbeam;
    int nf = frame_idx + 1;
    int src;

    for (src = 0; src < pls->n_phones; ++src) {
        hmm_t *from = (hmm_t *)&pls->phones[src];
        int32 exit_score;
        int dst;

        /* Only phones already stamped with the next frame may exit. */
        if (hmm_frame(from) != nf)
            continue;

        exit_score = hmm_out_score(from) + pls->pip;
        if (!(exit_score BETTER_THAN thresh))
            continue;

        /* Viterbi rule: enter a target if it is not active, or if this
         * score beats the entry score it already holds. */
        for (dst = 0; dst < pls->n_phones; ++dst) {
            hmm_t *to = (hmm_t *)&pls->phones[dst];

            if (hmm_frame(to) >= frame_idx
                && !(exit_score BETTER_THAN hmm_in_score(to)))
                continue;

            hmm_enter(to, exit_score, hmm_out_history(from), nf);
        }
    }
}
/**
 * Renormalize the scores of all HMMs active in the given frame.
 *
 * Walks the active word list for frame_idx and calls hmm_normalize() with
 * norm on each root channel and each chained channel whose frame stamp
 * equals frame_idx.  Sets ngs->renormalized to TRUE when done.
 *
 * The list is walked by index so that exactly n_active_word entries are
 * read; the previous pointer-bumping loop fetched one element beyond the
 * active entries on every call.
 *
 * @param ngs       N-gram search state.
 * @param frame_idx Current frame index.
 * @param norm      Normalization value passed through to hmm_normalize().
 */
static void
fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
{
    int32 cf = frame_idx;
    int32 n_active = ngs->n_active_word[cf & 0x1];
    int32 *awl = ngs->active_word_list[cf & 0x1];
    int32 i;

    /* Renormalize individual word channels */
    for (i = 0; i < n_active; ++i) {
        int32 w = awl[i];
        root_chan_t *rhmm = (root_chan_t *) ngs->word_chan[w];
        chan_t *hmm;

        if (hmm_frame(&rhmm->hmm) == cf) {
            hmm_normalize(&rhmm->hmm, norm);
        }
        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) == cf) {
                hmm_normalize(&hmm->hmm, norm);
            }
        }
    }

    ngs->renormalized = TRUE;
}
/**
 * Rebuild the set of active senones for the given frame.
 *
 * Clears the acoustic model's active set, then activates every root
 * channel and chained channel of each active word whose frame stamp
 * equals frame_idx.
 *
 * The active word list is walked by index so that exactly n_active_word
 * entries are read; the previous pointer-bumping loop fetched one element
 * beyond the active entries on every call.
 *
 * @param ngs       N-gram search state.
 * @param frame_idx Current frame index.
 */
static void
compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx)
{
    int32 n_active, i;
    int32 *awl;

    acmod_clear_active(ps_search_acmod(ngs));

    n_active = ngs->n_active_word[frame_idx & 0x1];
    awl = ngs->active_word_list[frame_idx & 0x1];

    for (i = 0; i < n_active; ++i) {
        int32 w = awl[i];
        root_chan_t *rhmm = (root_chan_t *) ngs->word_chan[w];
        chan_t *hmm;

        if (hmm_frame(&rhmm->hmm) == frame_idx) {
            acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
        }

        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) == frame_idx) {
                acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
            }
        }
    }
}
/**
 * Carry every currently active HMM over to the next frame.
 *
 * No score-based pruning is applied here: any HMM whose frame stamp is at
 * least frame_idx is simply re-stamped with frame_idx + 1.
 *
 * @param sas       State alignment search state.
 * @param frame_idx Index of the frame that has just been evaluated.
 */
static void
prune_hmms(state_align_search_t *sas, int frame_idx)
{
    int next_frame = frame_idx + 1;
    int k;

    for (k = 0; k < sas->n_phones; ++k) {
        hmm_t *cur = &sas->hmms[k];

        if (hmm_frame(cur) >= frame_idx)
            hmm_frame(cur) = next_frame;
    }
}
/**
 * Enter an HMM with an integer history ID.
 *
 * Stores the given values through the hmm_in_score(), hmm_in_history()
 * and hmm_frame() accessors of h.
 *
 * @param h      HMM to enter.
 * @param score  Entry score.
 * @param histid History ID to associate with the entry.
 * @param frame  Frame stamp to set on the HMM.
 */
void
hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
{
    hmm_in_score(h) = score;
    hmm_in_history(h) = histid;
    hmm_frame(h) = frame;
}
/**
 * Evaluate all PHMMs that are active in the current frame.
 *
 * Installs senscr as the senone scores of the HMM context, then runs
 * hmm_vit_eval() on every PHMM (across all CI phone lists) whose frame
 * stamp equals allphs->frame, counting each evaluation in
 * allphs->n_hmm_eval.
 *
 * @param allphs Allphone search state.
 * @param senscr Senone scores for the current frame.
 * @return The largest score returned by any evaluation, or WORST_SCORE
 *         if no PHMM was active.
 */
static int32
phmm_eval_all(allphone_search_t * allphs, const int16 * senscr)
{
    bin_mdef_t *mdef = ((ps_search_t *) allphs)->acmod->mdef;
    phmm_t **ci_phmm = allphs->ci_phmm;
    int32 best = WORST_SCORE;
    s3cipid_t ci;

    hmm_context_set_senscore(allphs->hmmctx, senscr);

    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        phmm_t *node;

        for (node = ci_phmm[(unsigned) ci]; node; node = node->next) {
            int32 score;

            /* Skip PHMMs that are not active in this frame. */
            if (hmm_frame(&(node->hmm)) != allphs->frame)
                continue;

            allphs->n_hmm_eval++;
            score = hmm_vit_eval((hmm_t *) node);
            if (score > best)
                best = score;
        }
    }

    return best;
}
/**
 * Record the backpointers of all active HMM states for this frame.
 *
 * Extends the token stack for frame_idx, then for every emitting state of
 * every active HMM copies the state's history and score into this frame's
 * token slot and overwrites the state's history with the slot index.
 *
 * @param sas       State alignment search state.
 * @param frame_idx Index of the frame being recorded.
 */
static void
record_transitions(state_align_search_t *sas, int frame_idx)
{
    state_align_hist_t *frame_tokens;
    int phone;

    /* The stack must be extended before taking a pointer into it. */
    extend_tokenstack(sas, frame_idx);
    frame_tokens = sas->tokens + frame_idx * sas->n_emit_state;

    for (phone = 0; phone < sas->n_phones; ++phone) {
        hmm_t *hmm = &sas->hmms[phone];
        int st;

        /* Inactive HMMs have nothing to record. */
        if (hmm_frame(hmm) < frame_idx)
            continue;

        for (st = 0; st < sas->hmmctx->n_emit_state; ++st) {
            int state_idx = phone * sas->hmmctx->n_emit_state + st;
            state_align_hist_t *tok = &frame_tokens[state_idx];

            /* Save the incoming backpointer and score ... */
            tok->id = hmm_history(hmm, st);
            tok->score = hmm_score(hmm, st);
            /* ... and make the state point at its own token slot. */
            hmm_history(hmm, st) = state_idx;
        }
    }
}
/**
 * Enter an HMM with a pointer-valued history object.
 *
 * Stores the given values through the hmm_in_score(), hmm_in_histobj()
 * and hmm_frame() accessors of h.
 *
 * @param h       HMM to enter.
 * @param score   Entry score.
 * @param histobj History object to associate with the entry.
 * @param frame   Frame stamp to set on the HMM.
 */
void
hmm_enter_obj(hmm_t *h, int32 score, void *histobj, int32 frame)
{
    hmm_in_score(h) = score;
    hmm_in_histobj(h) = histobj;
    hmm_frame(h) = frame;
}
/**
 * Beam-prune the phone loop HMMs after a frame has been evaluated.
 *
 * Each active phone (frame stamp >= frame_idx) whose best score is inside
 * the beam (pls->best_score + pls->beam) is re-stamped with
 * frame_idx + 1; every other active phone has its scores cleared.
 *
 * @param pls       Phone loop search state.
 * @param frame_idx Index of the frame that has just been evaluated.
 */
static void
prune_hmms(phone_loop_search_t *pls, int frame_idx)
{
    int32 thresh = pls->best_score + pls->beam;
    int next_frame = frame_idx + 1;
    int k;

    for (k = 0; k < pls->n_phones; ++k) {
        hmm_t *cur = (hmm_t *)&pls->phones[k];

        if (hmm_frame(cur) < frame_idx)
            continue;

        if (!(hmm_bestscore(cur) BETTER_THAN thresh)) {
            /* Fell out of the beam. */
            hmm_clear_scores(cur);
            continue;
        }

        /* Still inside the beam: keep it alive for the next frame. */
        hmm_frame(cur) = next_frame;
    }
}
/**
 * Propagate exit scores from each phone to the phone that follows it.
 *
 * Phones are visited in increasing index order.  A phone stamped with
 * frame_idx + 1 offers its exit score to its immediate successor, which
 * is entered if it is inactive or if the offered score beats its current
 * entry score.  The last phone has no successor and is never a source.
 *
 * @param sas       State alignment search state.
 * @param frame_idx Index of the frame that has just been evaluated.
 */
static void
phone_transition(state_align_search_t *sas, int frame_idx)
{
    int nf = frame_idx + 1;
    int k;

    for (k = 0; k < sas->n_phones - 1; ++k) {
        hmm_t *cur = &sas->hmms[k];
        hmm_t *succ = &sas->hmms[k + 1];
        int32 exit_score;

        if (hmm_frame(cur) != nf)
            continue;

        exit_score = hmm_out_score(cur);

        /* Usual Viterbi rule: keep the successor's entry unless it is
         * inactive or this score is better. */
        if (hmm_frame(succ) >= frame_idx
            && !(exit_score BETTER_THAN hmm_in_score(succ)))
            continue;

        hmm_enter(succ, exit_score, hmm_out_history(cur), nf);
    }
}
/**
 * Evaluate all channels of the active words for the given frame.
 *
 * Adds the number of active words to ngs->st.n_fwdflat_words, evaluates
 * every root and chained channel whose frame stamp equals frame_idx
 * (counting each in ngs->st.n_fwdflat_chan), and stores the best score
 * found in ngs->best_score.  A root channel belonging to the finish word
 * is evaluated but does not contribute to the best score.
 *
 * @param ngs       N-gram search state.
 * @param frame_idx Current frame index.
 */
static void
fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx)
{
    int32 nw = ngs->n_active_word[frame_idx & 0x1];
    int32 *awl = ngs->active_word_list[frame_idx & 0x1];
    int32 bestscore = WORST_SCORE;
    int32 i;

    ngs->st.n_fwdflat_words += nw;

    /* Scan all active words. */
    for (i = 0; i < nw; i++) {
        int32 w = awl[i];
        root_chan_t *rhmm = (root_chan_t *) ngs->word_chan[w];
        chan_t *hmm;

        /* Root channel. */
        if (hmm_frame(&rhmm->hmm) == frame_idx) {
            int32 score = chan_v_eval(rhmm);

            if ((score BETTER_THAN bestscore)
                && (w != ps_search_finish_wid(ngs)))
                bestscore = score;
            ngs->st.n_fwdflat_chan++;
        }

        /* Remaining channels of the word. */
        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            int32 score;

            if (hmm_frame(&hmm->hmm) != frame_idx)
                continue;

            score = chan_v_eval(hmm);
            if (score BETTER_THAN bestscore)
                bestscore = score;
            ngs->st.n_fwdflat_chan++;
        }
    }

    ngs->best_score = bestscore;
}
/**
 * Evaluate all active phone loop HMMs against one frame of senone scores.
 *
 * Installs senscr in the HMM context, runs hmm_vit_eval() on every HMM
 * whose frame stamp is at least frame_idx, and stores the best resulting
 * score (or WORST_SCORE if none was active) in pls->best_score.
 *
 * @param pls       Phone loop search state.
 * @param senscr    Senone scores for the current frame.
 * @param frame_idx Current frame index.
 */
static void
evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
{
    int32 best = WORST_SCORE;
    int k;

    hmm_context_set_senscore(pls->hmmctx, senscr);

    for (k = 0; k < pls->n_phones; ++k) {
        hmm_t *cur = (hmm_t *)&pls->hmms[k];

        if (hmm_frame(cur) >= frame_idx) {
            int32 score = hmm_vit_eval(cur);

            if (score BETTER_THAN best)
                best = score;
        }
    }

    pls->best_score = best;
}
/**
 * Evaluate all active alignment HMMs against one frame of senone scores.
 *
 * Installs senscr in the HMM context and runs hmm_vit_eval() on every HMM
 * whose frame stamp is at least frame_idx.
 *
 * @param sas       State alignment search state.
 * @param senscr    Senone scores for the current frame.
 * @param frame_idx Current frame index.
 * @return The best score produced by any evaluated HMM, or WORST_SCORE if
 *         none was active.
 */
static int32
evaluate_hmms(state_align_search_t *sas, int16 const *senscr, int frame_idx)
{
    int32 best = WORST_SCORE;
    int k;

    hmm_context_set_senscore(sas->hmmctx, senscr);

    for (k = 0; k < sas->n_phones; ++k) {
        hmm_t *cur = &sas->hmms[k];

        if (hmm_frame(cur) >= frame_idx) {
            int32 score = hmm_vit_eval(cur);

            if (score BETTER_THAN best)
                best = score;
        }
    }

    return best;
}
static void phmm_exit(allphone_search_t * allphs, int32 best) { s3cipid_t ci; phmm_t *p; int32 th, nf; history_t *h; blkarray_list_t *history; bin_mdef_t *mdef; int32 curfrm; phmm_t **ci_phmm; int32 *ci2lmwid; th = best + allphs->pbeam; history = allphs->history; mdef = ps_search_acmod(allphs)->mdef; curfrm = allphs->frame; ci_phmm = allphs->ci_phmm; ci2lmwid = allphs->ci2lmwid; nf = curfrm + 1; for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[(unsigned) ci]; p; p = p->next) { if (hmm_frame(&(p->hmm)) == curfrm) { if (hmm_bestscore(&(p->hmm)) >= th) { h = (history_t *) ckd_calloc(1, sizeof(*h)); h->ef = curfrm; h->phmm = p; h->hist = hmm_out_history(&(p->hmm)); h->score = hmm_out_score(&(p->hmm)); if (!allphs->lm) { h->tscore = allphs->inspen; } else { if (h->hist > 0) { int32 n_used; history_t *pred = blkarray_list_get(history, h->hist); if (pred->hist > 0) { history_t *pred_pred = blkarray_list_get(history, h->hist); h->tscore = ngram_tg_score(allphs->lm, ci2lmwid [pred_pred->phmm->ci], ci2lmwid[pred-> phmm->ci], ci2lmwid[p->ci], &n_used) >> SENSCR_SHIFT; } else { h->tscore = ngram_bg_score(allphs->lm, ci2lmwid [pred->phmm->ci], ci2lmwid[p->ci], &n_used) >> SENSCR_SHIFT; } } else { /* * This is the beginning SIL and in srch_allphone_begin() * it's inscore is set to 0. */ h->tscore = 0; } }
/**
 * Perform cross-word transitions out of words that exited in this frame.
 *
 * For every backpointer entry created in frame_idx (except exits of the
 * finish word) this:
 *   - offers a language-model-weighted score to each candidate successor
 *     returned by get_expand_wordlist(), entering the successor's root
 *     channel when the score is inside the beam and beats the current
 *     entry score;
 *   - tracks the best exit score into silence.
 * The best silence exit is then used to enter <sil> and the noise words
 * (word IDs above the silence word ID).  Finally, root channels of active
 * words that are still stamped with the current frame (i.e. were neither
 * propagated nor re-entered) have their scores cleared.
 *
 * The final reset walks the active word list by index so that exactly
 * n_active_word entries are read; the previous pointer-bumping loop
 * fetched one element beyond the active entries on every call.
 *
 * @param ngs       N-gram search state.
 * @param frame_idx Current frame index.
 */
static void
fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
{
    int32 cf, nf, b, thresh, pip, i, w, newscore, n_active;
    int32 best_silrc_score, best_silrc_bp;
    bptbl_t *bp;
    int32 *rcss;
    root_chan_t *rhmm;
    int32 *awl;
    float32 lwf;
    dict_t *dict = ps_search_dict(ngs);
    dict2pid_t *d2p = ps_search_dict2pid(ngs);

    cf = frame_idx;
    nf = cf + 1;
    thresh = ngs->best_score + ngs->fwdflatbeam;
    pip = ngs->pip;
    best_silrc_score = WORST_SCORE;
    best_silrc_bp = 0; /* FIXME: good defaults? */
    lwf = ngs->fwdflat_fwdtree_lw_ratio;

    /* Search for all words starting within a window of this frame.
     * These are the successors for words exiting now. */
    get_expand_wordlist(ngs, cf, ngs->max_sf_win);

    /* Scan words exited in current frame */
    for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
        xwdssid_t *rssid;
        int32 silscore;

        bp = ngs->bp_table + b;
        ngs->word_lat_idx[bp->wid] = NO_BP;

        if (bp->wid == ps_search_finish_wid(ngs))
            continue;

        /* DICT2PID location */
        /* Get the mapping from right context phone ID to index in the
         * right context table and the bscore_stack. */
        rcss = ngs->bscore_stack + bp->s_idx;
        if (bp->last2_phone == -1)
            rssid = NULL;
        else
            rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);

        /* Transition to all successor words. */
        for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
            int32 n_used;

            w = ngs->expand_word_list[i];

            /* Get the exit score we recorded in save_bwd_ptr(), or
             * something approximating it. */
            if (rssid)
                newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]];
            else
                newscore = bp->score;
            if (newscore == WORST_SCORE)
                continue;
            /* FIXME: Floating point... */
            newscore += lwf
                * (ngram_tg_score(ngs->lmset,
                                  dict_basewid(dict, w),
                                  bp->real_wid,
                                  bp->prev_real_wid,
                                  &n_used) >> SENSCR_SHIFT);
            newscore += pip;

            /* Enter the next word */
            if (newscore BETTER_THAN thresh) {
                rhmm = (root_chan_t *) ngs->word_chan[w];
                if ((hmm_frame(&rhmm->hmm) < cf)
                    || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                    hmm_enter(&rhmm->hmm, newscore, b, nf);
                    /* DICT2PID: This is where mpx ssids get introduced. */
                    /* Look up the ssid to use when entering this mpx triphone. */
                    hmm_mpx_ssid(&rhmm->hmm, 0) =
                        dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
                                          dict_last_phone(dict, bp->wid));
                    assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
                    E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
                               rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone,
                               hmm_mpx_ssid(&rhmm->hmm, 0)));
                    bitvec_set(ngs->word_active, w);
                }
            }
        }

        /* Get the best exit into silence. */
        if (rssid)
            silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
        else
            silscore = bp->score;
        if (silscore BETTER_THAN best_silrc_score) {
            best_silrc_score = silscore;
            best_silrc_bp = b;
        }
    }

    /* Transition to <sil> */
    newscore = best_silrc_score + ngs->silpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        w = ps_search_silence_wid(ngs);
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if ((hmm_frame(&rhmm->hmm) < cf)
            || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
            hmm_enter(&rhmm->hmm, newscore, best_silrc_bp, nf);
            bitvec_set(ngs->word_active, w);
        }
    }

    /* Transition to noise words */
    newscore = best_silrc_score + ngs->fillpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
            rhmm = (root_chan_t *) ngs->word_chan[w];
            /* Noise words that aren't a single phone will have NULL here. */
            if (rhmm == NULL)
                continue;
            if ((hmm_frame(&rhmm->hmm) < cf)
                || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                hmm_enter(&rhmm->hmm, newscore, best_silrc_bp, nf);
                bitvec_set(ngs->word_active, w);
            }
        }
    }

    /* Reset initial channels of words that have become inactive even after word trans. */
    n_active = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    for (i = 0; i < n_active; i++) {
        w = awl[i];
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if (hmm_frame(&rhmm->hmm) == cf) {
            hmm_clear_scores(&rhmm->hmm);
        }
    }
}
/**
 * Prune the channels of all active words and perform within-word
 * transitions for the given frame.
 *
 * Clears the word_active bit vector, then for each word on the active
 * list:
 *   - a root channel stamped with the current frame and scoring inside
 *     the HMM beam is propagated to the next frame and either transitions
 *     into its successor channel(s) or, for single-phone words, records a
 *     word exit if inside the word beam;
 *   - each non-root channel stamped with the current frame or later is
 *     likewise propagated if inside the beam, transitioning to the next
 *     phone (word-internal, rc_id < 0) or recording a word exit
 *     (right-context, rc_id >= 0); channels outside the beam that were
 *     not re-entered for the next frame have their scores cleared.
 *
 * The active word list is walked by index so that exactly n_active_word
 * entries are read; the previous pointer-bumping loop fetched one element
 * beyond the active entries on every call.
 *
 * @param ngs       N-gram search state.
 * @param frame_idx Current frame index.
 */
static void
fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
{
    int32 i, n_active, cf, nf, w, pip, newscore, thresh, wordthresh;
    int32 *awl;
    root_chan_t *rhmm;
    chan_t *hmm, *nexthmm;

    cf = frame_idx;
    nf = cf + 1;
    n_active = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));

    thresh = ngs->best_score + ngs->fwdflatbeam;
    wordthresh = ngs->best_score + ngs->fwdflatwbeam;
    pip = ngs->pip;
    E_DEBUG(3,("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh));

    /* Scan all active words. */
    for (i = 0; i < n_active; i++) {
        w = awl[i];
        rhmm = (root_chan_t *) ngs->word_chan[w];

        /* Propagate active root channels */
        if ((hmm_frame(&rhmm->hmm) == cf)
            && (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh)) {
            hmm_frame(&rhmm->hmm) = nf;
            bitvec_set(ngs->word_active, w);

            /* Transitions out of root channel */
            newscore = hmm_out_score(&rhmm->hmm);
            if (rhmm->next) {
                assert(!dict_is_single_phone(ps_search_dict(ngs), w));

                newscore += pip;
                if (newscore BETTER_THAN thresh) {
                    hmm = rhmm->next;
                    /* Enter all right context phones */
                    if (hmm->info.rc_id >= 0) {
                        for (; hmm; hmm = hmm->next) {
                            if ((hmm_frame(&hmm->hmm) < cf)
                                || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
                                hmm_enter(&hmm->hmm, newscore,
                                          hmm_out_history(&rhmm->hmm), nf);
                            }
                        }
                    }
                    /* Just a normal word internal phone */
                    else {
                        if ((hmm_frame(&hmm->hmm) < cf)
                            || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
                            hmm_enter(&hmm->hmm, newscore,
                                      hmm_out_history(&rhmm->hmm), nf);
                        }
                    }
                }
            }
            else {
                assert(dict_is_single_phone(ps_search_dict(ngs), w));

                /* Word exit for single-phone words (where did their
                 * whmms come from?) (either from
                 * ngram_search_fwdtree, or from
                 * ngram_fwdflat_allocate_1ph(), that's where) */
                if (newscore BETTER_THAN wordthresh) {
                    ngram_search_save_bp(ngs, cf, w, newscore,
                                         hmm_out_history(&rhmm->hmm), 0);
                }
            }
        }

        /* Transitions out of non-root channels. */
        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) >= cf) {
                /* Propagate forward HMMs inside the beam. */
                if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
                    hmm_frame(&hmm->hmm) = nf;
                    bitvec_set(ngs->word_active, w);

                    newscore = hmm_out_score(&hmm->hmm);
                    /* Word-internal phones */
                    if (hmm->info.rc_id < 0) {
                        newscore += pip;
                        if (newscore BETTER_THAN thresh) {
                            /* NOTE(review): a word-internal channel is
                             * presumed to always have a successor; this
                             * is not checked here. */
                            nexthmm = hmm->next;
                            /* Enter all right-context phones. */
                            if (nexthmm->info.rc_id >= 0) {
                                for (; nexthmm; nexthmm = nexthmm->next) {
                                    if ((hmm_frame(&nexthmm->hmm) < cf)
                                        || (newscore BETTER_THAN
                                            hmm_in_score(&nexthmm->hmm))) {
                                        hmm_enter(&nexthmm->hmm, newscore,
                                                  hmm_out_history(&hmm->hmm), nf);
                                    }
                                }
                            }
                            /* Enter single word-internal phone. */
                            else {
                                if ((hmm_frame(&nexthmm->hmm) < cf)
                                    || (newscore BETTER_THAN
                                        hmm_in_score(&nexthmm->hmm))) {
                                    hmm_enter(&nexthmm->hmm, newscore,
                                              hmm_out_history(&hmm->hmm), nf);
                                }
                            }
                        }
                    }
                    /* Right-context phones - apply word beam and exit. */
                    else {
                        if (newscore BETTER_THAN wordthresh) {
                            ngram_search_save_bp(ngs, cf, w, newscore,
                                                 hmm_out_history(&hmm->hmm),
                                                 hmm->info.rc_id);
                        }
                    }
                }
                /* Zero out inactive HMMs. */
                else if (hmm_frame(&hmm->hmm) != nf) {
                    hmm_clear_scores(&hmm->hmm);
                }
            }
        }
    }
}