static void
phone_transition(phone_loop_search_t *pls, int frame_idx)
{
    int32 thresh = pls->best_score + pls->pbeam;
    int nf = frame_idx + 1;
    int i;

    /* Now transition out of phones whose last states are inside the
     * phone transition beam. */
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->phones[i];
        int32 newphone_score;
        int j;

        if (hmm_frame(hmm) != nf)
            continue;

        newphone_score = hmm_out_score(hmm) + pls->pip;
        if (newphone_score BETTER_THAN thresh) {
            /* Transition into all phones using the usual Viterbi rule. */
            for (j = 0; j < pls->n_phones; ++j) {
                hmm_t *nhmm = (hmm_t *)&pls->phones[j];

                if (hmm_frame(nhmm) < frame_idx
                    || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
                    hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
                }
            }
        }
    }
}
static void
fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
{
    root_chan_t *rhmm;
    chan_t *hmm;
    int32 i, cf, w, *awl;

    cf = frame_idx;

    /* Renormalize individual word channels */
    i = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    for (w = *(awl++); i > 0; --i, w = *(awl++)) {
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if (hmm_frame(&rhmm->hmm) == cf) {
            hmm_normalize(&rhmm->hmm, norm);
        }
        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) == cf) {
                hmm_normalize(&hmm->hmm, norm);
            }
        }
    }

    ngs->renormalized = TRUE;
}
static void
compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx)
{
    int32 i, w;
    int32 *awl;
    root_chan_t *rhmm;
    chan_t *hmm;

    acmod_clear_active(ps_search_acmod(ngs));

    i = ngs->n_active_word[frame_idx & 0x1];
    awl = ngs->active_word_list[frame_idx & 0x1];

    for (w = *(awl++); i > 0; --i, w = *(awl++)) {
        rhmm = (root_chan_t *)ngs->word_chan[w];
        if (hmm_frame(&rhmm->hmm) == frame_idx) {
            acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
        }

        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) == frame_idx) {
                acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
            }
        }
    }
}
예제 #4
0
static void
prune_hmms(state_align_search_t *sas, int frame_idx)
{
    int nf = frame_idx + 1;
    int i;

    /* Check all phones to see if they remain active in the next frame. */
    for (i = 0; i < sas->n_phones; ++i) {
        hmm_t *hmm = sas->hmms + i;
        if (hmm_frame(hmm) < frame_idx)
            continue;
        hmm_frame(hmm) = nf;
    }
}
예제 #5
0
void
hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
{
    hmm_in_score(h) = score;
    hmm_in_history(h) = histid;
    hmm_frame(h) = frame;
}
/** Evaluate active PHMMs */
static int32
phmm_eval_all(allphone_search_t * allphs, const int16 * senscr)
{
    s3cipid_t ci;
    phmm_t *p;
    int32 best;
    bin_mdef_t *mdef;
    phmm_t **ci_phmm;

    mdef = ((ps_search_t *) allphs)->acmod->mdef;
    ci_phmm = allphs->ci_phmm;

    best = WORST_SCORE;

    hmm_context_set_senscore(allphs->hmmctx, senscr);
    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
            if (hmm_frame(&(p->hmm)) == allphs->frame) {
                int32 score;
                allphs->n_hmm_eval++;
                score = hmm_vit_eval((hmm_t *) p);
                if (score > best)
                    best = score;
            }
        }
    }

    return best;
}
예제 #7
0
static void
record_transitions(state_align_search_t *sas, int frame_idx)
{
    state_align_hist_t *tokens;
    int i;

    /* Push another frame of tokens on the stack. */
    extend_tokenstack(sas, frame_idx);
    tokens = sas->tokens + frame_idx * sas->n_emit_state;

    /* Scan all active HMMs */
    for (i = 0; i < sas->n_phones; ++i) {
        hmm_t *hmm = sas->hmms + i;
        int j;

        if (hmm_frame(hmm) < frame_idx)
            continue;
        for (j = 0; j < sas->hmmctx->n_emit_state; ++j) {
            int state_idx = i * sas->hmmctx->n_emit_state + j;
            /* Record their backpointers on the token stack. */
            tokens[state_idx].id = hmm_history(hmm, j);
            tokens[state_idx].score = hmm_score(hmm, j);
            /* Update backpointer fields with state index. */
            hmm_history(hmm, j) = state_idx;
        }
    }
}
예제 #8
0
파일: hmm.c 프로젝트: 4auka/cmusphinx
void
hmm_enter_obj(hmm_t *h, int32 score, void *histobj, int32 frame)
{
    hmm_in_score(h) = score;
    hmm_in_histobj(h) = histobj;
    hmm_frame(h) = frame;
}
static void
prune_hmms(phone_loop_search_t *pls, int frame_idx)
{
    int32 thresh = pls->best_score + pls->beam;
    int nf = frame_idx + 1;
    int i;

    /* Check all phones to see if they remain active in the next frame. */
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->phones[i];

        if (hmm_frame(hmm) < frame_idx)
            continue;
        /* Retain if score better than threshold. */
        if (hmm_bestscore(hmm) BETTER_THAN thresh) {
            hmm_frame(hmm) = nf;
        }
        else
            hmm_clear_scores(hmm);
    }
}
예제 #10
0
static void
phone_transition(state_align_search_t *sas, int frame_idx)
{
    int nf = frame_idx + 1;
    int i;

    for (i = 0; i < sas->n_phones - 1; ++i) {
        hmm_t *hmm, *nhmm;
        int32 newphone_score;

        hmm = sas->hmms + i;
        if (hmm_frame(hmm) != nf)
            continue;

        newphone_score = hmm_out_score(hmm);
        /* Transition into next phone using the usual Viterbi rule. */
        nhmm = hmm + 1;
        if (hmm_frame(nhmm) < frame_idx
            || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
            hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
        }
    }
}
static void
fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx)
{
    int32 i, w, nw, bestscore;
    int32 *awl;
    root_chan_t *rhmm;
    chan_t *hmm;

    nw = ngs->n_active_word[frame_idx & 0x1];
    awl = ngs->active_word_list[frame_idx & 0x1];
    bestscore = WORST_SCORE;

    ngs->st.n_fwdflat_words += nw;

    /* Scan all active words. */
    for (i = 0; i < nw; i++) {
        w = *(awl++);
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if (hmm_frame(&rhmm->hmm) == frame_idx) {
            int32 score = chan_v_eval(rhmm);
            if ((score BETTER_THAN bestscore) && (w != ps_search_finish_wid(ngs)))
                bestscore = score;
            ngs->st.n_fwdflat_chan++;
        }

        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) == frame_idx) {
                int32 score = chan_v_eval(hmm);
                if (score BETTER_THAN bestscore)
                    bestscore = score;
                ngs->st.n_fwdflat_chan++;
            }
        }
    }

    ngs->best_score = bestscore;
}
static void
evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
{
    int32 bs = WORST_SCORE;
    int i;

    hmm_context_set_senscore(pls->hmmctx, senscr);

    for (i = 0; i < pls->n_phones; ++i) {
        hmm_t *hmm = (hmm_t *)&pls->hmms[i];
        int32 score;

        if (hmm_frame(hmm) < frame_idx)
            continue;
        score = hmm_vit_eval(hmm);
        if (score BETTER_THAN bs) {
            bs = score;
        }
    }
    pls->best_score = bs;
}
예제 #13
0
static int32
evaluate_hmms(state_align_search_t *sas, int16 const *senscr, int frame_idx)
{
    int32 bs = WORST_SCORE;
    int i;

    hmm_context_set_senscore(sas->hmmctx, senscr);

    for (i = 0; i < sas->n_phones; ++i) {
        hmm_t *hmm = sas->hmms + i;
        int32 score;

        if (hmm_frame(hmm) < frame_idx)
            continue;
        score = hmm_vit_eval(hmm);
        if (score BETTER_THAN bs) {
            bs = score;
        }
    }
    return bs;
}
예제 #14
0
static void
phmm_exit(allphone_search_t * allphs, int32 best)
{
    s3cipid_t ci;
    phmm_t *p;
    int32 th, nf;
    history_t *h;
    blkarray_list_t *history;
    bin_mdef_t *mdef;
    int32 curfrm;
    phmm_t **ci_phmm;
    int32 *ci2lmwid;

    th = best + allphs->pbeam;

    history = allphs->history;
    mdef = ps_search_acmod(allphs)->mdef;
    curfrm = allphs->frame;
    ci_phmm = allphs->ci_phmm;
    ci2lmwid = allphs->ci2lmwid;

    nf = curfrm + 1;

    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        for (p = ci_phmm[(unsigned) ci]; p; p = p->next) {
            if (hmm_frame(&(p->hmm)) == curfrm) {

                if (hmm_bestscore(&(p->hmm)) >= th) {

                    h = (history_t *) ckd_calloc(1, sizeof(*h));
                    h->ef = curfrm;
                    h->phmm = p;
                    h->hist = hmm_out_history(&(p->hmm));
                    h->score = hmm_out_score(&(p->hmm));

                    if (!allphs->lm) {
                        h->tscore = allphs->inspen;
                    }
                    else {
                        if (h->hist > 0) {
                            int32 n_used;
                            history_t *pred =
                                blkarray_list_get(history, h->hist);

                            if (pred->hist > 0) {
                                history_t *pred_pred =
                                    blkarray_list_get(history,
                                                      h->hist);
                                h->tscore =
                                    ngram_tg_score(allphs->lm,
                                                   ci2lmwid
                                                   [pred_pred->phmm->ci],
                                                   ci2lmwid[pred->
                                                            phmm->ci],
                                                   ci2lmwid[p->ci],
                                                   &n_used) >>
                                    SENSCR_SHIFT;
                            }
                            else {
                                h->tscore =
                                    ngram_bg_score(allphs->lm,
                                                   ci2lmwid
                                                   [pred->phmm->ci],
                                                   ci2lmwid[p->ci],
                                                   &n_used) >>
                                    SENSCR_SHIFT;
                            }
                        }
                        else {
                            /*
                             * This is the beginning SIL and in srch_allphone_begin()
                             * it's inscore is set to 0.
                             */
                            h->tscore = 0;
                        }
                    }
static void
fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
{
    int32 cf, nf, b, thresh, pip, i, w, newscore;
    int32 best_silrc_score = 0, best_silrc_bp = 0;      /* FIXME: good defaults? */
    bptbl_t *bp;
    int32 *rcss;
    root_chan_t *rhmm;
    int32 *awl;
    float32 lwf;
    dict_t *dict = ps_search_dict(ngs);
    dict2pid_t *d2p = ps_search_dict2pid(ngs);

    cf = frame_idx;
    nf = cf + 1;
    thresh = ngs->best_score + ngs->fwdflatbeam;
    pip = ngs->pip;
    best_silrc_score = WORST_SCORE;
    lwf = ngs->fwdflat_fwdtree_lw_ratio;

    /* Search for all words starting within a window of this frame.
     * These are the successors for words exiting now. */
    get_expand_wordlist(ngs, cf, ngs->max_sf_win);

    /* Scan words exited in current frame */
    for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
        xwdssid_t *rssid;
        int32 silscore;

        bp = ngs->bp_table + b;
        ngs->word_lat_idx[bp->wid] = NO_BP;

        if (bp->wid == ps_search_finish_wid(ngs))
            continue;

        /* DICT2PID location */
        /* Get the mapping from right context phone ID to index in the
         * right context table and the bscore_stack. */
        rcss = ngs->bscore_stack + bp->s_idx;
        if (bp->last2_phone == -1)
            rssid = NULL;
        else
            rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);

        /* Transition to all successor words. */
        for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
            int32 n_used;

            w = ngs->expand_word_list[i];

            /* Get the exit score we recorded in save_bwd_ptr(), or
             * something approximating it. */
            if (rssid)
                newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]];
            else
                newscore = bp->score;
            if (newscore == WORST_SCORE)
                continue;
            /* FIXME: Floating point... */
            newscore += lwf
                * (ngram_tg_score(ngs->lmset,
                                  dict_basewid(dict, w),
                                  bp->real_wid,
                                  bp->prev_real_wid,
                                  &n_used) >> SENSCR_SHIFT);
            newscore += pip;

            /* Enter the next word */
            if (newscore BETTER_THAN thresh) {
                rhmm = (root_chan_t *) ngs->word_chan[w];
                if ((hmm_frame(&rhmm->hmm) < cf)
                    || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                    hmm_enter(&rhmm->hmm, newscore, b, nf);
                    /* DICT2PID: This is where mpx ssids get introduced. */
                    /* Look up the ssid to use when entering this mpx triphone. */
                    hmm_mpx_ssid(&rhmm->hmm, 0) =
                        dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
                                          dict_last_phone(dict, bp->wid));
                    assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
                    E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
                               rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone,
                               hmm_mpx_ssid(&rhmm->hmm, 0)));
                    bitvec_set(ngs->word_active, w);
                }
            }
        }

        /* Get the best exit into silence. */
        if (rssid)
            silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
        else
            silscore = bp->score;
        if (silscore BETTER_THAN best_silrc_score) {
            best_silrc_score = silscore;
            best_silrc_bp = b;
        }
    }

    /* Transition to <sil> */
    newscore = best_silrc_score + ngs->silpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        w = ps_search_silence_wid(ngs);
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if ((hmm_frame(&rhmm->hmm) < cf)
            || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
            hmm_enter(&rhmm->hmm, newscore,
                      best_silrc_bp, nf);
            bitvec_set(ngs->word_active, w);
        }
    }
    /* Transition to noise words */
    newscore = best_silrc_score + ngs->fillpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
            rhmm = (root_chan_t *) ngs->word_chan[w];
            /* Noise words that aren't a single phone will have NULL here. */
            if (rhmm == NULL)
                continue;
            if ((hmm_frame(&rhmm->hmm) < cf)
                || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                hmm_enter(&rhmm->hmm, newscore,
                          best_silrc_bp, nf);
                bitvec_set(ngs->word_active, w);
            }
        }
    }

    /* Reset initial channels of words that have become inactive even after word trans. */
    i = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    for (w = *(awl++); i > 0; --i, w = *(awl++)) {
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if (hmm_frame(&rhmm->hmm) == cf) {
            hmm_clear_scores(&rhmm->hmm);
        }
    }
}
static void
fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
{
    int32 i, cf, nf, w, pip, newscore, thresh, wordthresh;
    int32 *awl;
    root_chan_t *rhmm;
    chan_t *hmm, *nexthmm;

    cf = frame_idx;
    nf = cf + 1;
    i = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));

    thresh = ngs->best_score + ngs->fwdflatbeam;
    wordthresh = ngs->best_score + ngs->fwdflatwbeam;
    pip = ngs->pip;
    E_DEBUG(3,("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh));

    /* Scan all active words. */
    for (w = *(awl++); i > 0; --i, w = *(awl++)) {
        rhmm = (root_chan_t *) ngs->word_chan[w];
        /* Propagate active root channels */
        if (hmm_frame(&rhmm->hmm) == cf
            && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
            hmm_frame(&rhmm->hmm) = nf;
            bitvec_set(ngs->word_active, w);

            /* Transitions out of root channel */
            newscore = hmm_out_score(&rhmm->hmm);
            if (rhmm->next) {
                assert(!dict_is_single_phone(ps_search_dict(ngs), w));

                newscore += pip;
                if (newscore BETTER_THAN thresh) {
                    hmm = rhmm->next;
                    /* Enter all right context phones */
                    if (hmm->info.rc_id >= 0) {
                        for (; hmm; hmm = hmm->next) {
                            if ((hmm_frame(&hmm->hmm) < cf)
                                || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
                                hmm_enter(&hmm->hmm, newscore,
                                          hmm_out_history(&rhmm->hmm), nf);
                            }
                        }
                    }
                    /* Just a normal word internal phone */
                    else {
                        if ((hmm_frame(&hmm->hmm) < cf)
                            || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
                                hmm_enter(&hmm->hmm, newscore,
                                          hmm_out_history(&rhmm->hmm), nf);
                        }
                    }
                }
            }
            else {
                assert(dict_is_single_phone(ps_search_dict(ngs), w));

                /* Word exit for single-phone words (where did their
                 * whmms come from?) (either from
                 * ngram_search_fwdtree, or from
                 * ngram_fwdflat_allocate_1ph(), that's where) */
                if (newscore BETTER_THAN wordthresh) {
                    ngram_search_save_bp(ngs, cf, w, newscore,
                                         hmm_out_history(&rhmm->hmm), 0);
                }
            }
        }

        /* Transitions out of non-root channels. */
        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) >= cf) {
                /* Propagate forward HMMs inside the beam. */
                if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
                    hmm_frame(&hmm->hmm) = nf;
                    bitvec_set(ngs->word_active, w);

                    newscore = hmm_out_score(&hmm->hmm);
                    /* Word-internal phones */
                    if (hmm->info.rc_id < 0) {
                        newscore += pip;
                        if (newscore BETTER_THAN thresh) {
                            nexthmm = hmm->next;
                            /* Enter all right-context phones. */
                            if (nexthmm->info.rc_id >= 0) {
                                 for (; nexthmm; nexthmm = nexthmm->next) {
                                    if ((hmm_frame(&nexthmm->hmm) < cf)
                                        || (newscore BETTER_THAN
                                            hmm_in_score(&nexthmm->hmm))) {
                                        hmm_enter(&nexthmm->hmm,
                                                  newscore,
                                                  hmm_out_history(&hmm->hmm),
                                                  nf);
                                    }
                                }
                            }
                            /* Enter single word-internal phone. */
                            else {
                                if ((hmm_frame(&nexthmm->hmm) < cf)
                                    || (newscore BETTER_THAN
                                        hmm_in_score(&nexthmm->hmm))) {
                                    hmm_enter(&nexthmm->hmm, newscore,
                                              hmm_out_history(&hmm->hmm), nf);
                                }
                            }
                        }
                    }
                    /* Right-context phones - apply word beam and exit. */
                    else {
                        if (newscore BETTER_THAN wordthresh) {
                            ngram_search_save_bp(ngs, cf, w, newscore,
                                                 hmm_out_history(&hmm->hmm),
                                                 hmm->info.rc_id);
                        }
                    }
                }
                /* Zero out inactive HMMs. */
                else if (hmm_frame(&hmm->hmm) != nf) {
                    hmm_clear_scores(&hmm->hmm);
                }
            }
        }
    }
}