/*
 * Exercise n-gram scoring on a caller-supplied model: the n_used value
 * reported by trigram and bigram queries, scores after weights are applied
 * and then reset, and the values returned by ngram_probv().  The expected
 * numbers are tied to whatever model data the caller loaded.
 */
void
run_tests(ngram_model_t *model)
{
    int32 n_used;
    int32 wid1, wid2, wid3;

    /* Trigram queries before any weighting; only n_used is checked. */
    wid1 = ngram_wid(model, "daines");
    wid2 = ngram_wid(model, "huggins");
    wid3 = ngram_wid(model, "huggins");
    ngram_tg_score(model, wid1, wid2, wid3, &n_used);
    TEST_EQUAL(n_used, 2);

    wid1 = ngram_wid(model, "david");
    wid2 = ngram_wid(model, "david");
    wid3 = ngram_wid(model, "david");
    ngram_tg_score(model, wid1, wid2, wid3, &n_used);
    TEST_EQUAL(n_used, 1);

    /* Apply weights. */
    ngram_model_apply_weights(model, 7.5, 0.5, 1.0);
    /* -9452 * 7.5 + log(0.5) = -77821 */
    TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
                   -77821);
    /* ngram_probv() should still report the original values. */
    TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
                   -9452);
    TEST_EQUAL_LOG(ngram_probv(model, "huggins", "david", NULL),
                   -831);

    /* Un-apply weights: scores go back to the original values. */
    ngram_model_apply_weights(model, 1.0, 1.0, 1.0);
    TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
                   -9452);
    TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL),
                   -831);
    /* ngram_probv() is unchanged as well. */
    TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
                   -9452);

    /* Pre-weighting, this should give the "raw" score. */
    TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
                   -9452);
    TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL),
                   -831);

    /* Verify that backoff mode calculations work (bigram queries). */
    wid1 = ngram_wid(model, "huggins");
    wid2 = ngram_wid(model, "david");
    ngram_bg_score(model, wid1, wid2, &n_used);
    TEST_EQUAL(n_used, 2);

    wid1 = ngram_wid(model, "blorglehurfle");
    wid2 = ngram_wid(model, "david");
    ngram_bg_score(model, wid1, wid2, &n_used);
    TEST_EQUAL(n_used, 1);

    wid1 = ngram_wid(model, "david");
    wid2 = ngram_wid(model, "david");
    ngram_bg_score(model, wid1, wid2, &n_used);
    TEST_EQUAL(n_used, 1);

    /* Final trigram query, expected to report n_used == 3. */
    wid1 = ngram_wid(model, "daines");
    wid2 = ngram_wid(model, "huggins");
    wid3 = ngram_wid(model, "david");
    ngram_tg_score(model, wid1, wid2, wid3, &n_used);
    TEST_EQUAL(n_used, 3);
}
/*
 * Pick the best Viterbi history entry at the end of a block of a partial
 * utterance.
 *
 * Only the last frame (vh->n_frm - 1) is accepted as the exit frame.  Every
 * history entry in that frame is scored as its path score plus the trigram
 * score of S3_FINISH_WORD given the entry's LM state; the id of the first
 * entry with the highest total is returned.  The history table itself is
 * not modified.
 *
 * vh   - Viterbi history table to search.
 * lm   - language model used to score the transition to S3_FINISH_WORD.
 * dict - unused; kept so the signature stays unchanged.
 *
 * Returns the id of the best entry, or -1 when no frame has any entries,
 * when the last frame has none (an error is logged in that case), or when
 * no entry scores above MAX_NEG_INT32.
 */
int32
vithist_partialutt_end(vithist_t * vh, ngram_model_t *lm, s3dict_t *dict)
{
    int32 f, i;
    /* Initialised so the range is well defined even if the scan below
     * never executes (vh->n_frm <= 0). */
    int32 sv = 0, nsv = 0;
    int32 scr, bestscore, bestvh;
    vithist_entry_t *ve;
    int32 endwid;

    (void) dict;

    /* Find last frame with entries in vithist table */
    for (f = vh->n_frm - 1; f >= 0; --f) {
        sv = vh->frame_start[f];        /* First vithist entry in frame f */
        nsv = vh->frame_start[f + 1];   /* First vithist entry in next frame (f+1) */
        if (sv < nsv)
            break;
    }
    if (f < 0)
        return -1;

    /* Unlike a full utterance end, an earlier frame is not accepted. */
    if (f != vh->n_frm - 1) {
        E_ERROR("No word exits from in block with last frame= %d\n",
                vh->n_frm - 1);
        return -1;
    }

    /* Terminate in a final </s> node (make this optional?) */
    endwid = ngram_wid(lm, S3_FINISH_WORD);

    bestscore = MAX_NEG_INT32;
    bestvh = -1;

    for (i = sv; i < nsv; i++) {
        int n_used;

        ve = vithist_id2entry(vh, i);
        scr = ve->path.score;
        scr += ngram_tg_score(lm, endwid, ve->lmstate.lm3g.lwid[0],
                              ve->lmstate.lm3g.lwid[1], &n_used);
        /* Strict comparison: on ties the earliest entry wins. */
        if (bestscore < scr) {
            bestscore = scr;
            bestvh = i;
        }
    }

    return bestvh;
}
int32 vithist_utt_end(vithist_t * vh, ngram_model_t *lm, s3dict_t *dict, dict2pid_t *dict2pid, fillpen_t *fp) { int32 f, i; int32 sv, nsv, scr, bestscore, bestvh, vhid; vithist_entry_t *ve, *bestve = 0; int32 endwid = NGRAM_INVALID_WID; bestscore = MAX_NEG_INT32; bestvh = -1; /* Find last frame with entries in vithist table */ /* by ARCHAN 20050525, it is possible that the last frame will not be reached in decoding */ for (f = vh->n_frm - 1; f >= 0; --f) { sv = vh->frame_start[f]; /* First vithist entry in frame f */ nsv = vh->frame_start[f + 1]; /* First vithist entry in next frame (f+1) */ if (sv < nsv) break; } if (f < 0) return -1; if (f != vh->n_frm - 1) E_WARN("No word exit in frame %d, using exits from frame %d\n", vh->n_frm - 1, f); /* Terminate in a final </s> node (make this optional?) */ endwid = ngram_wid(lm, S3_FINISH_WORD); for (i = sv; i < nsv; i++) { int n_used; ve = vithist_id2entry(vh, i); scr = ve->path.score; scr += ngram_tg_score(lm, endwid, ve->lmstate.lm3g.lwid[0], ve->lmstate.lm3g.lwid[1], &n_used); if (bestscore < scr) { bestscore = scr; bestvh = i; bestve = ve; } } assert(bestvh >= 0); if (f != vh->n_frm - 1) { E_ERROR("No word exit in frame %d, using exits from frame %d\n", vh->n_frm - 1, f); /* Add a dummy silwid covering the remainder of the utterance */ assert(vh->frame_start[vh->n_frm - 1] == vh->frame_start[vh->n_frm]); vh->n_frm -= 1; vithist_rescore(vh, lm, dict, dict2pid, fp, s3dict_silwid(dict), vh->n_frm, bestve->path.score, bestvh, -1, -1); vh->n_frm += 1; vh->frame_start[vh->n_frm] = vh->n_entry; return vithist_utt_end(vh, lm, dict, dict2pid, fp); } /* vithist_dump(vh,-1,kbc,stdout); */ /* Create an </s> entry */ ve = vithist_entry_alloc(vh); ve->wid = s3dict_finishwid(dict); ve->sf = (bestve->ef == BAD_S3FRMID) ? 
0 : bestve->ef + 1; ve->ef = vh->n_frm; ve->ascr = 0; ve->lscr = bestscore - bestve->path.score; ve->path.score = bestscore; ve->path.pred = bestvh; ve->type = 0; ve->valid = 1; ve->lmstate.lm3g.lwid[0] = endwid; ve->lmstate.lm3g.lwid[1] = ve->lmstate.lm3g.lwid[0]; vhid = vh->n_entry - 1; /* vithist_dump(vh,-1,kbc,stdout); */ return vhid; }
/*
 * Enter word exits for `wid` ending at frame `ef` into the Viterbi history,
 * applying language model (or filler penalty) scores.
 *
 * vh       - Viterbi history table; vh->n_frm must equal ef (asserted).
 * lm       - language model used for trigram scoring of non-filler words.
 * dict     - dictionary, used to classify wid and map it to an LM word.
 * dict2pid - passed through to vithist_enter().
 * fp       - filler penalties, used when wid is a filler word.
 * wid      - dictionary id of the word that is exiting.
 * ef       - end frame of the word; also stored in the new entry.
 * score    - path score at the word exit, before any LM/filler score.
 * pred     - id of the predecessor history entry; -1 is fatal and 0 is
 *            treated as the initial <s> entry.
 * type     - stored unchanged in the new entry.
 * rc       - passed unchanged to vithist_enter().
 *
 * Filler words produce at most one candidate, linked to pred.  Other words
 * produce one candidate per valid history entry in the frame where pred
 * ended, each scored with its own trigram LM score.  A candidate is handed
 * to vithist_enter() only if (path score - vh->wbeam) is at least
 * vh->bestscore[vh->n_frm].
 */
void vithist_rescore(vithist_t * vh, ngram_model_t *lm, s3dict_t *dict, dict2pid_t *dict2pid, fillpen_t *fp, s3wid_t wid, int32 ef, int32 score, int32 pred, int32 type, int32 rc)
{
    vithist_entry_t *pve, tve;
    int32 lwid;
    int32 se, fe;
    int32 i;

    assert(vh->n_frm == ef);
    if (pred == -1) {
        /* Always E_FATAL here; the caller is assumed to have done its own
         * error checking, so a missing predecessor is unrecoverable. */
        E_FATAL ("Hmm->out.history equals to -1 with score %d, some active phone was not computed?\n", score);
    }

    /* pve is the entry preceding the word `wid`, i.e. w_{n-1}.  se and fe
     * (set further down) delimit the range of entries that are considered
     * as alternative predecessors. */
    pve = vithist_id2entry(vh, pred);

    /* Create a temporary entry with all the info currently available */
    tve.wid = wid;
    tve.sf = pve->ef + 1;
    tve.ef = ef;
    tve.type = type;
    tve.valid = 1;
    /* Score accumulated since pred; added to each alternative
     * predecessor's path score in the loop below. */
    tve.ascr = score - pve->path.score;
    tve.lscr = 0;
    tve.rc = NULL;
    tve.n_rc = 0;

    /* Filler words only have unigram language model scores, so not
     * much special needs to be done for them.  vithist_prune() is
     * going to prune out most of these later on, anyway. */
    if (s3dict_filler_word(dict, wid)) {
        tve.path.score = score;
        tve.lscr = fillpen(fp, wid);
        tve.path.score += tve.lscr;
        if ((tve.path.score - vh->wbeam) >= vh->bestscore[vh->n_frm]) {
            tve.path.pred = pred;
            /* Note that they just propagate the same LM state since
             * they are not in the LM. */
            tve.lmstate.lm3g = pve->lmstate.lm3g;
            vithist_enter(vh, dict, dict2pid, &tve, rc);
        }
    }
    else {
        if (pred == 0) {
            /* Special case for the initial <s> entry */
            se = 0;
            fe = 1;
        }
        else {
            /* All history entries in the frame where pred ended. */
            se = vh->frame_start[pve->ef];
            fe = vh->frame_start[pve->ef + 1];
        }

        /* For a real word, walk over every possible previous word: inside
         * the loop pve is rebound to each candidate predecessor in turn.
         * The LM word id is looked up from the base form of wid. */
        lwid = ngram_wid(lm, s3dict_wordstr(dict, s3dict_basewid(dict, wid)));
        tve.lmstate.lm3g.lwid[0] = lwid;

        /* FIXME: This loop is completely awful.  For each entry in
         * this frame, we scan every entry in the previous frame,
         * potentially creating a new history entry.  This means that
         * without pruning, the size of the vithist table (and thus
         * the time taken here) is exponential in the number of
         * frames! */
        for (i = se; i < fe; i++) {
            pve = vithist_id2entry(vh, i);

            if (pve->valid) {
                int n_used;
                tve.path.score = pve->path.score + tve.ascr;
                /* Try at all costs to avoid calling ngram_tg_score()
                 * because it is the main time consuming part here
                 * (but as noted above... ugh...)  See below as well.
                 * Candidates already outside the beam are skipped before
                 * the LM score is added. */
                if ((tve.path.score - vh->wbeam) < vh->bestscore[vh->n_frm])
                    continue;
                /* The trigram cache is supposed to make this fast,
                 * but due to the crazy number of times this could be
                 * called, it's still slow compared to a hash
                 * table. */
                tve.lscr = ngram_tg_score(lm, lwid, pve->lmstate.lm3g.lwid[0], pve->lmstate.lm3g.lwid[1], &n_used);
                tve.path.score += tve.lscr;
                /* A different word exit threshold - we would have to
                 * be inside the general word beam in order to get
                 * here, now we apply a second beam to the *vithist
                 * entries* in this frame.  There can be an ungodly
                 * number of them for reasons that aren't entirely
                 * clear to me, so this is kind of a pre-pruning.
                 * NOTE: the "backwards" math here is because
                 * vh->bestscore is frequently MAX_NEG_INT32.  ALSO
                 * NOTE: We can't precompute the threshold since the
                 * best score will be updated by vithist_enter(). */
                if ((tve.path.score - vh->wbeam) >= vh->bestscore[vh->n_frm]) {
                    tve.path.pred = i;
                    /* New LM state: (this word, predecessor's word). */
                    tve.lmstate.lm3g.lwid[1] = pve->lmstate.lm3g.lwid[0];
                    vithist_enter(vh, dict, dict2pid, &tve, rc);
                }
            }
        }
    }
}