/*
 * Rescore a word exit against the Viterbi history and enter the resulting
 * candidate(s) into the history table.
 *
 *   vh    - Viterbi history; vh->n_frm must equal ef (asserted).
 *   kbc   - knowledge-base core supplying the dictionary, LM and filler
 *           penalties.
 *   wid   - dictionary ID of the word that is exiting.
 *   ef    - value stored as the new entry's ef field.
 *   score - path score at the word exit; the predecessor's score is
 *           subtracted from it to obtain the entry's ascr.
 *   pred  - history ID of the predecessor entry.
 *   type  - stored verbatim in the new entry's type field.
 *
 * Filler words receive the filler penalty as their lscr, inherit the
 * predecessor's LM state, and are always entered.  For any other word,
 * every valid history entry with an ID in [frame_start[p->ef],
 * frame_start[p->ef + 1]) -- where p is the predecessor -- is tried as an
 * alternative predecessor, scored with lm_tg_score(), and entered only if
 * it falls within vh->wbeam of the current frame's best score.
 */
void vithist_rescore (vithist_t *vh, kbcore_t *kbc, s3wid_t wid, int32 ef, int32 score, int32 pred, int32 type)
{
    vithist_entry_t *prev;      /* entry named by pred */
    vithist_entry_t *hist;      /* alternative predecessor under test */
    vithist_entry_t cand;       /* candidate entry being assembled */
    s3lmwid_t lm_wid;
    int32 first, last;
    int32 id;

    assert (vh->n_frm == ef);

    prev = vh->entry[VITHIST_ID2BLK(pred)] + VITHIST_ID2BLKOFFSET(pred);

    /* Fill in everything about the candidate that is already known. */
    cand.wid = wid;
    cand.sf = prev->ef + 1;
    cand.ef = ef;
    cand.type = type;
    cand.valid = 1;
    cand.ascr = score - prev->score;

    /* Range of history IDs to scan; ID 0 (the initial <s> entry) is
     * handled as a one-element range. */
    if (pred == 0) {
        first = 0;
        last = 1;
    }
    else {
        first = vh->frame_start[prev->ef];
        last = vh->frame_start[prev->ef + 1];
    }

    if (dict_filler_word (kbcore_dict(kbc), wid)) {
        /* Filler: penalty only, LM state carried over unchanged. */
        cand.lscr = fillpen (kbcore_fillpen(kbc), wid);
        cand.score = score + cand.lscr;
        cand.pred = pred;
        cand.lmstate.lm3g = prev->lmstate.lm3g;

        vithist_enter (vh, kbc, &cand);
        return;
    }

    lm_wid = kbcore_dict2lmwid (kbc, wid);
    cand.lmstate.lm3g.lwid[0] = lm_wid;

    for (id = first; id < last; id++) {
        hist = vh->entry[VITHIST_ID2BLK(id)] + VITHIST_ID2BLKOFFSET(id);
        if (!hist->valid) {
            continue;
        }

        cand.lscr = lm_tg_score (kbcore_lm(kbc),
                                 hist->lmstate.lm3g.lwid[1],
                                 hist->lmstate.lm3g.lwid[0],
                                 lm_wid);
        cand.score = hist->score + cand.ascr + cand.lscr;

        /* Outside the word beam relative to this frame's best score. */
        if ((cand.score - vh->wbeam) < vh->bestscore[vh->n_frm]) {
            continue;
        }

        cand.pred = id;
        cand.lmstate.lm3g.lwid[1] = hist->lmstate.lm3g.lwid[0];
        vithist_enter (vh, kbc, &cand);
    }
}
void vithist_rescore(vithist_t * vh, ngram_model_t *lm, s3dict_t *dict, dict2pid_t *dict2pid, fillpen_t *fp, s3wid_t wid, int32 ef, int32 score, int32 pred, int32 type, int32 rc) { vithist_entry_t *pve, tve; int32 lwid; int32 se, fe; int32 i; assert(vh->n_frm == ef); if (pred == -1) { /* Always do E_FATAL assuming upper level function take care of error checking. */ E_FATAL ("Hmm->out.history equals to -1 with score %d, some active phone was not computed?\n", score); } /* pve is the previous entry before word with wid or, se an fe is the first to the last entry before pve. So pve is w_{n-1} */ pve = vithist_id2entry(vh, pred); /* Create a temporary entry with all the info currently available */ tve.wid = wid; tve.sf = pve->ef + 1; tve.ef = ef; tve.type = type; tve.valid = 1; tve.ascr = score - pve->path.score; tve.lscr = 0; tve.rc = NULL; tve.n_rc = 0; /* Filler words only have unigram language model scores, so not * much special needs to be done for them. vithist_prune() is * going to prune out most of these later on, anyway. */ if (s3dict_filler_word(dict, wid)) { tve.path.score = score; tve.lscr = fillpen(fp, wid); tve.path.score += tve.lscr; if ((tve.path.score - vh->wbeam) >= vh->bestscore[vh->n_frm]) { tve.path.pred = pred; /* Note that they just propagate the same LM state since * they are not in the LM. */ tve.lmstate.lm3g = pve->lmstate.lm3g; vithist_enter(vh, dict, dict2pid, &tve, rc); } } else { if (pred == 0) { /* Special case for the initial <s> entry */ se = 0; fe = 1; } else { se = vh->frame_start[pve->ef]; fe = vh->frame_start[pve->ef + 1]; } /* Now if it is a word, backtrack again to get all possible previous word So pve becomes the w_{n-2}. */ lwid = ngram_wid(lm, s3dict_wordstr(dict, s3dict_basewid(dict, wid))); tve.lmstate.lm3g.lwid[0] = lwid; /* FIXME: This loop is completely awful. For each entry in * this frame, we scan every entry in the previous frame, * potentially creating a new history entry. 
This means that * without pruning, the size of the vithist table (and thus * the time taken here) is exponential in the number of * frames! */ for (i = se; i < fe; i++) { pve = vithist_id2entry(vh, i); if (pve->valid) { int n_used; tve.path.score = pve->path.score + tve.ascr; /* Try at all costs to avoid calling ngram_tg_score() * because it is the main time consuming part here * (but as noted above... ugh...) See below as well. */ if ((tve.path.score - vh->wbeam) < vh->bestscore[vh->n_frm]) continue; /* The trigram cache is supposed to make this fast, * but due to the crazy number of times this could be * called, it's still slow compared to a hash * table. */ tve.lscr = ngram_tg_score(lm, lwid, pve->lmstate.lm3g.lwid[0], pve->lmstate.lm3g.lwid[1], &n_used); tve.path.score += tve.lscr; /* A different word exit threshold - we would have to * be inside the general word beam in order to get * here, now we apply a second beam to the *vithist * entries* in this frame. There can be an ungodly * number of them for reasons that aren't entirely * clear to me, so this is kind of a pre-pruning. * NOTE: the "backwards" math here is because * vh->bestscore is frequently MAX_NEG_INT32. ALSO * NOTE: We can't precompute the threshold since the * best score will be updated by vithist_enter(). */ if ((tve.path.score - vh->wbeam) >= vh->bestscore[vh->n_frm]) { tve.path.pred = i; tve.lmstate.lm3g.lwid[1] = pve->lmstate.lm3g.lwid[0]; vithist_enter(vh, dict, dict2pid, &tve, rc); } } } } }