/*
 * Score word lw2 given the single-word history lw1.
 *
 * *n_used is set to the order of the entry that supplied the score:
 * 2 when the pair (lw1, lw2) is found by find_bg(), 1 otherwise.
 *
 * When lw1 is negative or the model order (model->base.n) is below 2,
 * the unigram prob1 of lw2 is returned directly.  When the pair is not
 * found, the result is bo_wt1 of lw1 plus prob1 of lw2.
 */
static int32
lm3g_bg_score(NGRAM_MODEL_TYPE *model, int32 lw1, int32 lw2, int32 *n_used)
{
    bigram_t *successors;
    int32 first, count, hit;

    /* No usable history, or a model without bigrams: plain unigram. */
    if (lw1 < 0 || model->base.n < 2) {
        *n_used = 1;
        return model->lm3g.unigrams[lw2].prob1.l;
    }

    /* Bigrams following lw1 occupy [first, first + count). */
    first = FIRST_BG(model, lw1);
    count = FIRST_BG(model, lw1 + 1) - first;
    successors = model->lm3g.bigrams + first;

    hit = find_bg(successors, count, lw2);
    if (hit < 0) {
        /* Access mode = unigram */
        *n_used = 1;
        return model->lm3g.unigrams[lw1].bo_wt1.l
            + model->lm3g.unigrams[lw2].prob1.l;
    }

    /* Access mode = bigram */
    *n_used = 2;
    return model->lm3g.prob2[successors[hit].prob2].l;
}
/*
 * Subtract the value returned by find_bg(data, ndata) from each of the
 * ndata elements of data, in place, replacing any result that is not
 * greater than zero with 0.
 *
 * NOTE(review): find_bg() presumably estimates a background level for
 * the trace -- confirm against its definition.
 */
void trace_freq(TRACE *data, int ndata)
{
    int idx;
    int background;

    background = find_bg(data, ndata);

#define MAX(a,b) ((a)>(b)?(a):(b))

    for (idx = 0; idx < ndata; ++idx) {
        TRACE *sample = &data[idx];

        *sample = MAX(*sample - background, 0);
    }
}
/*
 * Allocate a tginfo record for the bigram (lw1, lw2) and push it onto the
 * head of the list model->lm3g.tginfo[lw2].
 *
 * If the bigram exists, the record receives its bo_wt2 value, a pointer
 * to its first trigram and the number of trigrams that follow it.  If it
 * does not exist, the record is left with tg == NULL, bowt == 0 and
 * n_tg == 0.  The record's `used` field is not written here.
 */
static void
load_tginfo(NGRAM_MODEL_TYPE *model, int32 lw1, int32 lw2)
{
    tginfo_t *entry;
    bigram_t *row;
    int32 row_start, row_len, hit, abs_bg, first_tg;

    /* Create the record and link it in before any lookup is attempted. */
    entry = (tginfo_t *) listelem_malloc(model->lm3g.le);
    entry->w1 = lw1;
    entry->tg = NULL;
    entry->next = model->lm3g.tginfo[lw2];
    model->lm3g.tginfo[lw2] = entry;

    /* Bigrams whose first word is lw1 occupy [row_start, row_start + row_len). */
    row_start = model->lm3g.unigrams[lw1].bigrams;
    row_len = model->lm3g.unigrams[lw1 + 1].bigrams - row_start;
    row = model->lm3g.bigrams + row_start;

    /* Only search when lw1 has at least one bigram. */
    hit = (row_len > 0) ? find_bg(row, row_len, lw2) : -1;
    if (hit < 0) {
        /* No bigram w1,w2 */
        entry->bowt = 0;
        entry->n_tg = 0;
        return;
    }

    entry->bowt = model->lm3g.bo_wt2[row[hit].bo_wt2].l;

    /* Absolute index of bigram lw1,lw2, then of its first trigram. */
    abs_bg = row_start + hit;
    first_tg = FIRST_TG(model, abs_bg);
    entry->tg = model->lm3g.trigrams + first_tg;

    /* The trigram count is the distance to the next bigram's first trigram. */
    entry->n_tg = FIRST_TG(model, abs_bg + 1) - first_tg;
}
/*
 * Build an iterator positioned on the N-gram made of word `wid` and the
 * `n_hist` words in `history`.
 *
 *   n_hist == 0: points at the unigram entry for wid.
 *   n_hist == 1: points at the bigram (history[0], wid).
 *   n_hist == 2: points at the trigram (history[1], history[0], wid) and
 *                at the bigram that owns it.
 *
 * Returns the new iterator, or NULL (after releasing it with
 * ngram_iter_free()) when the requested N-gram is not found or n_hist is
 * outside 0..2.
 */
static ngram_iter_t *
lm3g_template_iter(ngram_model_t *base, int32 wid, int32 *history, int32 n_hist)
{
    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
    lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));
    int32 pos;

    ngram_iter_init((ngram_iter_t *)itor, base, n_hist, FALSE);

    switch (n_hist) {
    case 0:
        /* Unigram: direct index, nothing to search. */
        itor->ug = model->lm3g.unigrams + wid;
        break;

    case 1: {
        int32 first, count;

        /* Same lookup as the bigram scoring routine. */
        itor->ug = model->lm3g.unigrams + history[0];
        first = FIRST_BG(model, history[0]);
        count = FIRST_BG(model, history[0] + 1) - first;
        itor->bg = model->lm3g.bigrams + first;

        pos = find_bg(itor->bg, count, wid);
        if (pos < 0)
            goto not_found;     /* no such bigram */
        itor->bg += pos;
        break;
    }

    case 2: {
        tginfo_t *entry, *before;
        int32 count;

        itor->ug = model->lm3g.unigrams + history[1];

        /* Look for an existing tginfo record for (history[1], history[0]). */
        before = NULL;
        entry = model->lm3g.tginfo[history[0]];
        while (entry != NULL && entry->w1 != history[1]) {
            before = entry;
            entry = entry->next;
        }

        if (entry == NULL) {
            /* Not present: load_tginfo() pushes a new record on the head. */
            load_tginfo(model, history[1], history[0]);
            entry = model->lm3g.tginfo[history[0]];
        }
        else if (before != NULL) {
            /* Present but not first: move it to the head of the list. */
            before->next = entry->next;
            entry->next = model->lm3g.tginfo[history[0]];
            model->lm3g.tginfo[history[0]] = entry;
        }
        entry->used = 1;

        /* Trigrams for w1,w2 are now reachable through entry. */
        count = entry->n_tg;
        itor->tg = entry->tg;

        pos = find_tg(itor->tg, count, wid);
        if (pos < 0)
            goto not_found;     /* no such trigram */
        itor->tg += pos;

        /* Walk the bigram pointer forward from the start of the bigram
         * array until the next bigram's first trigram lies beyond the
         * trigram just found.  FIXME: load_tginfo already located the
         * relevant bigram, so this scan repeats work. */
        itor->bg = model->lm3g.bigrams;
        while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))
               <= (itor->tg - model->lm3g.trigrams))
            ++itor->bg;
        break;
    }

    default:
        /* Should not happen. */
        assert(n_hist == 0); /* Guaranteed to fail. */
        goto not_found;
    }

    return (ngram_iter_t *)itor;

not_found:
    ngram_iter_free((ngram_iter_t *)itor);
    return NULL;
}