示例#1
0
static int32
lm3g_tg_score(NGRAM_MODEL_TYPE *model, int32 lw1,
              int32 lw2, int32 lw3, int32 *n_used)
{
    ngram_model_t *base = &model->base;
    int32 i, n, score;
    trigram_t *tg;
    tginfo_t *tginfo, *prev_tginfo;

    if ((base->n < 3) || (lw1 < 0) || (lw2 < 0))
        return (lm3g_bg_score(model, lw2, lw3, n_used));

    sbmtx_lock(model->lm3g.tginfo_mtx);
    prev_tginfo = NULL;
    for (tginfo = model->lm3g.tginfo[lw2]; tginfo; tginfo = tginfo->next) {
        if (tginfo->w1 == lw1)
            break;
        prev_tginfo = tginfo;
    }

    if (!tginfo) {
        load_tginfo(model, lw1, lw2);
        tginfo = model->lm3g.tginfo[lw2];
    }
    else if (prev_tginfo) {
        prev_tginfo->next = tginfo->next;
        tginfo->next = model->lm3g.tginfo[lw2];
        model->lm3g.tginfo[lw2] = tginfo;
    }

    tginfo->used = 1;

    /* Trigrams for w1,w2 now pointed to by tginfo */
    n = tginfo->n_tg;
    tg = tginfo->tg;
    if ((i = find_tg(tg, n, lw3)) >= 0) {
        /* Access mode = trigram */
        if (n_used) *n_used = 3;
        score = model->lm3g.prob3[tg[i].prob3].l;
    }
    else {
        score = tginfo->bowt + lm3g_bg_score(model, lw2, lw3, n_used);
    }
    sbmtx_unlock(model->lm3g.tginfo_mtx);

    return (score);
}
static ngram_iter_t *
lm3g_template_iter(ngram_model_t *base, int32 wid,
                   int32 *history, int32 n_hist)
{
    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
    lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));

    ngram_iter_init((ngram_iter_t *)itor, base, n_hist, FALSE);

    if (n_hist == 0) {
        /* Unigram is the easiest. */
        itor->ug = model->lm3g.unigrams + wid;
        return (ngram_iter_t *)itor;
    }
    else if (n_hist == 1) {
        int32 i, n, b;
        /* Find the bigram, as in bg_score above (duplicate code...) */
        itor->ug = model->lm3g.unigrams + history[0];
        b = FIRST_BG(model, history[0]);
        n = FIRST_BG(model, history[0] + 1) - b;
        itor->bg = model->lm3g.bigrams + b;
        /* If no such bigram exists then fail. */
        if ((i = find_bg(itor->bg, n, wid)) < 0) {
            ngram_iter_free((ngram_iter_t *)itor);
            return NULL;
        }
        itor->bg += i;
        return (ngram_iter_t *)itor;
    }
    else if (n_hist == 2) {
        int32 i, n;
        tginfo_t *tginfo, *prev_tginfo;
        /* Find the trigram, as in tg_score above (duplicate code...) */
        itor->ug = model->lm3g.unigrams + history[1];
        prev_tginfo = NULL;
        for (tginfo = model->lm3g.tginfo[history[0]];
             tginfo; tginfo = tginfo->next) {
            if (tginfo->w1 == history[1])
                break;
            prev_tginfo = tginfo;
        }

        if (!tginfo) {
            load_tginfo(model, history[1], history[0]);
            tginfo = model->lm3g.tginfo[history[0]];
        }
        else if (prev_tginfo) {
            prev_tginfo->next = tginfo->next;
            tginfo->next = model->lm3g.tginfo[history[0]];
            model->lm3g.tginfo[history[0]] = tginfo;
        }

        tginfo->used = 1;

        /* Trigrams for w1,w2 now pointed to by tginfo */
        n = tginfo->n_tg;
        itor->tg = tginfo->tg;
        if ((i = find_tg(itor->tg, n, wid)) >= 0) {
            itor->tg += i;
            /* Now advance the bigram pointer accordingly.  FIXME:
             * Note that we actually already found the relevant bigram
             * in load_tginfo. */
            itor->bg = model->lm3g.bigrams;
            while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))
                   <= (itor->tg - model->lm3g.trigrams))
                ++itor->bg;
            return (ngram_iter_t *)itor;
        }
        else {
            ngram_iter_free((ngram_iter_t *)itor);
            return (ngram_iter_t *)NULL;
        }
    }
    else {
        /* Should not happen. */
        assert(n_hist == 0); /* Guaranteed to fail. */
        ngram_iter_free((ngram_iter_t *)itor);
        return NULL;
    }
}