Пример #1
0
int32
ngram_ng_prob(ngram_model_t * model, int32 wid, int32 * history,
              int32 n_hist, int32 * n_used)
{
    int32 prob, class_weight = 0;
    int i;

    /* Closed vocabulary, OOV word probability is zero */
    if (wid == NGRAM_INVALID_WID)
        return model->log_zero;

    /* "Declassify" wid and history */
    if (NGRAM_IS_CLASSWID(wid)) {
        ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)];

        class_weight = ngram_class_prob(lmclass, wid);
        if (class_weight == 1)  /* Meaning, not found in class. */
            return class_weight;
        wid = lmclass->tag_wid;
    }
    for (i = 0; i < n_hist; ++i) {
        if (history[i] != NGRAM_INVALID_WID
            && NGRAM_IS_CLASSWID(history[i]))
            history[i] =
                model->classes[NGRAM_CLASSID(history[i])]->tag_wid;
    }
    prob = (*model->funcs->raw_score) (model, wid, history,
                                       n_hist, n_used);
    /* Multiply by unigram in-class weight. */
    return prob + class_weight;
}
Пример #2
0
static int32
lm_trie_add_ug(ngram_model_t * base, int32 wid, int32 lweight)
{
    ngram_model_trie_t *model = (ngram_model_trie_t *) base;

    /* This would be very bad if this happened! */
    assert(!NGRAM_IS_CLASSWID(wid));

    /* Reallocate unigram array. */
    model->trie->unigrams =
        (unigram_t *) ckd_realloc(model->trie->unigrams,
                                  sizeof(*model->trie->unigrams) *
                                  (base->n_1g_alloc + 1));
    memset(model->trie->unigrams + (base->n_counts[0] + 1), 0,
           (size_t) (base->n_1g_alloc -
                     base->n_counts[0]) * sizeof(*model->trie->unigrams));
    ++base->n_counts[0];
    lweight += logmath_log(base->lmath, 1.0 / base->n_counts[0]);
    model->trie->unigrams[wid + 1].next = model->trie->unigrams[wid].next;
    model->trie->unigrams[wid].prob = (float) lweight;
    /* This unigram by definition doesn't participate in any bigrams,
     * so its backoff weight is undefined and next pointer same as in finish unigram*/
    model->trie->unigrams[wid].bo = 0;
    /* Finally, increase the unigram count */
    /* FIXME: Note that this can actually be quite bogus due to the
     * presence of class words.  If wid falls outside the unigram
     * count, increase it to compensate, at the cost of no longer
     * really knowing how many unigrams we have :( */
    if ((uint32) wid >= base->n_counts[0])
        base->n_counts[0] = wid + 1;

    return (int32) weight_score(base, lweight);
}