/**
 * Look up the raw probability of a word given a history of word IDs.
 *
 * Class word IDs in both `wid` and `history` are replaced by the tag word
 * of their class before the model's raw_score callback is invoked.
 *
 * @param model    Language model to query.
 * @param wid      Word ID to look up; may be a class word ID.
 * @param history  Array of `n_hist` history word IDs.  NOTE: entries that
 *                 are class word IDs are overwritten IN PLACE with the
 *                 class tag word ID.
 * @param n_hist   Number of entries in `history`.
 * @param n_used   Passed through unchanged to the raw_score callback.
 * @return model->log_zero if `wid` is NGRAM_INVALID_WID; the value 1 if
 *         `wid` is a class word that ngram_class_prob() reports as not
 *         found in its class; otherwise the raw_score result plus the
 *         in-class weight (0 for non-class words).
 */
int32
ngram_ng_prob(ngram_model_t * model, int32 wid, int32 * history,
              int32 n_hist, int32 * n_used)
{
    int32 in_class_weight = 0;
    int32 raw_prob;
    int h;

    /* Closed vocabulary: an out-of-vocabulary word has zero probability. */
    if (wid == NGRAM_INVALID_WID)
        return model->log_zero;

    /* Map a class word onto its class tag, remembering its in-class weight. */
    if (NGRAM_IS_CLASSWID(wid)) {
        ngram_class_t *word_class = model->classes[NGRAM_CLASSID(wid)];

        in_class_weight = ngram_class_prob(word_class, wid);
        /* A value of 1 means the word was not found in the class.
         * NOTE(review): the sentinel 1 is handed straight back to the
         * caller here rather than model->log_zero -- confirm callers
         * expect that. */
        if (in_class_weight == 1)
            return in_class_weight;
        wid = word_class->tag_wid;
    }

    /* Do the same class-to-tag mapping for every history entry. */
    for (h = 0; h < n_hist; ++h) {
        int32 hist_wid = history[h];

        if (hist_wid == NGRAM_INVALID_WID || !NGRAM_IS_CLASSWID(hist_wid))
            continue;
        history[h] = model->classes[NGRAM_CLASSID(hist_wid)]->tag_wid;
    }

    raw_prob = (*model->funcs->raw_score) (model, wid, history,
                                           n_hist, n_used);

    /* Apply the unigram in-class weight (added, as the values are
     * presumably in the log domain). */
    return raw_prob + in_class_weight;
}
/**
 * Add a unigram entry for `wid` to a trie-backed N-gram model.
 *
 * Grows the unigram array to n_1g_alloc + 1 entries, clears the tail of
 * the array, bumps the unigram count, and fills in the entry for `wid`.
 *
 * @param base     Model to modify; treated as an ngram_model_trie_t.
 * @param wid      Word ID of the new unigram; must not be a class word ID
 *                 (enforced with assert()).
 * @param lweight  Weight for the new word; the result of logmath_log()
 *                 on 1 / (updated unigram count) is added to it.
 * @return weight_score() applied to the adjusted weight, cast to int32.
 */
static int32
lm_trie_add_ug(ngram_model_t * base, int32 wid, int32 lweight)
{
    ngram_model_trie_t *trie_model = (ngram_model_trie_t *) base;
    unigram_t *ug;

    /* Class words must never be added here. */
    assert(!NGRAM_IS_CLASSWID(wid));

    /* Grow the unigram array to n_1g_alloc + 1 entries. */
    ug = (unigram_t *) ckd_realloc(trie_model->trie->unigrams,
                                   sizeof(*ug) * (base->n_1g_alloc + 1));
    trie_model->trie->unigrams = ug;

    /* Zero entries n_counts[0] + 1 through n_1g_alloc, the last one
     * allocated (assumes n_1g_alloc >= n_counts[0] -- TODO confirm). */
    memset(ug + (base->n_counts[0] + 1), 0,
           (size_t) (base->n_1g_alloc - base->n_counts[0]) * sizeof(*ug));

    /* Count the new unigram first, so the adjustment below uses the
     * updated count. */
    ++base->n_counts[0];
    lweight += logmath_log(base->lmath, 1.0 / base->n_counts[0]);

    /* This unigram by definition takes part in no bigrams: its backoff
     * weight is left at 0 and the following entry gets the same next
     * pointer as this one. */
    ug[wid + 1].next = ug[wid].next;
    ug[wid].prob = (float) lweight;
    ug[wid].bo = 0;

    /* FIXME: the count can be quite bogus because of class words.  If
     * wid lies outside the unigram count, raise the count to cover it,
     * at the cost of no longer really knowing how many unigrams exist. */
    if ((uint32) wid >= base->n_counts[0])
        base->n_counts[0] = wid + 1;

    return (int32) weight_score(base, lweight);
}