コード例 #1
0
/**
 * Score a word with an N-gram model set.
 *
 * The word ID and each history ID are translated through set->widmap into
 * the ID space of the submodel being queried; history entries equal to
 * NGRAM_INVALID_WID are passed through unchanged.  set->maphist is used as
 * scratch space for the translated history and is overwritten on each call.
 *
 * @param base    The model set, viewed through its base model type.
 * @param wid     Set-level ID of the word to score.
 * @param history Set-level IDs of the history words.
 * @param n_hist  Number of entries in history; clamped to base->n - 1.
 * @param n_used  Forwarded untouched to ngram_ng_score().  In the
 *                interpolated case the same pointer is handed to every
 *                submodel call (so presumably it reflects the last
 *                submodel queried -- confirm against ngram_ng_score()).
 * @return The selected submodel's score when set->cur is not -1; otherwise
 *         the logmath_add() accumulation, starting from base->log_zero, of
 *         each submodel's score plus its set->lweights entry.
 */
static int32
ngram_model_set_score(ngram_model_t * base, int32 wid,
                      int32 * history, int32 n_hist, int32 * n_used)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    int32 total;
    int32 m;
    int32 h;

    /* Never hand a submodel more history than the set's order allows. */
    if (n_hist > base->n - 1)
        n_hist = base->n - 1;

    /* A current submodel is selected: score with it alone. */
    if (set->cur != -1) {
        for (h = 0; h < n_hist; ++h) {
            set->maphist[h] = (history[h] == NGRAM_INVALID_WID)
                ? NGRAM_INVALID_WID
                : set->widmap[history[h]][set->cur];
        }
        return ngram_ng_score(set->lms[set->cur],
                              set->widmap[wid][set->cur],
                              set->maphist, n_hist, n_used);
    }

    /* No current submodel: interpolate over every submodel in the set. */
    total = base->log_zero;
    for (m = 0; m < set->n_models; ++m) {
        int32 sub_score;

        /* Translate the history into submodel m's word ID space. */
        for (h = 0; h < n_hist; ++h) {
            set->maphist[h] = (history[h] == NGRAM_INVALID_WID)
                ? NGRAM_INVALID_WID
                : set->widmap[history[h]][m];
        }
        sub_score = ngram_ng_score(set->lms[m], set->widmap[wid][m],
                                   set->maphist, n_hist, n_used);
        total = logmath_add(base->lmath, total,
                            set->lweights[m] + sub_score);
    }

    return total;
}
コード例 #2
0
ファイル: ngram_model.c プロジェクト: Bangybug/sphinxbase
/**
 * Score a word given a history supplied as a NULL-terminated list of
 * word strings.
 *
 * The variable arguments are read as const char * until a NULL is found.
 * They are walked twice: once to count them so the ID buffer can be
 * sized, and once to convert each string with ngram_wid().
 *
 * @param model Model used both for word lookup and for scoring.
 * @param word  The word to score.
 * @param ...   History words (const char *), terminated by NULL.
 * @return The value returned by ngram_ng_score(); the number of history
 *         words it reports as used is discarded.
 */
int32
ngram_score(ngram_model_t * model, const char *word, ...)
{
    va_list args;
    const char *hist_word;
    int32 *hist_ids;
    int32 count;
    int32 filled;
    int32 n_used;
    int32 result;

    /* First pass: count the history words up to the NULL terminator. */
    va_start(args, word);
    for (count = 0;
         (hist_word = va_arg(args, const char *)) != NULL; ++count)
        ;
    va_end(args);

    hist_ids = ckd_calloc(count, sizeof(*hist_ids));

    /* Second pass: convert each history word to its word ID. */
    va_start(args, word);
    for (filled = 0;
         (hist_word = va_arg(args, const char *)) != NULL; ++filled)
        hist_ids[filled] = ngram_wid(model, hist_word);
    va_end(args);

    result = ngram_ng_score(model, ngram_wid(model, word),
                            hist_ids, filled, &n_used);
    ckd_free(hist_ids);
    return result;
}
コード例 #3
0
/**
 * Trigram convenience wrapper around ngram_ng_score().
 *
 * Scores w3 with a two-entry history laid out as { w2, w1 }.
 *
 * @param model  Model to query.
 * @param w3     ID of the word being scored.
 * @param w2     First history entry.
 * @param w1     Second history entry.
 * @param n_used Forwarded to ngram_ng_score().
 * @return Whatever ngram_ng_score() returns.
 */
int32
ngram_tg_score(ngram_model_t *model, int32 w3, int32 w2, int32 w1, int32 *n_used)
{
    int32 context[2];

    context[0] = w2;
    context[1] = w1;

    return ngram_ng_score(model, w3, context,
                          (int32) (sizeof(context) / sizeof(context[0])),
                          n_used);
}
コード例 #4
0
/**
 * Score a sequence of words with a language model and return the
 * per-word cross-entropy, CH = -1/N * sum(log P(W|H)).
 *
 * The words are converted to word IDs and stored in reverse order, so
 * that for the word at index i the rest of the array (i+1 .. n-1) is its
 * history as passed to ngram_ng_score().  Words equal to "<s>" are
 * counted as context cues and skipped; words whose ID is
 * NGRAM_INVALID_WID or the model's unknown-word ID are counted as OOVs
 * and skipped.  When the external `verbose` flag is set, one line per
 * scored word is printed to stdout.
 *
 * @param lm           Language model to score with.
 * @param words        Array of n word strings.
 * @param n            Number of entries in words.
 * @param out_n_ccs    If non-NULL, receives the number of "<s>" words.
 * @param out_n_oovs   If non-NULL, receives the number of OOV words.
 * @param out_lm_score If non-NULL, receives the summed score of all
 *                     scored words.
 * @return The negated score sum divided by the number of scored words
 *         (integer division), or 0 when n <= 0 or every word was skipped.
 *
 * None of the output parameters is written when n <= 0, and
 * out_lm_score is not written when every word was skipped.
 */
static int
calc_entropy(ngram_model_t *lm, char **words, int32 n,
             int32 *out_n_ccs, int32 *out_n_oovs, int32 *out_lm_score)
{
    int32 *wids;
    int32 startwid;
    int32 i, ch, nccs, noovs, unk;

    /* Nothing to score; also keeps a negative count away from the
     * allocator. */
    if (n <= 0)
        return 0;

    unk = ngram_unknown_wid(lm);

    /* Reverse this array into an array of word IDs. */
    wids = ckd_calloc(n, sizeof(*wids));
    for (i = 0; i < n; ++i)
        wids[n - i - 1] = ngram_wid(lm, words[i]);
    /* Skip <s> as it's a context cue (HACK, this should be configurable). */
    startwid = ngram_wid(lm, "<s>");

    /* Now evaluate the list of words in reverse using the
     * remainder of the array as the history. */
    ch = noovs = nccs = 0;
    for (i = 0; i < n; ++i) {
        int32 n_used;
        int32 prob;

        /* Skip <s> as it's a context cue (HACK, this should be configurable). */
        if (wids[i] == startwid) {
            ++nccs;
            continue;
        }
        /* Skip and count OOVs. */
        if (wids[i] == NGRAM_INVALID_WID || wids[i] == unk) {
            ++noovs;
            continue;
        }
        /* Sum up information for each N-gram */
        prob = ngram_ng_score(lm,
                              wids[i], wids + i + 1,
                              n - i - 1, &n_used);
        if (verbose) {
            int m;
            printf("log P(%s|", ngram_word(lm, wids[i]));
            /* Print at most (model order - 1) history words, starting
             * from the far end of that window and walking back toward
             * the scored word. */
            m = i + ngram_model_get_size(lm) - 1;
            if (m >= n)
                m = n - 1;
            while (m > i) {
                printf("%s ", ngram_word(lm, wids[m--]));
            }
            printf(") = %d\n", prob);
        }
        ch -= prob;
    }

    /* The ID buffer is not needed past this point; release it here so
     * that neither return path below leaks it. */
    ckd_free(wids);

    if (out_n_ccs) *out_n_ccs = nccs;
    if (out_n_oovs) *out_n_oovs = noovs;

    /* Calculate cross-entropy CH = - 1/N sum log P(W|H) */
    n -= (nccs + noovs);
    if (n <= 0)
        return 0;
    if (out_lm_score)
        *out_lm_score = -ch;
    return ch / n;
}
コード例 #5
0
ファイル: ngram_model.c プロジェクト: Bangybug/sphinxbase
int32
ngram_bg_score(ngram_model_t * model, int32 w2, int32 w1, int32 * n_used)
{
    return ngram_ng_score(model, w2, &w1, 1, n_used);
}