ngram_model_t *
ngram_model_set_init(cmd_ln_t * config,
                     ngram_model_t ** models,
                     char **names, const float32 * weights, int32 n_models)
{
    ngram_model_set_t *model;
    ngram_model_t *base;
    logmath_t *lmath;
    int32 i, n;

    if (n_models == 0)          /* Nothing to combine. */
        return NULL;

    /* Do consistency checking on the models.  They must all use the
     * same logbase and shift. */
    lmath = models[0]->lmath;
    for (i = 1; i < n_models; ++i) {
        if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath)
            || logmath_get_shift(models[i]->lmath) !=
            logmath_get_shift(lmath)) {
            E_ERROR
                ("Log-math parameters don't match, will not create LM set\n");
            return NULL;
        }
    }

    /* Allocate the combined model, initialize it. */
    model = ckd_calloc(1, sizeof(*model));
    base = &model->base;
    model->n_models = n_models;
    model->lms = ckd_calloc(n_models, sizeof(*model->lms));
    model->names = ckd_calloc(n_models, sizeof(*model->names));
    /* Initialize weights to a uniform distribution */
    model->lweights = ckd_calloc(n_models, sizeof(*model->lweights));
    {
        int32 uniform = logmath_log(lmath, 1.0 / n_models);
        for (i = 0; i < n_models; ++i)
            model->lweights[i] = uniform;
    }
    /* Default to interpolate if weights were given. */
    if (weights)
        model->cur = -1;

    n = 0;
    for (i = 0; i < n_models; ++i) {
        model->lms[i] = ngram_model_retain(models[i]);
        model->names[i] = ckd_salloc(names[i]);
        if (weights)
            model->lweights[i] = logmath_log(lmath, weights[i]);
        /* N is the maximum of all merged models. */
        if (models[i]->n > n)
            n = models[i]->n;
    }
    /* Allocate the history mapping table. */
    model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist));

    /* Now build the word-ID mapping and merged vocabulary. */
    build_widmap(base, lmath, n);
    return base;
}
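For context, here is a minimal usage sketch (not part of the original source) showing how this constructor might be called to interpolate two models. The file names "general.lm"/"domain.lm", the sub-model names, and the 0.7/0.3 weights are illustrative assumptions; only ngram_model_read(), ngram_model_free() and ngram_model_set_init() itself come from the sphinxbase API shown here.

#include <sphinxbase/logmath.h>
#include <sphinxbase/ngram_model.h>

static ngram_model_t *
make_lm_set(logmath_t *lmath)
{
    ngram_model_t *lms[2];
    ngram_model_t *set;
    char *names[] = { "general", "domain" };   /* hypothetical sub-model names */
    float32 weights[] = { 0.7f, 0.3f };        /* interpolation weights (sum to 1) */

    /* Both models must share the same logmath base and shift,
     * as verified by ngram_model_set_init() above. */
    lms[0] = ngram_model_read(NULL, "general.lm", NGRAM_ARPA, lmath);
    lms[1] = ngram_model_read(NULL, "domain.lm", NGRAM_ARPA, lmath);
    if (lms[0] == NULL || lms[1] == NULL)
        return NULL;

    /* The set retains its own references to the sub-models, so the
     * caller's handles can be released once the set exists. */
    set = ngram_model_set_init(NULL, lms, names, weights, 2);
    ngram_model_free(lms[0]);
    ngram_model_free(lms[1]);
    return set;
}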
Example #2
int
main(int argc, char *argv[])
{
	logmath_t *lmath;
	ngram_model_t *model;

	/* Initialize a logmath object to pass to ngram_model_read */
	lmath = logmath_init(1.0001, 0, 0);

	/* Read a unigram language model in ARPA format */
	model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm", NGRAM_ARPA, lmath);
	test_lm_ug_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read the same unigram model from a binary dump */
	model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm.dmp", NGRAM_BIN, lmath);
	test_lm_ug_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model with missing backoffs */
	model = ngram_model_read(NULL, LMDIR "/104.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read corrupted language model, error expected */
	model = ngram_model_read(NULL, LMDIR "/105.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(NULL, model);

	/* Read corrupted language model, error expected */
	model = ngram_model_read(NULL, LMDIR "/106.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(NULL, model);

	/* Read a bzip2-compressed ARPA language model */
	model = ngram_model_read(NULL, LMDIR "/100.lm.bz2", NGRAM_ARPA, lmath);
	test_lm_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read the same model in sphinxbase binary format */
	model = ngram_model_read(NULL, LMDIR "/100.lm.bin", NGRAM_BIN, lmath);
	test_lm_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read the same model from a binary DMP dump */
	model = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath);
	test_lm_vals(model);
	/* Test refcounting. */
	model = ngram_model_retain(model);
	TEST_EQUAL(1, ngram_model_free(model));
	TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452);
	TEST_EQUAL(0, ngram_model_free(model));

	logmath_free(lmath);

	return 0;
}
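As a hedged aside (not part of the test): the integer returned by ngram_score() is a log-domain value in the base passed to logmath_init() above (1.0001), so logmath_exp() can map it back to the linear domain. The helper below is a sketch for illustration only, reusing the same word/context triple as the refcounting check.

#include <stdio.h>
#include <sphinxbase/logmath.h>
#include <sphinxbase/ngram_model.h>

/* Illustration only: convert an ngram_score() result back to a
 * linear-domain value using the same logmath object the model
 * was loaded with. */
static void
print_score_as_prob(ngram_model_t *model, logmath_t *lmath)
{
    int32 score = ngram_score(model, "daines", "huggins", "david", NULL);
    printf("log score = %d, linear value = %g\n",
           score, logmath_exp(lmath, score));
}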