/*
 * Run scoring checks against an already-loaded model.
 *
 * Checks the n_used value reported by trigram and bigram lookups, and the
 * log scores returned by ngram_score()/ngram_probv() after weights are
 * applied and again after they are reset to 1.0.  The expected numbers are
 * specific to the model the caller loaded.
 */
void
run_tests(ngram_model_t *model)
{
	int32 n_used;
	int32 wid_a, wid_b, wid_c;

	/* Trigram lookups: only the reported n_used is checked. */
	wid_a = ngram_wid(model, "daines");
	wid_b = ngram_wid(model, "huggins");
	wid_c = ngram_wid(model, "huggins");
	ngram_tg_score(model, wid_a, wid_b, wid_c, &n_used);
	TEST_EQUAL(n_used, 2);

	wid_a = ngram_wid(model, "david");
	wid_b = ngram_wid(model, "david");
	wid_c = ngram_wid(model, "david");
	ngram_tg_score(model, wid_a, wid_b, wid_c, &n_used);
	TEST_EQUAL(n_used, 1);

	/* Switch on non-trivial weights. */
	ngram_model_apply_weights(model, 7.5, 0.5, 1.0);
	/* -9452 * 7.5 + log(0.5) = -77821 */
	TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
		       -77821);
	/* ngram_probv() is expected to give back the unweighted values. */
	TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
		       -9452);
	TEST_EQUAL_LOG(ngram_probv(model, "huggins", "david", NULL), -831);

	/* Reset every weight to 1.0. */
	ngram_model_apply_weights(model, 1.0, 1.0, 1.0);
	TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
		       -9452);
	TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831);
	/* ngram_probv() must still agree with the unweighted value. */
	TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
		       -9452);

	/* With unit weights, ngram_score() should give the "raw" score. */
	TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
		       -9452);
	TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831);

	/* Verify that backoff mode calculations work. */
	wid_a = ngram_wid(model, "huggins");
	wid_b = ngram_wid(model, "david");
	ngram_bg_score(model, wid_a, wid_b, &n_used);
	TEST_EQUAL(n_used, 2);

	wid_a = ngram_wid(model, "blorglehurfle");
	wid_b = ngram_wid(model, "david");
	ngram_bg_score(model, wid_a, wid_b, &n_used);
	TEST_EQUAL(n_used, 1);

	wid_a = ngram_wid(model, "david");
	wid_b = ngram_wid(model, "david");
	ngram_bg_score(model, wid_a, wid_b, &n_used);
	TEST_EQUAL(n_used, 1);

	wid_a = ngram_wid(model, "daines");
	wid_b = ngram_wid(model, "huggins");
	wid_c = ngram_wid(model, "david");
	ngram_tg_score(model, wid_a, wid_b, wid_c, &n_used);
	TEST_EQUAL(n_used, 3);
}
/* Example #2 */
/* 0 */
/**
 * Read an n-gram language model from a file.
 *
 * The readers tried depend on file_type:
 *   - NGRAM_AUTO: trie binary, then ARPA, then DMP, in that order;
 *   - NGRAM_ARPA: ARPA only;
 *   - NGRAM_BIN:  trie binary, then DMP.
 * Any other value logs an error and fails.
 *
 * When config is non-NULL, the "-lw" and "-wip" options are applied to the
 * loaded model; each one defaults to 1.0 if it is not present in config.
 *
 * @param config    Configuration passed to the readers and consulted for
 *                  weights; may be NULL, in which case no weights are
 *                  applied here.
 * @param file_name Path handed to the format-specific readers.
 * @param file_type Which format(s) to try.
 * @param lmath     Log-math object handed to the readers.
 * @return The loaded model, or NULL if the type is unsupported or every
 *         reader that was tried failed.
 */
ngram_model_t *
ngram_model_read(cmd_ln_t * config,
                 const char *file_name,
                 ngram_file_type_t file_type, logmath_t * lmath)
{
    ngram_model_t *model = NULL;

    switch (file_type) {
    case NGRAM_AUTO:
        model = ngram_model_trie_read_bin(config, file_name, lmath);
        if (model == NULL)
            model = ngram_model_trie_read_arpa(config, file_name, lmath);
        if (model == NULL)
            model = ngram_model_trie_read_dmp(config, file_name, lmath);
        break;
    case NGRAM_ARPA:
        model = ngram_model_trie_read_arpa(config, file_name, lmath);
        break;
    case NGRAM_BIN:
        model = ngram_model_trie_read_bin(config, file_name, lmath);
        if (model == NULL)
            model = ngram_model_trie_read_dmp(config, file_name, lmath);
        break;
    default:
        E_ERROR("language model file type not supported\n");
        return NULL;
    }

    /*
     * Bail out on any read failure, including the ARPA-only case, so that
     * a NULL model is never handed to ngram_model_apply_weights() below.
     */
    if (model == NULL)
        return NULL;

    /* Now set weights based on config if present. */
    if (config) {
        float32 lw = 1.0;
        float32 wip = 1.0;

        if (cmd_ln_exists_r(config, "-lw"))
            lw = cmd_ln_float32_r(config, "-lw");
        if (cmd_ln_exists_r(config, "-wip"))
            wip = cmd_ln_float32_r(config, "-wip");

        ngram_model_apply_weights(model, lw, wip);
    }

    return model;
}
/**
 * Apply the given weights to every sub-model of a model set.
 *
 * base is treated as an ngram_model_set_t; lw and wip are forwarded
 * unchanged to ngram_model_apply_weights() for each entry of its lms
 * array.  The results of those calls are not inspected.
 *
 * @return Always 0.
 */
static int
ngram_model_set_apply_weights(ngram_model_t * base, float32 lw,
                              float32 wip)
{
    ngram_model_set_t *self = (ngram_model_set_t *) base;
    int32 idx = 0;

    /* Walk the sub-models in index order. */
    while (idx < self->n_models) {
        ngram_model_apply_weights(self->lms[idx], lw, wip);
        ++idx;
    }

    return 0;
}
/*
 * Entry point: parse the command line, load the language model named by
 * "-lm", optionally read a class definition file ("-probdef"), apply the
 * "-lw"/"-wip"/"-uw" weights, then evaluate either the file given by
 * "-lsn" or, if that is absent, the string given by "-text".
 *
 * Returns 1 if the command line cannot be parsed, 0 otherwise.  Failures
 * to set up log math or to load the model are reported through E_FATAL,
 * which the code below relies on not returning.
 */
int
main(int argc, char *argv[])
{
	cmd_ln_t *config;
	ngram_model_t *lm = NULL;
	logmath_t *lmath;
	const char *lmfn, *probdefn, *lsnfn, *text;

	if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
		return 1;

	verbose = cmd_ln_boolean_r(config, "-verbose");

	/* Create log math object. */
	if ((lmath = logmath_init
	     (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) {
		E_FATAL("Failed to initialize log math\n");
	}

	/* Load the language model. */
	lmfn = cmd_ln_str_r(config, "-lm");
	if (lmfn == NULL
	    || (lm = ngram_model_read(config, lmfn,
				      NGRAM_AUTO, lmath)) == NULL) {
		/*
		 * lmfn is NULL when "-lm" was not given; passing NULL for a
		 * %s conversion is undefined, so substitute a literal.
		 */
		E_FATAL("Failed to load language model from %s\n",
			lmfn != NULL ? lmfn : "(null)");
	}

	/* Optional class definitions; the result is not checked. */
	if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL)
		ngram_model_read_classdef(lm, probdefn);

	ngram_model_apply_weights(lm,
				  cmd_ln_float32_r(config, "-lw"),
				  cmd_ln_float32_r(config, "-wip"),
				  cmd_ln_float32_r(config, "-uw"));

	/* Now evaluate some text; "-lsn" takes precedence over "-text". */
	lsnfn = cmd_ln_str_r(config, "-lsn");
	text = cmd_ln_str_r(config, "-text");
	if (lsnfn) {
		evaluate_file(lm, lmath, lsnfn);
	}
	else if (text) {
		evaluate_string(lm, lmath, text);
	}

	return 0;
}