void
run_tests(ngram_model_t *model)
{
	int32 n_used;

	ngram_tg_score(model,
		       ngram_wid(model, "daines"),
		       ngram_wid(model, "huggins"),
		       ngram_wid(model, "huggins"), &n_used);
	TEST_EQUAL(n_used, 2);
	ngram_tg_score(model,
		       ngram_wid(model, "david"),
		       ngram_wid(model, "david"),
		       ngram_wid(model, "david"), &n_used);
	TEST_EQUAL(n_used, 1);

	/* Apply weights. */
	ngram_model_apply_weights(model, 7.5, 0.5, 1.0);
	/* -9452 * 7.5 + log(0.5) = -77821 */
	TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
		   -77821);
	/* Recover original score. */
	TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
		   -9452);
	TEST_EQUAL_LOG(ngram_probv(model, "huggins", "david", NULL), -831);

	/* Un-apply weights. */
	ngram_model_apply_weights(model, 1.0, 1.0, 1.0);
	TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
		       -9452);
	TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831);
	/* Recover original score. */
	TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
		       -9452);

	/* Pre-weighting, this should give the "raw" score. */
	TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
		       -9452);
	TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831);
	/* Verify that backoff mode calculations work. */
	ngram_bg_score(model,
		       ngram_wid(model, "huggins"),
		       ngram_wid(model, "david"), &n_used);
	TEST_EQUAL(n_used, 2);
	ngram_bg_score(model,
		       ngram_wid(model, "blorglehurfle"),
		       ngram_wid(model, "david"), &n_used);
	TEST_EQUAL(n_used, 1);
	ngram_bg_score(model,
		       ngram_wid(model, "david"),
		       ngram_wid(model, "david"), &n_used);
	TEST_EQUAL(n_used, 1);
	ngram_tg_score(model,
		       ngram_wid(model, "daines"),
		       ngram_wid(model, "huggins"),
		       ngram_wid(model, "david"), &n_used);
	TEST_EQUAL(n_used, 3);
}
Beispiel #2
0
static int
test_lm_ug_vals(ngram_model_t *model)
{
	TEST_ASSERT(model);
	TEST_EQUAL(ngram_score(model, "BACKWARD", NULL), -53008);
	return 0;
}
Beispiel #3
0
int
main(int argc, char *argv[])
{
	logmath_t *lmath;
	ngram_model_t *model;

	/* Initialize a logmath object to pass to ngram_read */
	lmath = logmath_init(1.0001, 0, 0);

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm", NGRAM_ARPA, lmath);
	test_lm_ug_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm.dmp", NGRAM_BIN, lmath);
	test_lm_ug_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model with missing backoffs */
	model = ngram_model_read(NULL, LMDIR "/104.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read corrupted language model, error expected */
	model = ngram_model_read(NULL, LMDIR "/105.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(NULL, model);

	/* Read corrupted language model, error expected */
	model = ngram_model_read(NULL, LMDIR "/106.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(NULL, model);

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/100.lm.bz2", NGRAM_ARPA, lmath);
	test_lm_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/100.lm.bin", NGRAM_BIN, lmath);
	test_lm_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath);
	test_lm_vals(model);
	/* Test refcounting. */
	model = ngram_model_retain(model);
	TEST_EQUAL(1, ngram_model_free(model));
	TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452);
	TEST_EQUAL(0, ngram_model_free(model));


	logmath_free(lmath);

	return 0;
}
Beispiel #4
0
static int
test_lm_vals(ngram_model_t *model)
{
	int32 n_used;
	TEST_ASSERT(model);
	TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
	TEST_EQUAL(strcmp(ngram_word(model, 0), "<UNK>"), 0);
	TEST_EQUAL(ngram_wid(model, "absolute"), 13);
	TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);
	/* Test unigrams. */
	TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
	TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "<UNK>"),
				  NGRAM_INVALID_WID, &n_used), -75346);
	TEST_EQUAL(n_used, 1);
	TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
	TEST_EQUAL(ngram_bg_score(model, ngram_wid(model, "sphinxtrain"),
				  NGRAM_INVALID_WID, &n_used), -64208);
	TEST_EQUAL(n_used, 1);
	/* Test bigrams. */
	TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
	/* Test trigrams. */
	TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL), -9450);
	return 0;
}
void
run_tests(logmath_t *lmath, ngram_model_t *model)
{
	int32 rv, i;

	TEST_ASSERT(model);

	TEST_EQUAL(ngram_wid(model, "scylla"), 285);
	TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

	rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
	TEST_EQUAL(rv, 0);

	/* Verify that class word IDs remain the same. */
	TEST_EQUAL(ngram_wid(model, "scylla"), 285);
	TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

	/* Verify in-class word IDs. */
	TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);

	/* Verify in-class and out-class unigram scores. */
	TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
		       logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.4));
	TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
		       logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.1));
	TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
		       logmath_log10_to_log(lmath, -2.7884));
	TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
		       logmath_log10_to_log(lmath, -1.9038) + logmath_log(lmath, 0.7));
	TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
		       logmath_log10_to_log(lmath, -1.9038));

	/* Verify class bigram scores. */
	TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
		       logmath_log10_to_log(lmath, -1.2642));
	TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
		       logmath_log10_to_log(lmath, -1.2642) + logmath_log(lmath, 0.4));
	TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
		       logmath_log10_to_log(lmath, -0.5172));
	TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
		       logmath_log10_to_log(lmath, -0.5172));
	TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
		       logmath_log10_to_log(lmath, -0.5172));

	/* Verify class trigram scores. */
	TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
		       logmath_log10_to_log(lmath, -0.5725));
	TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
		       logmath_log10_to_log(lmath, -0.5725) + logmath_log(lmath, 0.7));
	TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
		       logmath_log10_to_log(lmath, -0.9404));
	TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
		       logmath_log10_to_log(lmath, -0.9404));

	/* Add words to classes. */
	rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
	TEST_ASSERT(rv >= 0);
	TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
	TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
		       logmath_log10_to_log(lmath, -2.7884) + logmath_log(lmath, 0.2));
	printf("scrappy:scylla %08x %d %f\n", 
	       ngram_wid(model, "scrappy:scylla"),
	       ngram_score(model, "scrappy:scylla", NULL),
	       logmath_exp(lmath, ngram_score(model, "scrappy:scylla", NULL)));
	/* Add a lot of words to a class. */
	for (i = 0; i < 129; ++i) {
		char word[32];
		sprintf(word, "%d:scylla", i);
		rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
		printf("%s %08x %d %f\n", word,
		       ngram_wid(model, word),
		       ngram_score(model, word, NULL),
		       logmath_exp(lmath, ngram_score(model, word, NULL)));
		TEST_ASSERT(rv >= 0);
		TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
	}

	/* Add a new class. */
	{
		const char *words[] = { "blatz:foobie", "hurf:foobie" };
		float32 weights[] = { 0.6, 0.4 };
		int32 foobie_prob;
		rv = ngram_model_add_class(model, "[foobie]", 1.0,
					   words, weights, 2);
		TEST_ASSERT(rv >= 0);
		foobie_prob = ngram_score(model, "[foobie]", NULL);
		TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
			       foobie_prob + logmath_log(lmath, 0.6));
		TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
			       foobie_prob + logmath_log(lmath, 0.4));
	}
}