/* Exercise trigram/bigram scoring on the 100 LM: history-length
 * bookkeeping (n_used), LM weight application/removal, and recovery
 * of unweighted scores via ngram_probv(). */
void
run_tests(ngram_model_t *model)
{
    int32 n_used;
    int32 w_daines = ngram_wid(model, "daines");
    int32 w_huggins = ngram_wid(model, "huggins");
    int32 w_david = ngram_wid(model, "david");

    /* "daines huggins huggins" has no trigram hit; two words used. */
    ngram_tg_score(model, w_daines, w_huggins, w_huggins, &n_used);
    TEST_EQUAL(n_used, 2);
    /* "david david david" backs off all the way to the unigram. */
    ngram_tg_score(model, w_david, w_david, w_david, &n_used);
    TEST_EQUAL(n_used, 1);

    /* Apply weights. */
    ngram_model_apply_weights(model, 7.5, 0.5, 1.0);
    /* -9452 * 7.5 + log(0.5) = -77821 */
    TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
                   -77821);
    /* ngram_probv() recovers the original (unweighted) score. */
    TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
                   -9452);
    TEST_EQUAL_LOG(ngram_probv(model, "huggins", "david", NULL), -831);

    /* Un-apply weights. */
    ngram_model_apply_weights(model, 1.0, 1.0, 1.0);
    TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
                   -9452);
    TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831);
    /* Recover original score. */
    TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL),
                   -9452);
    /* Pre-weighting, this should give the "raw" score. */
    TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
                   -9452);
    TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831);

    /* Verify that backoff mode calculations work. */
    ngram_bg_score(model, w_huggins, w_david, &n_used);
    TEST_EQUAL(n_used, 2);
    /* First word is nonsense (presumably OOV), so only one word is used. */
    ngram_bg_score(model, ngram_wid(model, "blorglehurfle"), w_david, &n_used);
    TEST_EQUAL(n_used, 1);
    ngram_bg_score(model, w_david, w_david, &n_used);
    TEST_EQUAL(n_used, 1);
    /* Full trigram hit consumes all three words of history. */
    ngram_tg_score(model, w_daines, w_huggins, w_david, &n_used);
    TEST_EQUAL(n_used, 3);
}
/* Check one known unigram score from the turtle unigram LM. */
static int
test_lm_ug_vals(ngram_model_t *model)
{
    int32 score;

    TEST_ASSERT(model);
    score = ngram_score(model, "BACKWARD", NULL);
    TEST_EQUAL(score, -53008);
    return 0;
}
int main(int argc, char *argv[]) { logmath_t *lmath; ngram_model_t *model; /* Initialize a logmath object to pass to ngram_read */ lmath = logmath_init(1.0001, 0, 0); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm", NGRAM_ARPA, lmath); test_lm_ug_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm.dmp", NGRAM_BIN, lmath); test_lm_ug_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model with missing backoffs */ model = ngram_model_read(NULL, LMDIR "/104.lm.gz", NGRAM_ARPA, lmath); TEST_EQUAL(0, ngram_model_free(model)); /* Read corrupted language model, error expected */ model = ngram_model_read(NULL, LMDIR "/105.lm.gz", NGRAM_ARPA, lmath); TEST_EQUAL(NULL, model); /* Read corrupted language model, error expected */ model = ngram_model_read(NULL, LMDIR "/106.lm.gz", NGRAM_ARPA, lmath); TEST_EQUAL(NULL, model); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/100.lm.bz2", NGRAM_ARPA, lmath); test_lm_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/100.lm.bin", NGRAM_BIN, lmath); test_lm_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath); test_lm_vals(model); /* Test refcounting. */ model = ngram_model_retain(model); TEST_EQUAL(1, ngram_model_free(model)); TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452); TEST_EQUAL(0, ngram_model_free(model)); logmath_free(lmath); return 0; }
/* Sanity-check the 100 LM: word<->ID round-trips and known
 * unigram/bigram/trigram scores via both the string and wid APIs. */
static int
test_lm_vals(ngram_model_t *model)
{
    int32 n_used;
    int32 w_unk, w_st;

    TEST_ASSERT(model);

    /* Word <-> ID mappings must round-trip. */
    TEST_EQUAL(ngram_wid(model, "<UNK>"), 0);
    TEST_EQUAL(strcmp(ngram_word(model, 0), "<UNK>"), 0);
    TEST_EQUAL(ngram_wid(model, "absolute"), 13);
    TEST_EQUAL(strcmp(ngram_word(model, 13), "absolute"), 0);

    /* Unigrams: the string and wid scoring interfaces must agree. */
    w_unk = ngram_wid(model, "<UNK>");
    TEST_EQUAL(ngram_score(model, "<UNK>", NULL), -75346);
    TEST_EQUAL(ngram_bg_score(model, w_unk, NGRAM_INVALID_WID, &n_used),
               -75346);
    TEST_EQUAL(n_used, 1);

    w_st = ngram_wid(model, "sphinxtrain");
    TEST_EQUAL(ngram_score(model, "sphinxtrain", NULL), -64208);
    TEST_EQUAL(ngram_bg_score(model, w_st, NGRAM_INVALID_WID, &n_used),
               -64208);
    TEST_EQUAL(n_used, 1);

    /* Test bigrams. */
    TEST_EQUAL(ngram_score(model, "huggins", "david", NULL), -831);
    /* Test trigrams. */
    TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL),
                   -9450);
    return 0;
}
/* Exercise class-based language model support: load a class definition,
 * verify in-class and out-of-class unigram/bigram/trigram scores against
 * values computed directly with logmath, add words to an existing class,
 * and create a brand-new class at runtime.
 *
 * Fix: use snprintf() instead of sprintf() when building generated class
 * word names, so the write into word[32] is explicitly bounded (output is
 * identical for the i < 129 range used here). */
void
run_tests(logmath_t *lmath, ngram_model_t *model)
{
    int32 rv, i;

    TEST_ASSERT(model);

    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
    TEST_EQUAL(rv, 0);

    /* Verify that class word IDs remain the same. */
    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    /* Verify in-class word IDs (high bit flags a class member). */
    TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);

    /* Verify in-class and out-class unigram scores: class members score
     * as class score + log(in-class probability). */
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884)
                   + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884)
                   + logmath_log(lmath, 0.1));
    TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038)
                   + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038));

    /* Verify class bigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642));
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642)
                   + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    /* In-class history words score the same as the class itself. */
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));

    /* Verify class trigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725)
                   + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));

    /* Add words to classes. */
    rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
    TEST_ASSERT(rv >= 0);
    TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
    TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884)
                   + logmath_log(lmath, 0.2));
    printf("scrappy:scylla %08x %d %f\n",
           ngram_wid(model, "scrappy:scylla"),
           ngram_score(model, "scrappy:scylla", NULL),
           logmath_exp(lmath, ngram_score(model, "scrappy:scylla", NULL)));

    /* Add a lot of words to a class (forces internal growth). */
    for (i = 0; i < 129; ++i) {
        char word[32];
        /* Bounded write; "%d:scylla" always fits for 0 <= i < 129. */
        snprintf(word, sizeof(word), "%d:scylla", i);
        rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
        printf("%s %08x %d %f\n", word,
               ngram_wid(model, word),
               ngram_score(model, word, NULL),
               logmath_exp(lmath, ngram_score(model, word, NULL)));
        TEST_ASSERT(rv >= 0);
        /* Word IDs must be assigned sequentially after scrappy:scylla. */
        TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
    }

    /* Add a new class. */
    {
        const char *words[] = { "blatz:foobie", "hurf:foobie" };
        float32 weights[] = { 0.6, 0.4 };
        int32 foobie_prob;

        rv = ngram_model_add_class(model, "[foobie]", 1.0, words, weights, 2);
        TEST_ASSERT(rv >= 0);
        foobie_prob = ngram_score(model, "[foobie]", NULL);
        /* Members score as class score + log(member weight). */
        TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.6));
        TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.4));
    }
}