void run_tests(ngram_model_t *model) { int32 n_used; ngram_tg_score(model, ngram_wid(model, "daines"), ngram_wid(model, "huggins"), ngram_wid(model, "huggins"), &n_used); TEST_EQUAL(n_used, 2); ngram_tg_score(model, ngram_wid(model, "david"), ngram_wid(model, "david"), ngram_wid(model, "david"), &n_used); TEST_EQUAL(n_used, 1); /* Apply weights. */ ngram_model_apply_weights(model, 7.5, 0.5, 1.0); /* -9452 * 7.5 + log(0.5) = -77821 */ TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL), -77821); /* Recover original score. */ TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL), -9452); TEST_EQUAL_LOG(ngram_probv(model, "huggins", "david", NULL), -831); /* Un-apply weights. */ ngram_model_apply_weights(model, 1.0, 1.0, 1.0); TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL), -9452); TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831); /* Recover original score. */ TEST_EQUAL_LOG(ngram_probv(model, "daines", "huggins", "david", NULL), -9452); /* Pre-weighting, this should give the "raw" score. */ TEST_EQUAL_LOG(ngram_score(model, "daines", "huggins", "david", NULL), -9452); TEST_EQUAL_LOG(ngram_score(model, "huggins", "david", NULL), -831); /* Verify that backoff mode calculations work. */ ngram_bg_score(model, ngram_wid(model, "huggins"), ngram_wid(model, "david"), &n_used); TEST_EQUAL(n_used, 2); ngram_bg_score(model, ngram_wid(model, "blorglehurfle"), ngram_wid(model, "david"), &n_used); TEST_EQUAL(n_used, 1); ngram_bg_score(model, ngram_wid(model, "david"), ngram_wid(model, "david"), &n_used); TEST_EQUAL(n_used, 1); ngram_tg_score(model, ngram_wid(model, "daines"), ngram_wid(model, "huggins"), ngram_wid(model, "david"), &n_used); TEST_EQUAL(n_used, 3); }
/**
 * Read an N-gram language model from a file and apply configured weights.
 *
 * @param config    Optional configuration; when non-NULL, the "-lw" and
 *                  "-wip" values (each defaulting to 1.0 when absent) are
 *                  applied to the loaded model.  It is also forwarded to
 *                  the underlying readers.
 * @param file_name Path of the model file, forwarded to the readers.
 * @param file_type NGRAM_AUTO tries the bin, ARPA and DMP readers in that
 *                  order; NGRAM_ARPA uses only the ARPA reader; NGRAM_BIN
 *                  tries the bin reader and then the DMP reader.  Any other
 *                  value is reported as unsupported.
 * @param lmath     Log-math object forwarded to the readers.
 * @return The loaded model, or NULL if no reader succeeded or the file
 *         type is not supported.
 */
ngram_model_t *
ngram_model_read(cmd_ln_t * config,
                 const char *file_name,
                 ngram_file_type_t file_type, logmath_t * lmath)
{
    ngram_model_t *model = NULL;

    switch (file_type) {
    case NGRAM_AUTO:
        if ((model = ngram_model_trie_read_bin(config, file_name, lmath)) != NULL)
            break;
        if ((model = ngram_model_trie_read_arpa(config, file_name, lmath)) != NULL)
            break;
        if ((model = ngram_model_trie_read_dmp(config, file_name, lmath)) != NULL)
            break;
        return NULL;
    case NGRAM_ARPA:
        model = ngram_model_trie_read_arpa(config, file_name, lmath);
        break;
    case NGRAM_BIN:
        if ((model = ngram_model_trie_read_bin(config, file_name, lmath)) != NULL)
            break;
        if ((model = ngram_model_trie_read_dmp(config, file_name, lmath)) != NULL)
            break;
        return NULL;
    default:
        E_ERROR("language model file type not supported\n");
        return NULL;
    }

    /* The NGRAM_ARPA branch falls through to here even when the reader
     * failed; bail out so weights are never applied to a NULL model. */
    if (model == NULL)
        return NULL;

    /* Now set weights based on config if present. */
    if (config) {
        float32 lw = 1.0;
        float32 wip = 1.0;

        if (cmd_ln_exists_r(config, "-lw"))
            lw = cmd_ln_float32_r(config, "-lw");
        if (cmd_ln_exists_r(config, "-wip"))
            wip = cmd_ln_float32_r(config, "-wip");

        ngram_model_apply_weights(model, lw, wip);
    }

    return model;
}
/**
 * Apply the given weights to every sub-model of a model set.
 *
 * @param base Model to operate on; it is treated as an ngram_model_set_t.
 * @param lw   Weight forwarded unchanged to each sub-model.
 * @param wip  Weight forwarded unchanged to each sub-model.
 * @return Always 0.
 */
static int
ngram_model_set_apply_weights(ngram_model_t * base, float32 lw, float32 wip)
{
    ngram_model_set_t *model_set = (ngram_model_set_t *) base;
    int32 idx = 0;

    /* Walk the sub-model array and forward the weights to each entry. */
    while (idx < model_set->n_models) {
        ngram_model_apply_weights(model_set->lms[idx], lw, wip);
        ++idx;
    }

    return 0;
}
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; const char *lmfn, *probdefn, *lsnfn, *text; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; verbose = cmd_ln_boolean_r(config, "-verbose"); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } /* Load the language model. */ lmfn = cmd_ln_str_r(config, "-lm"); if (lmfn == NULL || (lm = ngram_model_read(config, lmfn, NGRAM_AUTO, lmath)) == NULL) { E_FATAL("Failed to load language model from %s\n", cmd_ln_str_r(config, "-lm")); } if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL) ngram_model_read_classdef(lm, probdefn); ngram_model_apply_weights(lm, cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), cmd_ln_float32_r(config, "-uw")); /* Now evaluate some text. */ lsnfn = cmd_ln_str_r(config, "-lsn"); text = cmd_ln_str_r(config, "-text"); if (lsnfn) { evaluate_file(lm, lmath, lsnfn); } else if (text) { evaluate_string(lm, lmath, text); } return 0; }