int main(int argc, char *argv[]) { logmath_t *lmath; ngram_model_t *model; /* Initialize a logmath object to pass to ngram_read */ lmath = logmath_init(1.0001, 0, 0); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_BIN, lmath); TEST_ASSERT(model); ngram_model_casefold(model, NGRAM_UPPER); TEST_EQUAL(0, strcmp("</s>", ngram_word(model, 5))); TEST_EQUAL(0, strcmp("BE", ngram_word(model, 42))); TEST_EQUAL(0, strcmp("FLOORED", ngram_word(model, 130))); TEST_EQUAL(0, strcmp("ZERO", ngram_word(model, 398))); TEST_EQUAL(0, strcmp("~", ngram_word(model, 399))); ngram_model_casefold(model, NGRAM_LOWER); TEST_EQUAL(0, strcmp("</s>", ngram_word(model, 5))); TEST_EQUAL(0, strcmp("be", ngram_word(model, 42))); TEST_EQUAL(0, strcmp("floored", ngram_word(model, 130))); TEST_EQUAL(0, strcmp("zero", ngram_word(model, 398))); TEST_EQUAL(0, strcmp("~", ngram_word(model, 399))); ngram_model_free(model); logmath_free(lmath); return 0; }
int main(int argc, char *argv[]) { logmath_t *lmath; ngram_model_t *model; /* Initialize a logmath object to pass to ngram_read */ lmath = logmath_init(1.0001, 0, 0); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm", NGRAM_ARPA, lmath); test_lm_ug_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm.dmp", NGRAM_BIN, lmath); test_lm_ug_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model with missing backoffs */ model = ngram_model_read(NULL, LMDIR "/104.lm.gz", NGRAM_ARPA, lmath); TEST_EQUAL(0, ngram_model_free(model)); /* Read corrupted language model, error expected */ model = ngram_model_read(NULL, LMDIR "/105.lm.gz", NGRAM_ARPA, lmath); TEST_EQUAL(NULL, model); /* Read corrupted language model, error expected */ model = ngram_model_read(NULL, LMDIR "/106.lm.gz", NGRAM_ARPA, lmath); TEST_EQUAL(NULL, model); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/100.lm.bz2", NGRAM_ARPA, lmath); test_lm_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/100.lm.bin", NGRAM_BIN, lmath); test_lm_vals(model); TEST_EQUAL(0, ngram_model_free(model)); /* Read a language model */ model = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath); test_lm_vals(model); /* Test refcounting. */ model = ngram_model_retain(model); TEST_EQUAL(1, ngram_model_free(model)); TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452); TEST_EQUAL(0, ngram_model_free(model)); logmath_free(lmath); return 0; }
int main(int argc, char *argv[]) { logmath_t *lmath; ngram_model_t *model; lmath = logmath_init(1.0001, 0, 0); model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath); run_tests(model); ngram_model_free(model); model = ngram_model_read(NULL, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath); run_tests(model); ngram_model_free(model); logmath_free(lmath); return 0; }
int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path) { ngram_model_t *lm; int result; lm = NULL; if (path) lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); result = ps_set_allphone(ps, name, lm); if (lm) ngram_model_free(lm); return result; }
int ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path) { ngram_model_t *lm; int result; lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); if (!lm) return -1; result = ps_set_lm(ps, name, lm); ngram_model_free(lm); return result; }
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; const char *lmfn, *probdefn, *lsnfn, *text; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; verbose = cmd_ln_boolean_r(config, "-verbose"); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } /* Load the language model. */ lmfn = cmd_ln_str_r(config, "-lm"); if (lmfn == NULL || (lm = ngram_model_read(config, lmfn, NGRAM_AUTO, lmath)) == NULL) { E_FATAL("Failed to load language model from %s\n", cmd_ln_str_r(config, "-lm")); } if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL) ngram_model_read_classdef(lm, probdefn); ngram_model_apply_weights(lm, cmd_ln_float32_r(config, "-lw"), cmd_ln_float32_r(config, "-wip"), cmd_ln_float32_r(config, "-uw")); /* Now evaluate some text. */ lsnfn = cmd_ln_str_r(config, "-lsn"); text = cmd_ln_str_r(config, "-text"); if (lsnfn) { evaluate_file(lm, lmath, lsnfn); } else if (text) { evaluate_string(lm, lmath, text); } return 0; }
static void test_set_search() { cmd_ln_t *config = default_config(); ps_decoder_t *ps = ps_init(config); ps_search_iter_t *itor; jsgf_t *jsgf = jsgf_parse_file(DATADIR "/goforward.gram", NULL); fsg_model_t *fsg = jsgf_build_fsg(jsgf, jsgf_get_rule(jsgf, "goforward.move"), ps->lmath, cmd_ln_int32_r(config, "-lw")); TEST_ASSERT(!ps_set_fsg(ps, "goforward", fsg)); fsg_model_free(fsg); TEST_ASSERT(!ps_set_jsgf_file(ps, "goforward_other", DATADIR "/goforward.gram")); ngram_model_t *lm = ngram_model_read(config, DATADIR "/tidigits/lm/tidigits.lm.dmp", NGRAM_AUTO, ps->lmath); TEST_ASSERT(!ps_set_lm(ps, "tidigits", lm)); ngram_model_free(lm); TEST_ASSERT(!ps_set_search(ps, "tidigits")); TEST_ASSERT(!ps_set_search(ps, "goforward")); itor = ps_search_iter(ps); TEST_EQUAL(0, strcmp("goforward_other", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(0, strcmp("tidigits", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(0, strcmp("goforward", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(0, strcmp("phone_loop", ps_search_iter_val(itor))); itor = ps_search_iter_next(itor); TEST_EQUAL(NULL, itor); TEST_ASSERT(!ps_start_utt(ps)); TEST_ASSERT(!ps_end_utt(ps)); ps_free(ps); cmd_ln_free_r(config); }
int main() { logmath_t *logmath; ngram_model_t *model; FILE *fp; char *line = NULL, *grapheme, *phoneme, *predicted_phoneme; int32 different_word_count = 0, fit_count = 0; size_t len = 0; err_set_logfp(NULL); logmath = logmath_init(1.0001f, 0, 0); model = ngram_model_read(NULL, "cmudict-en-us.dmp", NGRAM_AUTO, logmath); fp = fopen("cmudict-en-us.dict", "r"); while (getline(&line, &len, fp) != -1) { grapheme = strtok(line, " "); phoneme = strtok(NULL, "\n"); if (strstr(grapheme, "(")) { grapheme = strtok(grapheme, "("); } else { different_word_count++; } predicted_phoneme = g2p(model, grapheme, 100); if (predicted_phoneme && strcmp(phoneme, predicted_phoneme) == 0) { fit_count++; } ckd_free(predicted_phoneme); } printf("%d %d %f\n", fit_count, different_word_count, fit_count * 1.0 / different_word_count); ckd_free(grapheme); fclose(fp); ngram_model_free(model); logmath_free(logmath); }
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; int itype, otype; char const *kase; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; if (cmd_ln_boolean_r(config, "-help")) { usagemsg(argv[0]); } err_set_debug_level(cmd_ln_int32_r(config, "-debug")); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } if (cmd_ln_str_r(config, "-i") == NULL || cmd_ln_str_r(config, "-i") == NULL) { E_ERROR("Please specify both input and output models\n"); goto error_out; } /* Load the input language model. */ if (cmd_ln_str_r(config, "-ifmt")) { if ((itype = ngram_str_to_type(cmd_ln_str_r(config, "-ifmt"))) == NGRAM_INVALID) { E_ERROR("Invalid input type %s\n", cmd_ln_str_r(config, "-ifmt")); goto error_out; } lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), itype, lmath); } else { lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), NGRAM_AUTO, lmath); } /* Guess or set the output language model type. */ if (cmd_ln_str_r(config, "-ofmt")) { if ((otype = ngram_str_to_type(cmd_ln_str_r(config, "-ofmt"))) == NGRAM_INVALID) { E_ERROR("Invalid output type %s\n", cmd_ln_str_r(config, "-ofmt")); goto error_out; } } else { otype = ngram_file_name_to_type(cmd_ln_str_r(config, "-o")); } /* Recode the language model if desired. */ if (cmd_ln_str_r(config, "-ienc")) { if (ngram_model_recode(lm, cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")) != 0) { E_ERROR("Failed to recode language model from %s to %s\n", cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")); goto error_out; } } /* Case fold if requested. */ if ((kase = cmd_ln_str_r(config, "-case"))) { if (0 == strcmp(kase, "lower")) { ngram_model_casefold(lm, NGRAM_LOWER); } else if (0 == strcmp(kase, "upper")) { ngram_model_casefold(lm, NGRAM_UPPER); } else { E_ERROR("Unknown value for -case: %s\n", kase); goto error_out; } } /* Write the output language model. */ if (ngram_model_write(lm, cmd_ln_str_r(config, "-o"), otype) != 0) { E_ERROR("Failed to write language model in format %s to %s\n", ngram_type_to_str(otype), cmd_ln_str_r(config, "-o")); goto error_out; } /* That's all folks! */ ngram_model_free(lm); return 0; error_out: ngram_model_free(lm); return 1; }
int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) { const char *path; const char *keyword_list; int32 lw; if (config && config != ps->config) { cmd_ln_free_r(ps->config); ps->config = cmd_ln_retain(config); } err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug")); ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir"); ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir"); ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir"); /* Fill in some default arguments. */ ps_init_defaults(ps); /* Free old searches (do this before other reinit) */ ps_free_searches(ps); ps->searches = hash_table_new(3, HASH_CASE_YES); /* Free old acmod. */ acmod_free(ps->acmod); ps->acmod = NULL; /* Free old dictionary (must be done after the two things above) */ dict_free(ps->dict); ps->dict = NULL; /* Free d2p */ dict2pid_free(ps->d2p); ps->d2p = NULL; /* Logmath computation (used in acmod and search) */ if (ps->lmath == NULL || (logmath_get_base(ps->lmath) != (float64)cmd_ln_float32_r(ps->config, "-logbase"))) { if (ps->lmath) logmath_free(ps->lmath); ps->lmath = logmath_init ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0, cmd_ln_boolean_r(ps->config, "-bestpath")); } /* Acoustic model (this is basically everything that * uttproc.c, senscr.c, and others used to do) */ if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL) return -1; if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) { /* Initialize an auxiliary phone loop search, which will run in * "parallel" with FSG or N-Gram search. */ if ((ps->phone_loop = phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL) return -1; hash_table_enter(ps->searches, ckd_salloc(ps_search_name(ps->phone_loop)), ps->phone_loop); } /* Dictionary and triphone mappings (depends on acmod). */ /* FIXME: pass config, change arguments, implement LTS, etc. */ if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL) return -1; if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) return -1; lw = cmd_ln_float32_r(config, "-lw"); /* Determine whether we are starting out in FSG or N-Gram search mode. * If neither is used skip search initialization. */ /* Load KWS if one was specified in config */ if ((keyword_list = cmd_ln_str_r(config, "-kws"))) { if (ps_set_kws(ps, PS_DEFAULT_SEARCH, keyword_list)) return -1; ps_set_search(ps, PS_DEFAULT_SEARCH); } /* Load an FSG if one was specified in config */ if ((path = cmd_ln_str_r(config, "-fsg"))) { fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw); if (!fsg) return -1; if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) return -1; ps_set_search(ps, PS_DEFAULT_SEARCH); } if ((path = cmd_ln_str_r(config, "-jsgf"))) { /* Or load a JSGF grammar */ fsg_model_t *fsg; jsgf_rule_t *rule; char const *toprule; jsgf_t *jsgf = jsgf_parse_file(path, NULL); if (!jsgf) return -1; rule = NULL; /* Take the -toprule if specified. */ if ((toprule = cmd_ln_str_r(config, "-toprule"))) { char *ruletok; ruletok = string_join("<", toprule, ">", NULL); rule = jsgf_get_rule(jsgf, ruletok); ckd_free(ruletok); if (rule == NULL) { E_ERROR("Start rule %s not found\n", toprule); return -1; } } else { /* Otherwise, take the first public rule. */ jsgf_rule_iter_t *itor; for (itor = jsgf_rule_iter(jsgf); itor; itor = jsgf_rule_iter_next(itor)) { rule = jsgf_rule_iter_rule(itor); if (jsgf_rule_public(rule)) { jsgf_rule_iter_free(itor); break; } } if (rule == NULL) { E_ERROR("No public rules found in %s\n", path); return -1; } } fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg); fsg_model_free(fsg); ps_set_search(ps, PS_DEFAULT_SEARCH); } if ((path = cmd_ln_str_r(ps->config, "-lm"))) { ngram_model_t *lm; lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); if (!lm) return -1; if (ps_set_lm(ps, PS_DEFAULT_SEARCH, lm)) { ngram_model_free(lm); return -1; } ngram_model_free(lm); ps_set_search(ps, PS_DEFAULT_SEARCH); } if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) { const char *name; ngram_model_t *lmset; ngram_model_set_iter_t *lmset_it; if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) { E_ERROR("Failed to read language model control file: %s\n", path); return -1; } for(lmset_it = ngram_model_set_iter(lmset); lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) { ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name); E_INFO("adding search %s\n", name); if (ps_set_lm(ps, name, lm)) { ngram_model_free(lm); ngram_model_set_iter_free(lmset_it); return -1; } ngram_model_free(lm); } name = cmd_ln_str_r(config, "-lmname"); if (name) ps_set_search(ps, name); else E_WARN("No default LM name (-lmname) for `-lmctl'\n"); } /* Initialize performance timer. */ ps->perf.name = "decode"; ptmr_init(&ps->perf); return 0; }
ngram_model_t * ngram_model_set_read(cmd_ln_t * config, const char *lmctlfile, logmath_t * lmath) { FILE *ctlfp; glist_t lms = NULL; glist_t lmnames = NULL; __BIGSTACKVARIABLE__ char str[1024]; ngram_model_t *set = NULL; hash_table_t *classes; char *basedir, *c; /* Read all the class definition files to accumulate a mapping of * classnames to definitions. */ classes = hash_table_new(0, FALSE); if ((ctlfp = fopen(lmctlfile, "r")) == NULL) { E_ERROR_SYSTEM("Failed to open %s", lmctlfile); return NULL; } /* Try to find the base directory to append to relative paths in * the lmctl file. */ if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) { /* Include the trailing slash. */ basedir = ckd_calloc(c - lmctlfile + 2, 1); memcpy(basedir, lmctlfile, c - lmctlfile + 1); } else { basedir = NULL; } E_INFO("Reading LM control file '%s'\n", lmctlfile); if (basedir) E_INFO("Will prepend '%s' to unqualified paths\n", basedir); if (fscanf(ctlfp, "%1023s", str) == 1) { if (strcmp(str, "{") == 0) { /* Load LMclass files */ while ((fscanf(ctlfp, "%1023s", str) == 1) && (strcmp(str, "}") != 0)) { char *deffile; if (basedir && !path_is_absolute(str)) deffile = string_join(basedir, str, NULL); else deffile = ckd_salloc(str); E_INFO("Reading classdef from '%s'\n", deffile); if (read_classdef_file(classes, deffile) < 0) { ckd_free(deffile); goto error_out; } ckd_free(deffile); } if (strcmp(str, "}") != 0) { E_ERROR("Unexpected EOF in %s\n", lmctlfile); goto error_out; } /* This might be the first LM name. */ if (fscanf(ctlfp, "%1023s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; /* Read in one LM at a time and add classes to them as necessary. */ while (str[0] != '\0') { char *lmfile; ngram_model_t *lm; if (basedir && str[0] != '/' && str[0] != '\\') lmfile = string_join(basedir, str, NULL); else lmfile = ckd_salloc(str); E_INFO("Reading lm from '%s'\n", lmfile); lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath); if (lm == NULL) { ckd_free(lmfile); goto error_out; } if (fscanf(ctlfp, "%1023s", str) != 1) { E_ERROR("LMname missing after LMFileName '%s'\n", lmfile); ckd_free(lmfile); goto error_out; } ckd_free(lmfile); lms = glist_add_ptr(lms, lm); lmnames = glist_add_ptr(lmnames, ckd_salloc(str)); if (fscanf(ctlfp, "%1023s", str) == 1) { if (strcmp(str, "{") == 0) { /* LM uses classes; read their names */ while ((fscanf(ctlfp, "%1023s", str) == 1) && (strcmp(str, "}") != 0)) { void *val; classdef_t *classdef; if (hash_table_lookup(classes, str, &val) == -1) { E_ERROR("Unknown class %s in control file\n", str); goto error_out; } classdef = val; if (ngram_model_add_class(lm, str, 1.0, classdef->words, classdef->weights, classdef->n_words) < 0) { goto error_out; } E_INFO("Added class %s containing %d words\n", str, classdef->n_words); } if (strcmp(str, "}") != 0) { E_ERROR("Unexpected EOF in %s\n", lmctlfile); goto error_out; } if (fscanf(ctlfp, "%1023s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; } fclose(ctlfp); /* Now construct arrays out of lms and lmnames, and build an * ngram_model_set. */ lms = glist_reverse(lms); lmnames = glist_reverse(lmnames); { int32 n_models; ngram_model_t **lm_array; char **name_array; gnode_t *lm_node, *name_node; int32 i; n_models = glist_count(lms); lm_array = ckd_calloc(n_models, sizeof(*lm_array)); name_array = ckd_calloc(n_models, sizeof(*name_array)); lm_node = lms; name_node = lmnames; for (i = 0; i < n_models; ++i) { lm_array[i] = gnode_ptr(lm_node); name_array[i] = gnode_ptr(name_node); lm_node = gnode_next(lm_node); name_node = gnode_next(name_node); } set = ngram_model_set_init(config, lm_array, name_array, NULL, n_models); for (i = 0; i < n_models; ++i) { ngram_model_free(lm_array[i]); } ckd_free(lm_array); ckd_free(name_array); } error_out: { gnode_t *gn; glist_t hlist; if (set == NULL) { for (gn = lms; gn; gn = gnode_next(gn)) { ngram_model_free(gnode_ptr(gn)); } } glist_free(lms); for (gn = lmnames; gn; gn = gnode_next(gn)) { ckd_free(gnode_ptr(gn)); } glist_free(lmnames); hlist = hash_table_tolist(classes, NULL); for (gn = hlist; gn; gn = gnode_next(gn)) { hash_entry_t *he = gnode_ptr(gn); ckd_free((char *) he->key); classdef_free(he->val); } glist_free(hlist); hash_table_free(classes); ckd_free(basedir); } return set; }
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t * mdef, logmath_t *logmath) { FILE *fp, *fp2; int32 n; lineiter_t *li; dict_t *d; s3cipid_t sil; char const *dictfile = NULL, *fillerfile = NULL, *arpafile = NULL; if (config) { dictfile = cmd_ln_str_r(config, "-dict"); fillerfile = cmd_ln_str_r(config, "-fdict"); arpafile = string_join(dictfile, ".dmp", NULL); } /* * First obtain #words in dictionary (for hash table allocation). * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate * all the required memory in one go. */ fp = NULL; n = 0; if (dictfile) { if ((fp = fopen(dictfile, "r")) == NULL) { E_ERROR_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile); return NULL; } for (li = lineiter_start(fp); li; li = lineiter_next(li)) { if (0 != strncmp(li->buf, "##", 2) && 0 != strncmp(li->buf, ";;", 2)) n++; } fseek(fp, 0L, SEEK_SET); } fp2 = NULL; if (fillerfile) { if ((fp2 = fopen(fillerfile, "r")) == NULL) { E_ERROR_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile); fclose(fp); return NULL; } for (li = lineiter_start(fp2); li; li = lineiter_next(li)) { if (0 != strncmp(li->buf, "##", 2) && 0 != strncmp(li->buf, ";;", 2)) n++; } fseek(fp2, 0L, SEEK_SET); } /* * Allocate dict entries. HACK!! Allow some extra entries for words not in file. * Also check for type size restrictions. */ d = (dict_t *) ckd_calloc(1, sizeof(dict_t)); /* freed in dict_free() */ if (arpafile) { ngram_model_t *ngram_g2p_model = ngram_model_read(NULL,arpafile,NGRAM_AUTO,logmath); if (!ngram_g2p_model) { E_ERROR("No arpa model found \n"); return NULL; } d->ngram_g2p_model = ngram_g2p_model; ckd_free(arpafile); } d->refcnt = 1; d->max_words = (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID; if (n >= MAX_S3WID) { E_ERROR("Number of words in dictionaries (%d) exceeds limit (%d)\n", n, MAX_S3WID); fclose(fp); fclose(fp2); ckd_free(d); return NULL; } E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n", d->max_words, sizeof(dictword_t), d->max_words * sizeof(dictword_t) / 1024); d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t)); /* freed in dict_free() */ d->n_word = 0; if (mdef) d->mdef = bin_mdef_retain(mdef); /* Create new hash table for word strings; case-insensitive word strings */ if (config && cmd_ln_exists_r(config, "-dictcase")) d->nocase = cmd_ln_boolean_r(config, "-dictcase"); d->ht = hash_table_new(d->max_words, d->nocase); /* Digest main dictionary file */ if (fp) { E_INFO("Reading main dictionary: %s\n", dictfile); dict_read(fp, d); fclose(fp); E_INFO("%d words read\n", d->n_word); } /* Now the filler dictionary file, if it exists */ d->filler_start = d->n_word; if (fillerfile) { E_INFO("Reading filler dictionary: %s\n", fillerfile); dict_read(fp2, d); fclose(fp2); E_INFO("%d words read\n", d->n_word - d->filler_start); } if (mdef) sil = bin_mdef_silphone(mdef); else sil = 0; if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) { dict_add_word(d, S3_START_WORD, &sil, 1); } if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) { dict_add_word(d, S3_FINISH_WORD, &sil, 1); } if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) { dict_add_word(d, S3_SILENCE_WORD, &sil, 1); } d->filler_end = d->n_word - 1; /* Initialize distinguished word-ids */ d->startwid = dict_wordid(d, S3_START_WORD); d->finishwid = dict_wordid(d, S3_FINISH_WORD); d->silwid = dict_wordid(d, S3_SILENCE_WORD); if ((d->filler_start > d->filler_end) || (!dict_filler_word(d, d->silwid))) { E_ERROR("Word '%s' must occur (only) in filler dictionary\n", S3_SILENCE_WORD); dict_free(d); return NULL; } /* No check that alternative pronunciations for filler words are in filler range!! */ return d; }