int
main(int argc, char *argv[])
{
	logmath_t *lmath;
	ngram_model_t *model;

	/* Initialize a logmath object to pass to ngram_read */
	lmath = logmath_init(1.0001, 0, 0);

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_BIN, lmath);
	TEST_ASSERT(model);

	ngram_model_casefold(model, NGRAM_UPPER);

	TEST_EQUAL(0, strcmp("</s>", ngram_word(model, 5)));
	TEST_EQUAL(0, strcmp("BE", ngram_word(model, 42)));
	TEST_EQUAL(0, strcmp("FLOORED", ngram_word(model, 130)));
	TEST_EQUAL(0, strcmp("ZERO", ngram_word(model, 398)));
	TEST_EQUAL(0, strcmp("~", ngram_word(model, 399)));

	ngram_model_casefold(model, NGRAM_LOWER);

	TEST_EQUAL(0, strcmp("</s>", ngram_word(model, 5)));
	TEST_EQUAL(0, strcmp("be", ngram_word(model, 42)));
	TEST_EQUAL(0, strcmp("floored", ngram_word(model, 130)));
	TEST_EQUAL(0, strcmp("zero", ngram_word(model, 398)));
	TEST_EQUAL(0, strcmp("~", ngram_word(model, 399)));

	ngram_model_free(model);
	logmath_free(lmath);

	return 0;
}
Exemple #2
0
int
main(int argc, char *argv[])
{
	logmath_t *lmath;
	ngram_model_t *model;

	/* Initialize a logmath object to pass to ngram_read */
	lmath = logmath_init(1.0001, 0, 0);

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm", NGRAM_ARPA, lmath);
	test_lm_ug_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/turtle.ug.lm.dmp", NGRAM_BIN, lmath);
	test_lm_ug_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model with missing backoffs */
	model = ngram_model_read(NULL, LMDIR "/104.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read corrupted language model, error expected */
	model = ngram_model_read(NULL, LMDIR "/105.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(NULL, model);

	/* Read corrupted language model, error expected */
	model = ngram_model_read(NULL, LMDIR "/106.lm.gz", NGRAM_ARPA, lmath);
	TEST_EQUAL(NULL, model);

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/100.lm.bz2", NGRAM_ARPA, lmath);
	test_lm_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/100.lm.bin", NGRAM_BIN, lmath);
	test_lm_vals(model);
	TEST_EQUAL(0, ngram_model_free(model));

	/* Read a language model */
	model = ngram_model_read(NULL, LMDIR "/100.lm.dmp", NGRAM_BIN, lmath);
	test_lm_vals(model);
	/* Test refcounting. */
	model = ngram_model_retain(model);
	TEST_EQUAL(1, ngram_model_free(model));
	TEST_EQUAL(ngram_score(model, "daines", "huggins", "david", NULL), -9452);
	TEST_EQUAL(0, ngram_model_free(model));


	logmath_free(lmath);

	return 0;
}
int
main(int argc, char *argv[])
{
	logmath_t *lmath;
	ngram_model_t *model;

	lmath = logmath_init(1.0001, 0, 0);

	model = ngram_model_read(NULL, LMDIR "/100.arpa.DMP", NGRAM_DMP, lmath);
	run_tests(model);
	ngram_model_free(model);

	model = ngram_model_read(NULL, LMDIR "/100.arpa.gz", NGRAM_ARPA, lmath);
	run_tests(model);
	ngram_model_free(model);

	logmath_free(lmath);
	return 0;
}
Exemple #4
0
int
ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
{
  ngram_model_t *lm;
  int result;

  lm = NULL;
  if (path)
    lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
  result = ps_set_allphone(ps, name, lm);
  if (lm)
      ngram_model_free(lm);
  return result;
}
Exemple #5
0
int
ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
{
  ngram_model_t *lm;
  int result;

  lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
  if (!lm)
      return -1;

  result = ps_set_lm(ps, name, lm);
  ngram_model_free(lm);
  return result;
}
int
main(int argc, char *argv[])
{
	cmd_ln_t *config;
	ngram_model_t *lm = NULL;
	logmath_t *lmath;
	const char *lmfn, *probdefn, *lsnfn, *text;

	if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
		return 1;

        verbose = cmd_ln_boolean_r(config, "-verbose");

	/* Create log math object. */
	if ((lmath = logmath_init
	     (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) {
		E_FATAL("Failed to initialize log math\n");
	}

	/* Load the language model. */
	lmfn = cmd_ln_str_r(config, "-lm");
	if (lmfn == NULL
	    || (lm = ngram_model_read(config, lmfn,
				      NGRAM_AUTO, lmath)) == NULL) {
		E_FATAL("Failed to load language model from %s\n",
			cmd_ln_str_r(config, "-lm"));
	}
        if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL)
            ngram_model_read_classdef(lm, probdefn);
        ngram_model_apply_weights(lm,
                                  cmd_ln_float32_r(config, "-lw"),
                                  cmd_ln_float32_r(config, "-wip"),
                                  cmd_ln_float32_r(config, "-uw"));

	/* Now evaluate some text. */
	lsnfn = cmd_ln_str_r(config, "-lsn");
	text = cmd_ln_str_r(config, "-text");
	if (lsnfn) {
		evaluate_file(lm, lmath, lsnfn);
	}
	else if (text) {
		evaluate_string(lm, lmath, text);
	}

	return 0;
}
static void
test_set_search()
{
    cmd_ln_t *config = default_config();
    ps_decoder_t *ps = ps_init(config);
    ps_search_iter_t *itor;

    jsgf_t *jsgf = jsgf_parse_file(DATADIR "/goforward.gram", NULL);
    fsg_model_t *fsg = jsgf_build_fsg(jsgf,
                                      jsgf_get_rule(jsgf, "goforward.move"),
                                      ps->lmath, cmd_ln_int32_r(config, "-lw"));
    TEST_ASSERT(!ps_set_fsg(ps, "goforward", fsg));
    fsg_model_free(fsg);

    TEST_ASSERT(!ps_set_jsgf_file(ps, "goforward_other", DATADIR "/goforward.gram"));

    ngram_model_t *lm = ngram_model_read(config, DATADIR "/tidigits/lm/tidigits.lm.dmp",
                                         NGRAM_AUTO, ps->lmath);
    TEST_ASSERT(!ps_set_lm(ps, "tidigits", lm));
    ngram_model_free(lm);

    TEST_ASSERT(!ps_set_search(ps, "tidigits"));

    TEST_ASSERT(!ps_set_search(ps, "goforward"));
    
    itor = ps_search_iter(ps);
    TEST_EQUAL(0, strcmp("goforward_other", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(0, strcmp("tidigits", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(0, strcmp("goforward", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(0, strcmp("phone_loop", ps_search_iter_val(itor)));
    itor = ps_search_iter_next(itor);
    TEST_EQUAL(NULL, itor);
    
    TEST_ASSERT(!ps_start_utt(ps));
    TEST_ASSERT(!ps_end_utt(ps));
    ps_free(ps);
    cmd_ln_free_r(config);
}
Exemple #8
0
int main() {
    logmath_t *logmath;
    ngram_model_t *model;
    FILE *fp;
    char *line = NULL, *grapheme, *phoneme, *predicted_phoneme;
    int32 different_word_count = 0, fit_count = 0;
    size_t len = 0;

    err_set_logfp(NULL);
    logmath = logmath_init(1.0001f, 0, 0);
    model = ngram_model_read(NULL, "cmudict-en-us.dmp", NGRAM_AUTO, logmath);

    fp = fopen("cmudict-en-us.dict", "r");

    while (getline(&line, &len, fp) != -1) {
        grapheme = strtok(line, " ");
        phoneme = strtok(NULL, "\n");

        if (strstr(grapheme, "(")) {
            grapheme = strtok(grapheme, "(");
        } else {
            different_word_count++;
        }

        predicted_phoneme = g2p(model, grapheme, 100);
        if (predicted_phoneme && strcmp(phoneme, predicted_phoneme) == 0) {
            fit_count++;
        }

        ckd_free(predicted_phoneme);
    }

    printf("%d %d %f\n", fit_count, different_word_count, fit_count * 1.0 / different_word_count);

    ckd_free(grapheme);
    fclose(fp);
    ngram_model_free(model);
    logmath_free(logmath);
}
int
main(int argc, char *argv[])
{
	cmd_ln_t *config;
	ngram_model_t *lm = NULL;
	logmath_t *lmath;
        int itype, otype;
        char const *kase;

	if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
		return 1;
		
	if (cmd_ln_boolean_r(config, "-help")) {
	    usagemsg(argv[0]);
	}

        err_set_debug_level(cmd_ln_int32_r(config, "-debug"));

	/* Create log math object. */
	if ((lmath = logmath_init
	     (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) {
		E_FATAL("Failed to initialize log math\n");
	}
	
	if (cmd_ln_str_r(config, "-i") == NULL || cmd_ln_str_r(config, "-i") == NULL) {
            E_ERROR("Please specify both input and output models\n");
            goto error_out;
        }
	    
	
	/* Load the input language model. */
        if (cmd_ln_str_r(config, "-ifmt")) {
            if ((itype = ngram_str_to_type(cmd_ln_str_r(config, "-ifmt")))
                == NGRAM_INVALID) {
                E_ERROR("Invalid input type %s\n", cmd_ln_str_r(config, "-ifmt"));
                goto error_out;
            }
            lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"),
                                  itype, lmath);
        }
        else {
            lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"),
                                  NGRAM_AUTO, lmath);
	}

        /* Guess or set the output language model type. */
        if (cmd_ln_str_r(config, "-ofmt")) {
            if ((otype = ngram_str_to_type(cmd_ln_str_r(config, "-ofmt")))
                == NGRAM_INVALID) {
                E_ERROR("Invalid output type %s\n", cmd_ln_str_r(config, "-ofmt"));
                goto error_out;
            }
        }
        else {
            otype = ngram_file_name_to_type(cmd_ln_str_r(config, "-o"));
        }

        /* Recode the language model if desired. */
        if (cmd_ln_str_r(config, "-ienc")) {
            if (ngram_model_recode(lm, cmd_ln_str_r(config, "-ienc"),
                                   cmd_ln_str_r(config, "-oenc")) != 0) {
                E_ERROR("Failed to recode language model from %s to %s\n",
                        cmd_ln_str_r(config, "-ienc"),
                        cmd_ln_str_r(config, "-oenc"));
                goto error_out;
            }
        }

        /* Case fold if requested. */
        if ((kase = cmd_ln_str_r(config, "-case"))) {
            if (0 == strcmp(kase, "lower")) {
                ngram_model_casefold(lm, NGRAM_LOWER);
            }
            else if (0 == strcmp(kase, "upper")) {
                ngram_model_casefold(lm, NGRAM_UPPER);
            }
            else {
                E_ERROR("Unknown value for -case: %s\n", kase);
                goto error_out;
            }
        }

        /* Write the output language model. */
        if (ngram_model_write(lm, cmd_ln_str_r(config, "-o"), otype) != 0) {
            E_ERROR("Failed to write language model in format %s to %s\n",
                    ngram_type_to_str(otype), cmd_ln_str_r(config, "-o"));
            goto error_out;
        }

        /* That's all folks! */
        ngram_model_free(lm);
	return 0;

error_out:
        ngram_model_free(lm);
	return 1;
}
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyword_list;
    int32 lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search) */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ckd_salloc(ps_search_name(ps->phone_loop)),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    lw = cmd_ln_float32_r(config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyword_list = cmd_ln_str_r(config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, keyword_list))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        if (!fsg)
            return -1;
        if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }
    
    if ((path = cmd_ln_str_r(config, "-jsgf"))) {
        /* Or load a JSGF grammar */
        fsg_model_t *fsg;
        jsgf_rule_t *rule;
        char const *toprule;
        jsgf_t *jsgf = jsgf_parse_file(path, NULL);

        if (!jsgf)
            return -1;

        rule = NULL;
        /* Take the -toprule if specified. */
        if ((toprule = cmd_ln_str_r(config, "-toprule"))) {
            char *ruletok;
            ruletok = string_join("<", toprule, ">", NULL);
            rule = jsgf_get_rule(jsgf, ruletok);
            ckd_free(ruletok);
            if (rule == NULL) {
                E_ERROR("Start rule %s not found\n", toprule);
                return -1;
            }
        } else {
            /* Otherwise, take the first public rule. */
            jsgf_rule_iter_t *itor;

            for (itor = jsgf_rule_iter(jsgf); itor;
                 itor = jsgf_rule_iter_next(itor)) {
                rule = jsgf_rule_iter_rule(itor);
                if (jsgf_rule_public(rule)) {
                    jsgf_rule_iter_free(itor);
                    break;
                }
            }
            if (rule == NULL) {
                E_ERROR("No public rules found in %s\n", path);
                return -1;
            }
        }

        fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
        ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg);
        fsg_model_free(fsg);
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-lm"))) {
        ngram_model_t *lm;

        lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
        if (!lm)
            return -1;

        if (ps_set_lm(ps, PS_DEFAULT_SEARCH, lm)) {
            ngram_model_free(lm);
            return -1;
        }
        ngram_model_free(lm);
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        for(lmset_it = ngram_model_set_iter(lmset);
            lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
            
            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);            
            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
            ngram_model_free(lm);
                ngram_model_set_iter_free(lmset_it);
                return -1;
            }
        ngram_model_free(lm);
        }

        name = cmd_ln_str_r(config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else
            E_WARN("No default LM name (-lmname) for `-lmctl'\n");
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
ngram_model_t *
ngram_model_set_read(cmd_ln_t * config,
                     const char *lmctlfile, logmath_t * lmath)
{
    FILE *ctlfp;
    glist_t lms = NULL;
    glist_t lmnames = NULL;
    __BIGSTACKVARIABLE__ char str[1024];
    ngram_model_t *set = NULL;
    hash_table_t *classes;
    char *basedir, *c;

    /* Read all the class definition files to accumulate a mapping of
     * classnames to definitions. */
    classes = hash_table_new(0, FALSE);
    if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
        return NULL;
    }

    /* Try to find the base directory to append to relative paths in
     * the lmctl file. */
    if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
        /* Include the trailing slash. */
        basedir = ckd_calloc(c - lmctlfile + 2, 1);
        memcpy(basedir, lmctlfile, c - lmctlfile + 1);
    }
    else {
        basedir = NULL;
    }
    E_INFO("Reading LM control file '%s'\n", lmctlfile);
    if (basedir)
        E_INFO("Will prepend '%s' to unqualified paths\n", basedir);

    if (fscanf(ctlfp, "%1023s", str) == 1) {
        if (strcmp(str, "{") == 0) {
            /* Load LMclass files */
            while ((fscanf(ctlfp, "%1023s", str) == 1)
                   && (strcmp(str, "}") != 0)) {
                char *deffile;
                if (basedir && !path_is_absolute(str))
                    deffile = string_join(basedir, str, NULL);
                else
                    deffile = ckd_salloc(str);
                E_INFO("Reading classdef from '%s'\n", deffile);
                if (read_classdef_file(classes, deffile) < 0) {
                    ckd_free(deffile);
                    goto error_out;
                }
                ckd_free(deffile);
            }

            if (strcmp(str, "}") != 0) {
                E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                goto error_out;
            }

            /* This might be the first LM name. */
            if (fscanf(ctlfp, "%1023s", str) != 1)
                str[0] = '\0';
        }
    }
    else
        str[0] = '\0';

    /* Read in one LM at a time and add classes to them as necessary. */
    while (str[0] != '\0') {
        char *lmfile;
        ngram_model_t *lm;

        if (basedir && str[0] != '/' && str[0] != '\\')
            lmfile = string_join(basedir, str, NULL);
        else
            lmfile = ckd_salloc(str);
        E_INFO("Reading lm from '%s'\n", lmfile);
        lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
        if (lm == NULL) {
            ckd_free(lmfile);
            goto error_out;
        }
        if (fscanf(ctlfp, "%1023s", str) != 1) {
            E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
            ckd_free(lmfile);
            goto error_out;
        }
        ckd_free(lmfile);
        lms = glist_add_ptr(lms, lm);
        lmnames = glist_add_ptr(lmnames, ckd_salloc(str));

        if (fscanf(ctlfp, "%1023s", str) == 1) {
            if (strcmp(str, "{") == 0) {
                /* LM uses classes; read their names */
                while ((fscanf(ctlfp, "%1023s", str) == 1) &&
                       (strcmp(str, "}") != 0)) {
                    void *val;
                    classdef_t *classdef;

                    if (hash_table_lookup(classes, str, &val) == -1) {
                        E_ERROR("Unknown class %s in control file\n", str);
                        goto error_out;
                    }
                    classdef = val;
                    if (ngram_model_add_class(lm, str, 1.0,
                                              classdef->words,
                                              classdef->weights,
                                              classdef->n_words) < 0) {
                        goto error_out;
                    }
                    E_INFO("Added class %s containing %d words\n",
                           str, classdef->n_words);
                }
                if (strcmp(str, "}") != 0) {
                    E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                    goto error_out;
                }
                if (fscanf(ctlfp, "%1023s", str) != 1)
                    str[0] = '\0';
            }
        }
        else
            str[0] = '\0';
    }
    fclose(ctlfp);

    /* Now construct arrays out of lms and lmnames, and build an
     * ngram_model_set. */
    lms = glist_reverse(lms);
    lmnames = glist_reverse(lmnames);
    {
        int32 n_models;
        ngram_model_t **lm_array;
        char **name_array;
        gnode_t *lm_node, *name_node;
        int32 i;

        n_models = glist_count(lms);
        lm_array = ckd_calloc(n_models, sizeof(*lm_array));
        name_array = ckd_calloc(n_models, sizeof(*name_array));
        lm_node = lms;
        name_node = lmnames;
        for (i = 0; i < n_models; ++i) {
            lm_array[i] = gnode_ptr(lm_node);
            name_array[i] = gnode_ptr(name_node);
            lm_node = gnode_next(lm_node);
            name_node = gnode_next(name_node);
        }
        set = ngram_model_set_init(config, lm_array, name_array,
                                   NULL, n_models);

        for (i = 0; i < n_models; ++i) {
            ngram_model_free(lm_array[i]);
        }
        ckd_free(lm_array);
        ckd_free(name_array);
    }
  error_out:
    {
        gnode_t *gn;
        glist_t hlist;

        if (set == NULL) {
            for (gn = lms; gn; gn = gnode_next(gn)) {
                ngram_model_free(gnode_ptr(gn));
            }
        }
        glist_free(lms);
        for (gn = lmnames; gn; gn = gnode_next(gn)) {
            ckd_free(gnode_ptr(gn));
        }
        glist_free(lmnames);
        hlist = hash_table_tolist(classes, NULL);
        for (gn = hlist; gn; gn = gnode_next(gn)) {
            hash_entry_t *he = gnode_ptr(gn);
            ckd_free((char *) he->key);
            classdef_free(he->val);
        }
        glist_free(hlist);
        hash_table_free(classes);
        ckd_free(basedir);
    }
    return set;
}
Exemple #12
0
dict_t *
dict_init(cmd_ln_t *config, bin_mdef_t * mdef, logmath_t *logmath)
{
    FILE *fp, *fp2;
    int32 n;
    lineiter_t *li;
    dict_t *d;
    s3cipid_t sil;
    char const *dictfile = NULL, *fillerfile = NULL, *arpafile = NULL;

    if (config) {
        dictfile = cmd_ln_str_r(config, "-dict");
        fillerfile = cmd_ln_str_r(config, "-fdict");
        arpafile = string_join(dictfile, ".dmp",  NULL);
    }

    /*
     * First obtain #words in dictionary (for hash table allocation).
     * Reason: The PC NT system doesn't like to grow memory gradually.  Better to allocate
     * all the required memory in one go.
     */
    fp = NULL;
    n = 0;
    if (dictfile) {
        if ((fp = fopen(dictfile, "r")) == NULL) {
            E_ERROR_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile);
            return NULL;
        }
        for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
            if (0 != strncmp(li->buf, "##", 2)
                && 0 != strncmp(li->buf, ";;", 2))
                n++;
        }
	fseek(fp, 0L, SEEK_SET);
    }

    fp2 = NULL;
    if (fillerfile) {
        if ((fp2 = fopen(fillerfile, "r")) == NULL) {
            E_ERROR_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile);
            fclose(fp);
            return NULL;
	}
        for (li = lineiter_start(fp2); li; li = lineiter_next(li)) {
	    if (0 != strncmp(li->buf, "##", 2)
    	        && 0 != strncmp(li->buf, ";;", 2))
                n++;
        }
        fseek(fp2, 0L, SEEK_SET);
    }

    /*
     * Allocate dict entries.  HACK!!  Allow some extra entries for words not in file.
     * Also check for type size restrictions.
     */
    d = (dict_t *) ckd_calloc(1, sizeof(dict_t));       /* freed in dict_free() */

    if (arpafile) {
        ngram_model_t *ngram_g2p_model = ngram_model_read(NULL,arpafile,NGRAM_AUTO,logmath);
        if (!ngram_g2p_model) {
            E_ERROR("No arpa model found  \n");
            return NULL;
        }
        d->ngram_g2p_model = ngram_g2p_model;
        ckd_free(arpafile);
    }

    d->refcnt = 1;
    d->max_words =
        (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
    if (n >= MAX_S3WID) {
        E_ERROR("Number of words in dictionaries (%d) exceeds limit (%d)\n", n,
                MAX_S3WID);
        fclose(fp);
        fclose(fp2);
        ckd_free(d);
        return NULL;
    }

    E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n",
           d->max_words, sizeof(dictword_t),
           d->max_words * sizeof(dictword_t) / 1024);
    d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t));      /* freed in dict_free() */
    d->n_word = 0;
    if (mdef)
        d->mdef = bin_mdef_retain(mdef);

    /* Create new hash table for word strings; case-insensitive word strings */
    if (config && cmd_ln_exists_r(config, "-dictcase"))
        d->nocase = cmd_ln_boolean_r(config, "-dictcase");
    d->ht = hash_table_new(d->max_words, d->nocase);

    /* Digest main dictionary file */
    if (fp) {
        E_INFO("Reading main dictionary: %s\n", dictfile);
        dict_read(fp, d);
        fclose(fp);
        E_INFO("%d words read\n", d->n_word);
    }

    /* Now the filler dictionary file, if it exists */
    d->filler_start = d->n_word;
    if (fillerfile) {
        E_INFO("Reading filler dictionary: %s\n", fillerfile);
        dict_read(fp2, d);
        fclose(fp2);
        E_INFO("%d words read\n", d->n_word - d->filler_start);
    }
    if (mdef)
        sil = bin_mdef_silphone(mdef);
    else
        sil = 0;
    if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_START_WORD, &sil, 1);
    }
    if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_FINISH_WORD, &sil, 1);
    }
    if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_SILENCE_WORD, &sil, 1);
    }

    d->filler_end = d->n_word - 1;

    /* Initialize distinguished word-ids */
    d->startwid = dict_wordid(d, S3_START_WORD);
    d->finishwid = dict_wordid(d, S3_FINISH_WORD);
    d->silwid = dict_wordid(d, S3_SILENCE_WORD);

    if ((d->filler_start > d->filler_end)
        || (!dict_filler_word(d, d->silwid))) {
        E_ERROR("Word '%s' must occur (only) in filler dictionary\n",
                S3_SILENCE_WORD);
        dict_free(d);
        return NULL;
    }

    /* No check that alternative pronunciations for filler words are in filler range!! */

    return d;
}