/**
 * Create a language-model set wrapping an array of existing N-gram models.
 *
 * Each submodel is retained and its name copied into the set.  All
 * submodels must share the log base and shift of models[0]; otherwise
 * an error is logged and nothing is created.
 *
 * @param config   Not used by this function.
 * @param models   Array of n_models language models.
 * @param names    Array of n_models names, one per model (copied).
 * @param weights  Optional array of n_models interpolation weights.  When
 *                 NULL, every model gets the uniform weight 1/n_models.
 *                 When given, the set's current-model index is set to -1
 *                 (interpolate by default).
 * @param n_models Number of entries in models, names and weights.
 * @return The base model of the new set, or NULL if n_models is 0 or
 *         the log-math parameters of the models disagree.
 */
ngram_model_t *
ngram_model_set_init(cmd_ln_t * config,
                     ngram_model_t ** models,
                     char **names, const float32 * weights, int32 n_models)
{
    ngram_model_set_t *set;
    ngram_model_t *base;
    logmath_t *lmath;
    int32 uniform_lweight;
    int32 max_order;
    int32 idx;

    /* Nothing to combine. */
    if (n_models == 0)
        return NULL;

    /* Every submodel has to agree with the first one on log base and
     * shift before the set is built. */
    lmath = models[0]->lmath;
    for (idx = 1; idx < n_models; ++idx) {
        logmath_t *other = models[idx]->lmath;

        if (!(logmath_get_base(other) == logmath_get_base(lmath)
              && logmath_get_shift(other) == logmath_get_shift(lmath))) {
            E_ERROR
                ("Log-math parameters don't match, will not create LM set\n");
            return NULL;
        }
    }

    /* Allocate the set object and its per-model arrays. */
    set = ckd_calloc(1, sizeof(*set));
    base = &set->base;
    set->n_models = n_models;
    set->lms = ckd_calloc(n_models, sizeof(*set->lms));
    set->names = ckd_calloc(n_models, sizeof(*set->names));
    set->lweights = ckd_calloc(n_models, sizeof(*set->lweights));

    /* Start from a uniform distribution over the submodels. */
    uniform_lweight = logmath_log(lmath, 1.0 / n_models);
    for (idx = 0; idx < n_models; ++idx)
        set->lweights[idx] = uniform_lweight;

    /* Explicit weights select interpolation as the default mode. */
    if (weights != NULL)
        set->cur = -1;

    /* Retain the submodels, copy their names, apply any explicit
     * weights, and track the largest N-gram order among them. */
    max_order = 0;
    for (idx = 0; idx < n_models; ++idx) {
        ngram_model_t *lm = models[idx];

        set->lms[idx] = ngram_model_retain(lm);
        set->names[idx] = ckd_salloc(names[idx]);
        if (weights != NULL)
            set->lweights[idx] = logmath_log(lmath, weights[idx]);
        if (lm->n > max_order)
            max_order = lm->n;
    }

    /* History mapping table has one slot fewer than the maximum order. */
    set->maphist = ckd_calloc(max_order - 1, sizeof(*set->maphist));

    /* Build the word-ID mapping and the merged vocabulary. */
    build_widmap(base, lmath, max_order);
    return base;
}
Exemple #2
0
/**
 * Read the header of the senone score file open on acmod->insenfh and
 * check it against the acoustic model.
 *
 * The "n_sen" entry must equal the senone count of acmod->mdef and the
 * "logbase" entry must be within 0.001 of the base of acmod->lmath.
 * On success the byte-swap flag reported by bio_readhdr() is stored in
 * acmod->insen_swap.
 *
 * @param acmod Acoustic model whose insenfh, mdef and lmath are used.
 * @return 0 on success, -1 if the header cannot be read or does not
 *         match the model.
 */
static int
acmod_read_senfh_header(acmod_t *acmod)
{
    /* Start from NULL so the error path never hands indeterminate
     * pointers to bio_hdrarg_free() if bio_readhdr() fails without
     * filling them in. */
    char **name = NULL, **val = NULL;
    int32 swap;
    int i;

    if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
        goto error_out;
    for (i = 0; name[i] != NULL; ++i) {
        if (!strcmp(name[i], "n_sen")) {
            if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
                E_ERROR("Number of senones in senone file (%d) does not "
                        "match mdef (%d)\n", atoi(val[i]),
                        bin_mdef_n_sen(acmod->mdef));
                goto error_out;
            }
        }

        if (!strcmp(name[i], "logbase")) {
            /* Compare with a tolerance: the header stores the base as
             * text, so an exact floating-point match is not expected. */
            if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
                E_ERROR("Logbase in senone file (%f) does not match acmod "
                        "(%f)\n", atof_c(val[i]),
                        logmath_get_base(acmod->lmath));
                goto error_out;
            }
        }
    }
    acmod->insen_swap = swap;
    bio_hdrarg_free(name, val);
    return 0;
error_out:
    /* Only release the header arrays if they were actually produced. */
    if (name != NULL)
        bio_hdrarg_free(name, val);
    return -1;
}
/**
 * Evaluate one sentence against a language model and print the result.
 *
 * The text is split into words, scored with calc_entropy(), and the
 * cross-entropy (in bits), perplexity, LM score, word count, OOV count
 * and context-cue count are written to stdout.  An input with no words
 * prints nothing.
 *
 * @param lm    Language model to score against.
 * @param lmath Log-math object used to convert the entropy for display.
 * @param text  Sentence to evaluate; it is not modified (a private copy
 *              is split instead).
 */
static void
evaluate_string(ngram_model_t *lm, logmath_t *lmath, const char *text)
{
	char *textfoo;
	char **words;
	int32 n, ch, noovs, nccs, lscr;

	/* Split a private copy into an array of strings; the first
	 * str2words() call only counts the words. */
	textfoo = ckd_salloc(text);
	n = str2words(textfoo, NULL, 0);
	if (n < 0)
		E_FATAL("str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
	if (n == 0) { /* Do nothing! */
		/* Release the copy: returning without this leaked it. */
		ckd_free(textfoo);
		return;
	}
	words = ckd_calloc(n, sizeof(*words));
	str2words(textfoo, words, n);

	ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);

	printf("input: %s\n", text);
	/* Convert from the model's log base to bits. */
	printf("cross-entropy: %f bits\n",
	       ch * log(logmath_get_base(lmath)) / log(2));

	/* Calculate perplexity pplx = exp CH */
	printf("perplexity: %f\n", logmath_exp(lmath, ch));
	printf("lm score: %d\n", lscr);

	/* Report OOVs and CCs */
	printf("%d words evaluated\n", n);
	printf("%d OOVs, %d context cues removed\n",
	      noovs, nccs);

	ckd_free(textfoo);
	ckd_free(words);
}
/**
 * Evaluate every line of a transcript file against a language model
 * and print aggregate statistics to stdout.
 *
 * Each non-empty line is split into words; a trailing word of the form
 * "(...)" is treated as an utterance ID and dropped.  Per-line entropy
 * from calc_entropy() is weighted by the number of scored words and
 * accumulated in bits, then averaged over all scored words.
 *
 * @param lm    Language model to score against.
 * @param lmath Log-math object supplying the log base for conversion.
 * @param lsnfn Path of the transcript file; failure to open it is fatal.
 */
static void
evaluate_file(ngram_model_t *lm, logmath_t *lmath, const char *lsnfn)
{
	FILE *fh;
	lineiter_t *litor;
	int32 nccs, noovs, nwords, lscr;
	float64 ch, log_to_log2;

	if ((fh = fopen(lsnfn, "r")) == NULL)
		E_FATAL_SYSTEM("failed to open transcript file %s", lsnfn);

	/* We have to keep ch in floating-point to avoid overflows, so
	 * we might as well use log2. */
	log_to_log2 = log(logmath_get_base(lmath)) / log(2);
	lscr = nccs = noovs = nwords = 0;
	ch = 0.0;
	for (litor = lineiter_start(fh); litor; litor = lineiter_next(litor)) {
		char **words;
		int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;

		/* First pass only counts the words on the line. */
		n = str2words(litor->buf, NULL, 0);
		if (n < 0)
			E_FATAL("str2words(line, NULL, 0) = %d, should not happen\n", n);
		if (n == 0) /* Do nothing! */
			continue;
		words = ckd_calloc(n, sizeof(*words));
		str2words(litor->buf, words, n);

		/* Remove any utterance ID (FIXME: has to be a single "word") */
		if (words[n-1][0] == '('
		    && words[n-1][strlen(words[n-1])-1] == ')')
			n = n - 1;

		tmp_ch = calc_entropy(lm, words, n, &tmp_nccs,
				      &tmp_noovs, &tmp_lscr);

		/* Weight the line's entropy by its number of scored words. */
		ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
		nccs += tmp_nccs;
		noovs += tmp_noovs;
		lscr += tmp_lscr;
		nwords += n;

		ckd_free(words);
	}

	/* The transcript was opened here, so close it here; previously the
	 * stream was left open when the function returned. */
	fclose(fh);

	/* NOTE(review): if no words were scored the divisor below is zero
	 * and the averages are not meaningful -- confirm whether callers
	 * need an explicit message for empty transcripts. */
	ch /= (nwords - nccs - noovs);
	printf("cross-entropy: %f bits\n", ch);

	/* Calculate perplexity pplx = exp CH */
	printf("perplexity: %f\n", pow(2.0, ch));
	printf("lm score: %d\n", lscr);

	/* Report OOVs and CCs */
	printf("%d words evaluated\n", nwords);
	printf("%d OOVs (%.2f%%), %d context cues removed\n",
	       noovs, (double)noovs / nwords * 100, nccs);
}
Exemple #5
0
/**
 * Compute the sub-vector-quantized score of one mixture Gaussian.
 *
 * For each evaluated component, the precomputed codeword distances in
 * vq->vqdist[0] are summed over the vq->n_sv subvectors (selected via
 * vq->map[m]), the mixture weight is added, and the result is
 * log-added into the running score.
 *
 * @param g      Mixture Gaussian model; supplies the mixture weights
 *               and the log-math object.
 * @param vq     Sub-VQ data holding the distance table and the map.
 * @param m      Index of the mixture Gaussian to evaluate.
 * @param n      Number of components to evaluate when active is NULL.
 * @param active Optional list of component indices terminated by a
 *               negative value; when NULL the first n components are
 *               evaluated.
 * @return The accumulated log score, S3_LOGPROB_ZERO if nothing was
 *         added.
 */
int32
subvq_mgau_eval(mgau_model_t * g, subvq_t * vq, int32 m, int32 n,
                int32 * active)
{
    mgau_t *mgau;
    int32 *map;
    int32 i, v, sv_id;
    int32 c;
    int32 *vqdist;
    int32 score;
    int32 last_active;

    vqdist = vq->vqdist[0];
    score = S3_LOGPROB_ZERO;
    mgau = &(g->mgau[m]);
    map = vq->map[m][0];

    if (!active) {
        /* No short list: walk the map sequentially over all n components. */
        for (i = 0; i < n; i++) {
            v = 0;
            for (sv_id = 0; sv_id < vq->n_sv; sv_id++) {
                v += vqdist[*(map++)];
            }
            score = logmath_add(g->logmath, score, v + mgau->mixw[i]);
        }
    }
    else {
        last_active = 0;
        for (i = 0; active[i] >= 0; i++) {
            c = active[i];
            /* Jump the map cursor from just past the previously
             * evaluated component to the start of component c. */
            map += (c - last_active) * vq->n_sv;
            v = 0;
            for (sv_id = 0; sv_id < vq->n_sv; sv_id++) {
                v += vqdist[*(map++)];
            }

            last_active = c + 1;
            /* NOTE(review): the weight is indexed by list position i,
             * while the map is indexed by component c -- confirm
             * whether mixw[c] was intended.  Left as in the original. */
            score = logmath_add(g->logmath, score, v + mgau->mixw[i]);
        }
    }

    if (score == S3_LOGPROB_ZERO) {
        E_INFO("Warning!! Score is S3_LOGPROB_ZERO\n");
    }

    return score;

}
Exemple #6
0
/**
 * Write the header of a senone score dump file.
 *
 * The header records a version string, the value of the "-mdef"
 * configuration entry, the number of senones in acmod->mdef and the
 * log base of acmod->lmath.
 *
 * @param acmod Acoustic model supplying config, mdef and lmath.
 * @param logfh Stream the header is written to.
 * @return Whatever bio_writehdr() returns.
 */
int
acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
{
    char nsenstr[64], logbasestr[64];

    /* Bounded formatting: "%f" of a very large double can need far
     * more than 64 characters, which sprintf() would write past the
     * end of the buffer. */
    snprintf(nsenstr, sizeof(nsenstr), "%d", bin_mdef_n_sen(acmod->mdef));
    snprintf(logbasestr, sizeof(logbasestr), "%f",
             logmath_get_base(acmod->lmath));
    return bio_writehdr(logfh,
                        "version", "0.1",
                        "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"),
                        "n_sen", nsenstr,
                        "logbase", logbasestr, NULL);
}
Exemple #7
0
/**
 * (Re)initialize a decoder from its configuration.
 *
 * If a new, different configuration is passed it replaces the current
 * one (the old one is released, the new one retained).  The function
 * then tears down the existing searches, acoustic model, dictionary and
 * dict2pid mapping, rebuilds them, and sets up the default search from
 * whichever of -keyphrase, -kws, -fsg, -jsgf, -allphone, -lm or -lmctl
 * is present in the configuration.
 *
 * All configuration lookups go through ps->config, so the function
 * also works when config is NULL (reuse the decoder's current
 * configuration).
 *
 * @param ps     Decoder to reinitialize.
 * @param config New configuration, or NULL to keep the current one.
 * @return 0 on success, -1 on failure.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyphrase;
    /* Kept as float32: storing the -lw value in an int32 truncated
     * fractional language weights. */
    float32 lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search) */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ckd_salloc(ps_search_name(ps->phone_loop)),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    /* From here on read options from ps->config, not from the config
     * parameter: the parameter may legitimately be NULL. */
    lw = cmd_ln_float32_r(ps->config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
        if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        if (!fsg)
            return -1;
        if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Or load a JSGF grammar */
    if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
        if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
        if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-lm")) &&
        !cmd_ln_boolean_r(ps->config, "-allphone")) {
        if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        /* Register one search per language model in the set, under the
         * name the set reports for it. */
        for(lmset_it = ngram_model_set_iter(lmset);
            lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {

            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
                ngram_model_free(lm);
                ngram_model_set_iter_free(lmset_it);
                return -1;
            }
            ngram_model_free(lm);
        }

        /* A control file requires -lmname to pick the initial search. */
        name = cmd_ln_str_r(ps->config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else {
            E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
            return -1;
        }
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
Exemple #8
0
/**
 * (Re)initialize a decoder from its configuration.
 *
 * A new, different configuration replaces the current one (the old one
 * is released; the new pointer is stored as-is).  Logging is set up,
 * the old searches, acoustic model and dictionary are discarded, and
 * the log-math object, acoustic model, optional phone-loop search,
 * dictionary and dict2pid mapping are rebuilt.  An FSG search is
 * created when -fsg or -jsgf is configured, otherwise an N-gram search
 * when -lm or -lmctl is configured; with neither, no search is created.
 *
 * @param ps     Decoder to reinitialize.
 * @param config New configuration, or NULL to keep the current one.
 * @return 0 on success, -1 if any component fails to initialize.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    /* Adopt the caller's configuration if it differs from ours. */
    if (config != NULL && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = config;
    }
#ifndef _WIN32_WCE
    /* Redirect log output when a log file was requested. */
    if (cmd_ln_str_r(ps->config, "-logfn") != NULL)
        err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
#endif
    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");

    /* Supply defaults for arguments that were left unset. */
    ps_init_defaults(ps);

    /* Tear down in order: searches first, then the acoustic model,
     * then the dictionary. */
    ps_free_searches(ps);

    acmod_free(ps->acmod);
    ps->acmod = NULL;

    dict_free(ps->dict);
    ps->dict = NULL;

    /* Log-math object (shared by acmod and search): drop the existing
     * one if its base no longer matches the configuration, then create
     * one if none is left. */
    if (ps->lmath != NULL
        && logmath_get_base(ps->lmath) !=
           (float64)cmd_ln_float32_r(ps->config, "-logbase")) {
        logmath_free(ps->lmath);
        ps->lmath = NULL;
    }
    if (ps->lmath == NULL) {
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model. */
    ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL);
    if (ps->acmod == NULL)
        return -1;

    /* Two-pass search (fwdtree followed by fwdflat) needs a growable
     * feature buffer in the acmod. */
    if (cmd_ln_boolean_r(ps->config, "-fwdflat")
        && cmd_ln_boolean_r(ps->config, "-fwdtree"))
        acmod_set_grow(ps->acmod, TRUE);

    /* Optional auxiliary phone-loop search, run alongside the FSG or
     * N-gram search. */
    ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
    if (ps->pl_window != 0) {
        ps->phone_loop = phone_loop_search_init(ps->config,
                                                ps->acmod, ps->dict);
        if (ps->phone_loop == NULL)
            return -1;
        ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
    }

    /* Dictionary (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    ps->dict = dict_init(ps->config, ps->acmod->mdef);
    if (ps->dict == NULL)
        return -1;

    /* Pick the initial search mode from the configuration. */
    if (cmd_ln_str_r(ps->config, "-fsg") != NULL
        || cmd_ln_str_r(ps->config, "-jsgf") != NULL) {
        ps_search_t *fsg_search;

        ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
        if (ps->d2p == NULL)
            return -1;
        fsg_search = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
        if (fsg_search == NULL)
            return -1;
        fsg_search->pls = ps->phone_loop;
        ps->searches = glist_add_ptr(ps->searches, fsg_search);
        ps->search = fsg_search;
    }
    else if (cmd_ln_str_r(ps->config, "-lm") != NULL
             || cmd_ln_str_r(ps->config, "-lmctl") != NULL) {
        ps_search_t *ngram_search;

        ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
        if (ps->d2p == NULL)
            return -1;
        ngram_search = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
        if (ngram_search == NULL)
            return -1;
        ngram_search->pls = ps->phone_loop;
        ps->searches = glist_add_ptr(ps->searches, ngram_search);
        ps->search = ngram_search;
    }
    else {
        /* No grammar or language model configured: only build the
         * triphone mapping; the search is set up later, when the user
         * loads an FSG or a language model. */
        ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
        if (ps->d2p == NULL)
            return -1;
    }

    /* Performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/**
 * Allocate and initialize a semi-continuous mixture Gaussian scorer.
 *
 * Sets up an 8-bit log-add table, takes the stream dimensions from the
 * feature module, reads means and variances, precomputes derived
 * values, loads mixture weights (from -sendump if configured, otherwise
 * from -mixw), and allocates the top-N beam and history buffers.
 *
 * @param config Configuration supplying the model file paths and tuning
 *               parameters; stored in the returned object.
 * @param lmath  Log-math object; retained by the returned object.
 * @param fcb    Feature module providing the number of streams and the
 *               vector length of each.
 * @param mdef   Model definition passed on to read_sendump().
 * @return The new scorer, or NULL if the log-add table cannot be built
 *         in 8 bits or the means/variances cannot be read.
 */
s2_semi_mgau_t *
s2_semi_mgau_init(cmd_ln_t *config, logmath_t *lmath, feat_t *fcb, mdef_t *mdef)
{
    s2_semi_mgau_t *s;
    char const *sendump_path;
    float32 **fgau;
    int feat, frame;

    s = ckd_calloc(1, sizeof(*s));
    s->config = config;
    s->lmath = logmath_retain(lmath);

    /* Log-add table; it must be exactly one byte wide so that
     * fast_logmath_add() works. */
    s->lmath_8b = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
    if (s->lmath_8b == NULL)
        goto fail;
    if (logmath_get_width(s->lmath_8b) != 1) {
        E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
                logmath_get_base(s->lmath_8b));
        goto fail;
    }

    /* Stream dimensions come from the feature module and are checked
     * when the model files are read below. */
    s->n_feat = feat_dimension1(fcb);
    s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
    for (feat = 0; feat < s->n_feat; ++feat)
        s->veclen[feat] = feat_dimension2(fcb, feat);

    /* Means first, then variances. */
    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fgau) < 0)
        goto fail;
    s->means = (mfcc_t **)fgau;

    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fgau) < 0)
        goto fail;
    s->vars = (mfcc_t **)fgau;

    /* Precompute (and fixed-point-ize) means, variances, and determinants. */
    s->dets = (mfcc_t **)ckd_calloc_2d(s->n_feat, s->n_density, sizeof(**s->dets));
    s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));

    /* Mixture weights: a sendump file takes precedence over -mixw. */
    sendump_path = cmd_ln_str_r(s->config, "-sendump");
    if (sendump_path != NULL) {
        read_sendump(s, mdef, sendump_path);
    }
    else {
        read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
                  cmd_ln_float32_r(s->config, "-mixwfloor"));
    }
    s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");

    /* Per-stream top-N beams, logged on a single line. */
    s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
    s->max_topn = cmd_ln_int32_r(s->config, "-topn");
    split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);
    E_INFO("Maximum top-N: %d ", s->max_topn);
    E_INFOCONT("Top-N beams:");
    for (feat = 0; feat < s->n_feat; ++feat)
        E_INFOCONT(" %d", s->topn_beam[feat]);
    E_INFOCONT("\n");

    /* History of top-N scores from recent frames, sized from the
     * phone-loop window plus two. */
    s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
    s->topn_hist = (vqFeature_t ***)
        ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
                      sizeof(***s->topn_hist));
    s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
                                   sizeof(**s->topn_hist_n));

    /* Seed every history slot with the worst distance and its own
     * index as the codeword. */
    for (frame = 0; frame < s->n_topn_hist; ++frame) {
        for (feat = 0; feat < s->n_feat; ++feat) {
            int rank;

            for (rank = 0; rank < s->max_topn; ++rank) {
                s->topn_hist[frame][feat][rank].score = WORST_DIST;
                s->topn_hist[frame][feat][rank].codeword = rank;
            }
        }
    }

    return s;

fail:
    /* Shared failure exit: release the partially built object. */
    s2_semi_mgau_free(s);
    return NULL;
}