/**
 * Build a language-model set out of several already-loaded N-gram models.
 *
 * Every model must share the log base and shift of models[0]; otherwise an
 * error is logged and no set is created.  Each input model is retained and
 * each name is copied, so the caller keeps its own references.
 *
 * @param config   Not referenced by this function.
 * @param models   Array of n_models models to combine.
 * @param names    Array of n_models names, one per model.
 * @param weights  Optional array of n_models linear interpolation weights.
 *                 When NULL, every model gets the uniform weight 1/n_models
 *                 and model->cur keeps the zero value left by ckd_calloc.
 *                 When given, model->cur is set to -1.
 * @param n_models Number of entries in models / names / weights.
 * @return The base model of the new set, or NULL when there is nothing to
 *         combine or the log-math parameters disagree.
 */
ngram_model_t *
ngram_model_set_init(cmd_ln_t * config,
                     ngram_model_t ** models,
                     char **names, const float32 * weights, int32 n_models)
{
    ngram_model_set_t *model;
    ngram_model_t *base;
    logmath_t *lmath;
    int32 i, n;

    /* Nothing to combine.  A negative count or missing arrays are rejected
     * here as well: they would otherwise reach models[0] and the
     * count-sized allocations below. */
    if (n_models <= 0 || models == NULL || names == NULL)
        return NULL;

    /* Do consistency checking on the models.  They must all use the
     * same logbase and shift. */
    lmath = models[0]->lmath;
    for (i = 1; i < n_models; ++i) {
        if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath)
            || logmath_get_shift(models[i]->lmath) != logmath_get_shift(lmath)) {
            E_ERROR
                ("Log-math parameters don't match, will not create LM set\n");
            return NULL;
        }
    }

    /* Allocate the combined model, initialize it. */
    model = ckd_calloc(1, sizeof(*model));
    base = &model->base;
    model->n_models = n_models;
    model->lms = ckd_calloc(n_models, sizeof(*model->lms));
    model->names = ckd_calloc(n_models, sizeof(*model->names));

    /* Initialize weights to a uniform distribution */
    model->lweights = ckd_calloc(n_models, sizeof(*model->lweights));
    {
        int32 uniform = logmath_log(lmath, 1.0 / n_models);
        for (i = 0; i < n_models; ++i)
            model->lweights[i] = uniform;
    }

    /* Default to interpolate if weights were given. */
    if (weights)
        model->cur = -1;

    n = 0;
    for (i = 0; i < n_models; ++i) {
        model->lms[i] = ngram_model_retain(models[i]);
        model->names[i] = ckd_salloc(names[i]);
        /* Caller-supplied weights override the uniform default. */
        if (weights)
            model->lweights[i] = logmath_log(lmath, weights[i]);
        /* N is the maximum of all merged models. */
        if (models[i]->n > n)
            n = models[i]->n;
    }

    /* Allocate the history mapping table (n - 1 entries). */
    model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist));

    /* Now build the word-ID mapping and merged vocabulary. */
    build_widmap(base, lmath, n);
    return base;
}
/**
 * Read the header of the senone input file (acmod->insenfh) and check it
 * against the loaded model.
 *
 * Two header fields are validated when present:
 *   - "n_sen" must equal bin_mdef_n_sen(acmod->mdef);
 *   - "logbase" must be within 0.001 of logmath_get_base(acmod->lmath).
 * On success the byte-swap flag reported by bio_readhdr() is stored in
 * acmod->insen_swap.
 *
 * @return 0 on success, -1 if the header cannot be read or a field
 *         does not match.
 */
static int
acmod_read_senfh_header(acmod_t *acmod)
{
    /* Start from NULL so the error path never sees indeterminate pointers
     * if bio_readhdr() fails before storing anything. */
    char **name = NULL, **val = NULL;
    int32 swap;
    int i;

    if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0)
        goto error_out;

    /* The name array is NULL-terminated; val[i] pairs with name[i]. */
    for (i = 0; name[i] != NULL; ++i) {
        if (!strcmp(name[i], "n_sen")) {
            if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) {
                E_ERROR("Number of senones in senone file (%d) does not "
                        "match mdef (%d)\n", atoi(val[i]),
                        bin_mdef_n_sen(acmod->mdef));
                goto error_out;
            }
        }
        if (!strcmp(name[i], "logbase")) {
            if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) {
                E_ERROR("Logbase in senone file (%f) does not match acmod "
                        "(%f)\n", atof_c(val[i]),
                        logmath_get_base(acmod->lmath));
                goto error_out;
            }
        }
    }

    acmod->insen_swap = swap;
    bio_hdrarg_free(name, val);
    return 0;

error_out:
    /* Only release what was actually handed back to us. */
    if (name != NULL)
        bio_hdrarg_free(name, val);
    return -1;
}
/**
 * Evaluate a single string against a language model and print a report to
 * standard output: the input text, cross-entropy in bits, perplexity,
 * LM score, number of words, and OOV / context-cue counts.
 *
 * @param lm    Language model passed on to calc_entropy().
 * @param lmath Log-math object; its base converts the entropy to bits and
 *              it is used to exponentiate the entropy for the perplexity.
 * @param text  Text to evaluate.  It is copied before being split, so the
 *              caller's string is not modified.
 */
static void
evaluate_string(ngram_model_t *lm, logmath_t *lmath, const char *text)
{
    char *textfoo;
    char **words;
    int32 n, ch, noovs, nccs, lscr;

    /* Split a private copy into an array of strings.  The first
     * str2words() call only counts the words. */
    textfoo = ckd_salloc(text);
    n = str2words(textfoo, NULL, 0);
    if (n < 0)
        E_FATAL("str2words(textfoo, NULL, 0) = %d, should not happen\n", n);
    if (n == 0) {
        /* Nothing to evaluate, but the copy still has to be released. */
        ckd_free(textfoo);
        return;
    }
    words = ckd_calloc(n, sizeof(*words));
    str2words(textfoo, words, n);

    ch = calc_entropy(lm, words, n, &nccs, &noovs, &lscr);

    printf("input: %s\n", text);
    /* ch is in units of log(base); rescale to log2. */
    printf("cross-entropy: %f bits\n",
           ch * log(logmath_get_base(lmath)) / log(2));

    /* Calculate perplexity pplx = exp CH */
    printf("perplexity: %f\n", logmath_exp(lmath, ch));
    printf("lm score: %d\n", lscr);

    /* Report OOVs and CCs */
    printf("%d words evaluated\n", n);
    printf("%d OOVs, %d context cues removed\n",
           noovs, nccs);

    ckd_free(textfoo);
    ckd_free(words);
}
/**
 * Evaluate every line of a transcript file against a language model and
 * print an aggregate report (cross-entropy in bits, perplexity, LM score,
 * word count, OOV count and percentage, context-cue count) to standard
 * output.
 *
 * A trailing word of the form "(...)" on a line is treated as an utterance
 * ID and excluded.  Empty lines are skipped.  Failure to open the file is
 * reported through E_FATAL_SYSTEM.
 *
 * @param lm    Language model passed on to calc_entropy().
 * @param lmath Log-math object whose base is used to convert to log2.
 * @param lsnfn Path of the transcript file.
 */
static void
evaluate_file(ngram_model_t *lm, logmath_t *lmath, const char *lsnfn)
{
    FILE *fh;
    lineiter_t *litor;
    int32 nccs, noovs, nwords, lscr;
    float64 ch, log_to_log2;

    if ((fh = fopen(lsnfn, "r")) == NULL)
        E_FATAL_SYSTEM("failed to open transcript file %s", lsnfn);

    /* We have to keep ch in floating-point to avoid overflows, so
     * we might as well use log2. */
    log_to_log2 = log(logmath_get_base(lmath)) / log(2);
    lscr = nccs = noovs = nwords = 0;
    ch = 0.0;
    for (litor = lineiter_start(fh); litor; litor = lineiter_next(litor)) {
        char **words;
        int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;

        /* First pass only counts the words on this line. */
        n = str2words(litor->buf, NULL, 0);
        if (n < 0)
            E_FATAL("str2words(line, NULL, 0) = %d, should not happen\n", n);
        if (n == 0) /* Do nothing! */
            continue;
        words = ckd_calloc(n, sizeof(*words));
        str2words(litor->buf, words, n);

        /* Remove any utterance ID (FIXME: has to be a single "word") */
        if (words[n-1][0] == '('
            && words[n-1][strlen(words[n-1])-1] == ')')
            n = n - 1;

        tmp_ch = calc_entropy(lm, words, n, &tmp_nccs,
                              &tmp_noovs, &tmp_lscr);

        /* Weight this line's entropy by the number of words that were
         * neither context cues nor OOVs, so the total can be averaged
         * over all such words below. */
        ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
        nccs += tmp_nccs;
        noovs += tmp_noovs;
        lscr += tmp_lscr;
        nwords += n;

        ckd_free(words);
    }

    /* The transcript has been fully consumed; release the stream. */
    fclose(fh);

    /* NOTE(review): when no words were scored the divisions below have a
     * zero divisor and the printed values are not meaningful. */
    ch /= (nwords - nccs - noovs);
    printf("cross-entropy: %f bits\n", ch);

    /* Calculate perplexity pplx = exp CH */
    printf("perplexity: %f\n", pow(2.0, ch));
    printf("lm score: %d\n", lscr);

    /* Report OOVs and CCs */
    printf("%d words evaluated\n", nwords);
    printf("%d OOVs (%.2f%%), %d context cues removed\n",
           noovs, (double)noovs / nwords * 100, nccs);
}
/**
 * Approximate the score of mixture Gaussian m from sub-vector quantizer
 * distances.
 *
 * For each evaluated component, the vq->n_sv entries of the map for that
 * component index into vq->vqdist[0]; the selected distances are summed,
 * the mixture weight is added, and the result is folded into the running
 * score with logmath_add().
 *
 * @param g      Mixture Gaussian model (supplies mgau[m] and the logmath).
 * @param vq     Sub-vector quantizer (supplies map, vqdist and n_sv).
 * @param m      Index of the mixture to evaluate.
 * @param n      Number of components to evaluate when active is NULL.
 * @param active Optional list of component indices, terminated by a
 *               negative value.  When NULL, components 0..n-1 are used.
 *               The map pointer only moves forward relative to the last
 *               visited component, so the list is presumably expected in
 *               ascending order -- TODO confirm with callers.
 * @return The accumulated score; S3_LOGPROB_ZERO if nothing was added
 *         (a message is logged in that case).
 */
int32
subvq_mgau_eval(mgau_model_t * g, subvq_t * vq, int32 m, int32 n, int32 * active)
{
    mgau_t *mgau;
    int32 *map;
    int32 i, v, sv_id;
    int32 c;
    int32 *vqdist;
    int32 score;
    int32 last_active;

    vqdist = vq->vqdist[0];
    score = S3_LOGPROB_ZERO;
    mgau = &(g->mgau[m]);
    map = vq->map[m][0];

    if (!active) {
        /* Dense case: walk the map sequentially over components 0..n-1. */
        for (i = 0; i < n; i++) {
            v = 0;
            for (sv_id = 0; sv_id < vq->n_sv; sv_id++) {
                v += vqdist[*(map++)];
            }
            score = logmath_add(g->logmath, score, v + mgau->mixw[i]);
        }
    }
    else {
        last_active = 0;
        for (i = 0; active[i] >= 0; i++) {
            c = active[i];

            /* Skip the map entries of the components between the previous
             * active one and this one. */
            map += (c - last_active) * vq->n_sv;
            v = 0;
            for (sv_id = 0; sv_id < vq->n_sv; sv_id++) {
                v += vqdist[*(map++)];
            }
            /* map now points at the entries of component c + 1. */
            last_active = c + 1;

            /* NOTE(review): the weight is indexed by list position i, not
             * by component index c -- confirm this is intended. */
            score = logmath_add(g->logmath, score, v + mgau->mixw[i]);
        }
    }

    if (score == S3_LOGPROB_ZERO) {
        E_INFO("Warning!! Score is S3_LOGPROB_ZERO\n");
    }

    return score;
}
/**
 * Write the header of a senone score dump file.
 *
 * The header carries four fields: "version" ("0.1"), "mdef_file" (the value
 * of the "-mdef" option), "n_sen" (bin_mdef_n_sen of the model definition)
 * and "logbase" (base of the acoustic model's logmath, formatted with %f).
 *
 * @param acmod Acoustic model supplying mdef, lmath and config.
 * @param logfh Stream the header is written to.
 * @return Whatever bio_writehdr() returns.
 */
int
acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
{
    char nsenstr[64], logbasestr[64];

    /* Bounded formatting: "%f" of a very large base could otherwise run
     * past the end of the fixed-size buffer. */
    snprintf(nsenstr, sizeof(nsenstr), "%d", bin_mdef_n_sen(acmod->mdef));
    snprintf(logbasestr, sizeof(logbasestr), "%f",
             logmath_get_base(acmod->lmath));

    /* The name/value list is terminated by the trailing NULL.
     * NOTE(review): if "-mdef" is unset and cmd_ln_str_r() yields NULL, the
     * list would presumably end early -- confirm against bio_writehdr(). */
    return bio_writehdr(logfh,
                        "version", "0.1",
                        "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"),
                        "n_sen", nsenstr,
                        "logbase", logbasestr, NULL);
}
/**
 * (Re)initialize a decoder from a configuration.
 *
 * Tears down the existing searches, acoustic model, dictionary and
 * dictionary-to-phone mapping, then rebuilds them from ps->config.  After
 * that, every search named in the configuration (-keyphrase, -kws, -fsg,
 * -jsgf, -allphone, -lm, -lmctl) is loaded; all but -lmctl are registered
 * under PS_DEFAULT_SEARCH and activated.
 *
 * @param ps     Decoder to reinitialize.
 * @param config New configuration, or NULL (or the current one) to reuse
 *               ps->config.  A different configuration replaces the old
 *               one: the old one is released and the new one retained.
 * @return 0 on success, -1 on any failure.  On failure the decoder is left
 *         partially initialized.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyphrase;
    /* Kept in floating point: the option is read as a float, and an
     * integer local would silently drop its fractional part. */
    double lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    /* From here on only ps->config is consulted, so that a NULL `config`
     * argument (meaning "keep the current configuration") is safe. */

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search).  The existing
     * object is kept when its base already matches -logbase. */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search.
         * NOTE(review): ps->dict is still NULL at this point (it is
         * rebuilt just below) -- confirm the phone loop does not need it. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config,
                                    ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ckd_salloc(ps_search_name(ps->phone_loop)),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    lw = cmd_ln_float32_r(ps->config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
        if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        if (!fsg)
            return -1;
        if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Or load a JSGF grammar */
    if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
        if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
        if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    /* A plain LM is only loaded when allphone mode is not requested. */
    if ((path = cmd_ln_str_r(ps->config, "-lm")) &&
        !cmd_ln_boolean_r(ps->config, "-allphone")) {
        if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        /* Register every member of the set as its own named search.
         * NOTE(review): lmset itself is never released in this function
         * -- confirm who owns it. */
        for(lmset_it = ngram_model_set_iter(lmset);
            lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
                ngram_model_free(lm);
                ngram_model_set_iter_free(lmset_it);
                return -1;
            }
            ngram_model_free(lm);
        }

        /* With a control file there is no implicit default; -lmname must
         * pick the search to activate. */
        name = cmd_ln_str_r(ps->config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else {
            E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
            return -1;
        }
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/**
 * (Re)initialize a decoder from a configuration.
 *
 * Drops the existing searches, acoustic model and dictionary, rebuilds
 * them from ps->config, and then creates the initial search: an FSG search
 * when -fsg or -jsgf is set, otherwise an N-gram search when -lm or -lmctl
 * is set, otherwise none (only the dictionary-to-phone mapping is built).
 *
 * @param ps     Decoder to reinitialize.
 * @param config New configuration, or NULL (or the current one) to keep
 *               ps->config.  A different configuration replaces the old
 *               one after the old one is released; the pointer is stored
 *               as given.
 * @return 0 on success, -1 if any component fails to initialize.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    char const *lmfile, *lmctl = NULL;
    int want_fsg;
    int want_ngram;

    /* Swap in the new configuration, if one was really supplied. */
    if (config != NULL && ps->config != config) {
        cmd_ln_free_r(ps->config);
        ps->config = config;
    }

#ifndef _WIN32_WCE
    /* Redirect logging when a log file was requested. */
    if (cmd_ln_str_r(ps->config, "-logfn") != NULL)
        err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
#endif
    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");

    /* Supply defaults for arguments that were left out. */
    ps_init_defaults(ps);

    /* Tear down in order: searches first, then the acoustic model, and
     * the dictionary only after both of those are gone. */
    ps_free_searches(ps);

    acmod_free(ps->acmod);
    ps->acmod = NULL;

    dict_free(ps->dict);
    ps->dict = NULL;

    /* Log-math object shared by the acoustic model and the searches;
     * an existing one is reused when its base equals -logbase. */
    if (ps->lmath == NULL
        || logmath_get_base(ps->lmath)
           != (float64)cmd_ln_float32_r(ps->config, "-logbase")) {
        if (ps->lmath != NULL)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init(
            (float64)cmd_ln_float32_r(ps->config, "-logbase"),
            0,
            cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model. */
    ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL);
    if (ps->acmod == NULL)
        return -1;

    /* Two-pass search (both -fwdflat and -fwdtree) needs a growable
     * feature buffer in the acoustic model. */
    if (cmd_ln_boolean_r(ps->config, "-fwdflat")
        && cmd_ln_boolean_r(ps->config, "-fwdtree"))
        acmod_set_grow(ps->acmod, TRUE);

    /* A non-zero -pl_window enables the auxiliary phone loop search that
     * runs alongside the FSG or N-gram search. */
    ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
    if (ps->pl_window) {
        ps->phone_loop = phone_loop_search_init(ps->config,
                                                ps->acmod, ps->dict);
        if (ps->phone_loop == NULL)
            return -1;
        ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
    }

    /* Dictionary (depends on the acoustic model's mdef). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    ps->dict = dict_init(ps->config, ps->acmod->mdef);
    if (ps->dict == NULL)
        return -1;

    /* Pick the starting search mode.  The options are consulted in the
     * order -fsg, -jsgf, -lm, -lmctl, stopping at the first one set. */
    want_fsg = (cmd_ln_str_r(ps->config, "-fsg") != NULL
                || cmd_ln_str_r(ps->config, "-jsgf") != NULL);
    want_ngram = 0;
    if (!want_fsg) {
        lmfile = cmd_ln_str_r(ps->config, "-lm");
        if (lmfile == NULL)
            lmctl = cmd_ln_str_r(ps->config, "-lmctl");
        want_ngram = (lmfile != NULL || lmctl != NULL);
    }

    /* The triphone mapping is needed whichever mode was chosen. */
    ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
    if (ps->d2p == NULL)
        return -1;

    /* With neither mode selected, the search is created later, whenever
     * the user loads an FSG or a language model. */
    if (want_fsg || want_ngram) {
        ps_search_t *initial;

        if (want_fsg)
            initial = fsg_search_init(ps->config, ps->acmod,
                                      ps->dict, ps->d2p);
        else
            initial = ngram_search_init(ps->config, ps->acmod,
                                        ps->dict, ps->d2p);
        if (initial == NULL)
            return -1;

        initial->pls = ps->phone_loop;
        ps->searches = glist_add_ptr(ps->searches, initial);
        ps->search = initial;
    }

    /* Performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/**
 * Allocate and load a semi-continuous acoustic scorer.
 *
 * Sets up an 8-bit log-add table, takes the stream layout from the feature
 * module, reads means and variances (-mean, -var), precomputes derived
 * values, loads mixture weights from -sendump if given and from -mixw
 * otherwise, and prepares the per-stream top-N beams and the history of
 * top-N scores.
 *
 * @param config Configuration; stored in the scorer and read for all
 *               options above as well as -varfloor, -mixwfloor, -ds,
 *               -topn, -topn_beam and -pl_window.
 * @param lmath  Log-math object; retained by the scorer.
 * @param fcb    Feature module supplying the number of streams and the
 *               length of each stream's vector.
 * @param mdef   Model definition handed to read_sendump().
 * @return The new scorer, or NULL if the log-add table cannot be built in
 *         8 bits or the means/variances cannot be read.
 */
s2_semi_mgau_t *
s2_semi_mgau_init(cmd_ln_t *config, logmath_t *lmath, feat_t *fcb, mdef_t *mdef)
{
    s2_semi_mgau_t *s;
    char const *sendump_path;
    float32 **fgau;
    int feat;
    int frame;

    s = ckd_calloc(1, sizeof(*s));
    s->config = config;
    s->lmath = logmath_retain(lmath);

    /* Log-add table, same base as the main logmath. */
    s->lmath_8b = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
    if (s->lmath_8b == NULL)
        goto fail;

    /* fast_logmath_add() relies on the table being one byte wide. */
    if (logmath_get_width(s->lmath_8b) != 1) {
        E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
                logmath_get_base(s->lmath_8b));
        goto fail;
    }

    /* Stream layout comes from the feature module: one vector length
     * per stream. */
    s->n_feat = feat_dimension1(fcb);
    s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
    for (feat = 0; feat < s->n_feat; ++feat)
        s->veclen[feat] = feat_dimension2(fcb, feat);

    /* Means, then variances. */
    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fgau) < 0)
        goto fail;
    s->means = (mfcc_t **)fgau;

    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fgau) < 0)
        goto fail;
    s->vars = (mfcc_t **)fgau;

    /* Precompute (and fixed-point-ize) means, variances, and determinants. */
    s->dets = (mfcc_t **)ckd_calloc_2d(s->n_feat, s->n_density,
                                       sizeof(**s->dets));
    s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));

    /* Mixture weights: a senone dump takes precedence over -mixw. */
    sendump_path = cmd_ln_str_r(s->config, "-sendump");
    if (sendump_path != NULL)
        read_sendump(s, mdef, sendump_path);
    else
        read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
                  cmd_ln_float32_r(s->config, "-mixwfloor"));
    s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");

    /* Per-stream top-N beams, with -topn as the upper bound. */
    s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
    s->max_topn = cmd_ln_int32_r(s->config, "-topn");
    split_topn(cmd_ln_str_r(s->config, "-topn_beam"),
               s->topn_beam, s->n_feat);
    E_INFO("Maximum top-N: %d ", s->max_topn);
    E_INFOCONT("Top-N beams:");
    for (feat = 0; feat < s->n_feat; ++feat) {
        E_INFOCONT(" %d", s->topn_beam[feat]);
    }
    E_INFOCONT("\n");

    /* History of top-N scores: -pl_window + 2 frames deep. */
    s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
    s->topn_hist = (vqFeature_t ***)
        ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
                      sizeof(***s->topn_hist));
    s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
                                   sizeof(**s->topn_hist_n));

    /* Seed every history slot with the worst distance and with its own
     * rank as the codeword. */
    for (frame = 0; frame < s->n_topn_hist; ++frame) {
        for (feat = 0; feat < s->n_feat; ++feat) {
            int rank;
            for (rank = 0; rank < s->max_topn; ++rank) {
                s->topn_hist[frame][feat][rank].score = WORST_DIST;
                s->topn_hist[frame][feat][rank].codeword = rank;
            }
        }
    }

    return s;

fail:
    /* Release whatever was built so far. */
    s2_semi_mgau_free(s);
    return NULL;
}