int main(int argc, char *argv[]) { logmath_t *lmath; cmd_ln_t *config; acmod_t *acmod; ps_mgau_t *ps; ptm_mgau_t *s; int i, lastcb; lmath = logmath_init(1.0001, 0, 0); config = cmd_ln_init(NULL, ps_args(), TRUE, "-compallsen", "yes", "-input_endian", "little", NULL); cmd_ln_parse_file_r(config, ps_args(), MODELDIR "/en-us/en-us/feat.params", FALSE); cmd_ln_set_str_extra_r(config, "_mdef", MODELDIR "/en-us/en-us/mdef"); cmd_ln_set_str_extra_r(config, "_mean", MODELDIR "/en-us/en-us/means"); cmd_ln_set_str_extra_r(config, "_var", MODELDIR "/en-us/en-us/variances"); cmd_ln_set_str_extra_r(config, "_tmat", MODELDIR "/en-us/en-us/transition_matrices"); cmd_ln_set_str_extra_r(config, "_sendump", MODELDIR "/en-us/en-us/sendump"); cmd_ln_set_str_extra_r(config, "_mixw", NULL); cmd_ln_set_str_extra_r(config, "_lda", NULL); cmd_ln_set_str_extra_r(config, "_senmgau", NULL); err_set_debug_level(3); TEST_ASSERT(config); TEST_ASSERT((acmod = acmod_init(config, lmath, NULL, NULL))); TEST_ASSERT((ps = acmod->mgau)); TEST_EQUAL(0, strcmp(ps->vt->name, "ptm")); s = (ptm_mgau_t *)ps; E_DEBUG(2,("PTM model loaded: %d codebooks, %d senones, %d frames of history\n", s->g->n_mgau, s->n_sen, s->n_fast_hist)); E_DEBUG(2,("Senone to codebook mappings:\n")); lastcb = s->sen2cb[0]; E_DEBUG(2,("\t%d: 0", lastcb)); for (i = 0; i < s->n_sen; ++i) { if (s->sen2cb[i] != lastcb) { lastcb = s->sen2cb[i]; E_DEBUGCONT(2,("-%d\n", i-1)); E_DEBUGCONT(2,("\t%d: %d", lastcb, i)); } } E_INFOCONT("-%d\n", i-1); run_acmod_test(acmod); #if 0 /* Replace it with ms_mgau. */ ptm_mgau_free(ps); cmd_ln_set_str_r(config, "-mixw", MODELDIR "/en-us/en-us/mixture_weights"); TEST_ASSERT((acmod->mgau = ms_mgau_init(acmod, lmath, acmod->mdef))); run_acmod_test(acmod); cmd_ln_free_r(config); #endif return 0; }
int main(int argc, char *argv[]) { cmd_ln_t *config; ngram_model_t *lm = NULL; logmath_t *lmath; int itype, otype; char const *kase; if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL) return 1; if (cmd_ln_boolean_r(config, "-help")) { usagemsg(argv[0]); } err_set_debug_level(cmd_ln_int32_r(config, "-debug")); /* Create log math object. */ if ((lmath = logmath_init (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) { E_FATAL("Failed to initialize log math\n"); } if (cmd_ln_str_r(config, "-i") == NULL || cmd_ln_str_r(config, "-i") == NULL) { E_ERROR("Please specify both input and output models\n"); goto error_out; } /* Load the input language model. */ if (cmd_ln_str_r(config, "-ifmt")) { if ((itype = ngram_str_to_type(cmd_ln_str_r(config, "-ifmt"))) == NGRAM_INVALID) { E_ERROR("Invalid input type %s\n", cmd_ln_str_r(config, "-ifmt")); goto error_out; } lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), itype, lmath); } else { lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"), NGRAM_AUTO, lmath); } /* Guess or set the output language model type. */ if (cmd_ln_str_r(config, "-ofmt")) { if ((otype = ngram_str_to_type(cmd_ln_str_r(config, "-ofmt"))) == NGRAM_INVALID) { E_ERROR("Invalid output type %s\n", cmd_ln_str_r(config, "-ofmt")); goto error_out; } } else { otype = ngram_file_name_to_type(cmd_ln_str_r(config, "-o")); } /* Recode the language model if desired. */ if (cmd_ln_str_r(config, "-ienc")) { if (ngram_model_recode(lm, cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")) != 0) { E_ERROR("Failed to recode language model from %s to %s\n", cmd_ln_str_r(config, "-ienc"), cmd_ln_str_r(config, "-oenc")); goto error_out; } } /* Case fold if requested. */ if ((kase = cmd_ln_str_r(config, "-case"))) { if (0 == strcmp(kase, "lower")) { ngram_model_casefold(lm, NGRAM_LOWER); } else if (0 == strcmp(kase, "upper")) { ngram_model_casefold(lm, NGRAM_UPPER); } else { E_ERROR("Unknown value for -case: %s\n", kase); goto error_out; } } /* Write the output language model. */ if (ngram_model_write(lm, cmd_ln_str_r(config, "-o"), otype) != 0) { E_ERROR("Failed to write language model in format %s to %s\n", ngram_type_to_str(otype), cmd_ln_str_r(config, "-o")); goto error_out; } /* That's all folks! */ ngram_model_free(lm); return 0; error_out: ngram_model_free(lm); return 1; }
int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) { const char *path; const char *keyphrase; int32 lw; if (config && config != ps->config) { cmd_ln_free_r(ps->config); ps->config = cmd_ln_retain(config); } err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug")); ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir"); ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir"); ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir"); /* Fill in some default arguments. */ ps_init_defaults(ps); /* Free old searches (do this before other reinit) */ ps_free_searches(ps); ps->searches = hash_table_new(3, HASH_CASE_YES); /* Free old acmod. */ acmod_free(ps->acmod); ps->acmod = NULL; /* Free old dictionary (must be done after the two things above) */ dict_free(ps->dict); ps->dict = NULL; /* Free d2p */ dict2pid_free(ps->d2p); ps->d2p = NULL; /* Logmath computation (used in acmod and search) */ if (ps->lmath == NULL || (logmath_get_base(ps->lmath) != (float64)cmd_ln_float32_r(ps->config, "-logbase"))) { if (ps->lmath) logmath_free(ps->lmath); ps->lmath = logmath_init ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0, cmd_ln_boolean_r(ps->config, "-bestpath")); } /* Acoustic model (this is basically everything that * uttproc.c, senscr.c, and others used to do) */ if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL) return -1; if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) { /* Initialize an auxiliary phone loop search, which will run in * "parallel" with FSG or N-Gram search. */ if ((ps->phone_loop = phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL) return -1; hash_table_enter(ps->searches, ckd_salloc(ps_search_name(ps->phone_loop)), ps->phone_loop); } /* Dictionary and triphone mappings (depends on acmod). */ /* FIXME: pass config, change arguments, implement LTS, etc. */ if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL) return -1; if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) return -1; lw = cmd_ln_float32_r(config, "-lw"); /* Determine whether we are starting out in FSG or N-Gram search mode. * If neither is used skip search initialization. */ /* Load KWS if one was specified in config */ if ((keyphrase = cmd_ln_str_r(config, "-keyphrase"))) { if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase)) return -1; ps_set_search(ps, PS_DEFAULT_SEARCH); } if ((path = cmd_ln_str_r(config, "-kws"))) { if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path)) return -1; ps_set_search(ps, PS_DEFAULT_SEARCH); } /* Load an FSG if one was specified in config */ if ((path = cmd_ln_str_r(config, "-fsg"))) { fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw); if (!fsg) return -1; if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) return -1; ps_set_search(ps, PS_DEFAULT_SEARCH); } /* Or load a JSGF grammar */ if ((path = cmd_ln_str_r(config, "-jsgf"))) { if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path) || ps_set_search(ps, PS_DEFAULT_SEARCH)) return -1; } if ((path = cmd_ln_str_r(ps->config, "-allphone"))) { if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path) || ps_set_search(ps, PS_DEFAULT_SEARCH)) return -1; } if ((path = cmd_ln_str_r(ps->config, "-lm")) && !cmd_ln_boolean_r(ps->config, "-allphone")) { if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path) || ps_set_search(ps, PS_DEFAULT_SEARCH)) return -1; } if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) { const char *name; ngram_model_t *lmset; ngram_model_set_iter_t *lmset_it; if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) { E_ERROR("Failed to read language model control file: %s\n", path); return -1; } for(lmset_it = ngram_model_set_iter(lmset); lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) { ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name); E_INFO("adding search %s\n", name); if (ps_set_lm(ps, name, lm)) { ngram_model_free(lm); ngram_model_set_iter_free(lmset_it); return -1; } ngram_model_free(lm); } name = cmd_ln_str_r(config, "-lmname"); if (name) ps_set_search(ps, name); else { E_ERROR("No default LM name (-lmname) for `-lmctl'\n"); return -1; } } /* Initialize performance timer. */ ps->perf.name = "decode"; ptmr_init(&ps->perf); return 0; }
void kb_init(kb_t * kb, cmd_ln_t *config) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; int32 cisencnt; /* STRUCTURE: Initialize the kb structure to zero, just in case */ memset(kb, 0, sizeof(*kb)); kb->kbcore = kbcore_init(config); if (kb->kbcore == NULL) E_FATAL("Initialization of kb failed\n"); kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); d2p = kbcore_dict2pid(kbcore); err_set_debug_level(cmd_ln_int32_r(config, "-debug")); /* STRUCTURE INITIALIZATION: Initialize the beam data structure */ if (cmd_ln_exists_r(config, "-ptranskip")) { kb->beam = beam_init(cmd_ln_float64_r(config, "-beam"), cmd_ln_float64_r(config, "-pbeam"), cmd_ln_float64_r(config, "-wbeam"), cmd_ln_float64_r(config, "-wend_beam"), cmd_ln_int32_r(config, "-ptranskip"), mdef_n_ciphone(mdef), kbcore->logmath ); /* REPORT : Report the parameters in the beam data structure */ if (REPORT_KB) beam_report(kb->beam); } /* STRUCTURE INITIALIZATION: Initialize the fast GMM computation data structure */ if (cmd_ln_exists_r(config, "-ci_pbeam")) { kb->fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"), cmd_ln_int32_r(config, "-cond_ds"), cmd_ln_int32_r(config, "-dist_ds"), cmd_ln_int32_r(config, "-gs4gs"), cmd_ln_int32_r(config, "-svq4svq"), cmd_ln_float64_r(config, "-subvqbeam"), cmd_ln_float64_r(config, "-ci_pbeam"), cmd_ln_float64_r(config, "-tighten_factor"), cmd_ln_int32_r(config, "-maxcdsenpf"), mdef->n_ci_sen, kbcore->logmath); /* REPORT : Report the parameters in the fast_gmm_t data struture */ if (REPORT_KB) fast_gmm_report(kb->fastgmm); } /* STRUCTURE INITIALIZATION: Initialize the phoneme lookahead data structure */ if (cmd_ln_exists_r(config, "-pl_beam")) { kb->pl = pl_init(cmd_ln_int32_r(config, "-pheurtype"), cmd_ln_float64_r(config, "-pl_beam"), mdef_n_ciphone(mdef), kbcore->logmath ); /* REPORT : Report the parameters in the pl_t data struture */ if (REPORT_KB) pl_report(kb->pl); } /* STRUCTURE INITIALIZATION: Initialize the acoustic score data structure */ { int32 pl_window = 1; if (cmd_ln_exists_r(config, "-pl_window")) pl_window = cmd_ln_int32_r(config, "-pl_window"); for (cisencnt = 0; cisencnt == mdef->cd2cisen[cisencnt]; cisencnt++) ; kb->ascr = ascr_init(kbcore_n_mgau(kbcore), kb->kbcore->dict2pid->n_comstate, mdef_n_sseq(mdef), dict2pid_n_comsseq(d2p), pl_window, cisencnt); if (REPORT_KB) ascr_report(kb->ascr); } /* Initialize the front end if -adcin is specified */ if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) { if ((kb->fe = fe_init_auto_r(config)) == NULL) { E_FATAL("fe_init_auto_r() failed\n"); } } /* STRUCTURE INITIALIZATION : The feature vector */ if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL) E_FATAL("feat_array_alloc() failed\n"); /* STRUCTURE INITIALIZATION : The statistics for the search */ kb->stat = stat_init(); /* STRUCTURE INITIALIZATION : The adaptation routines of the search */ kb->adapt_am = adapt_am_init(); if (cmd_ln_str_r(config, "-mllr")) { kb_setmllr(cmd_ln_str_r(config, "-mllr"), cmd_ln_str_r(config, "-cb2mllr"), kb); } /* CHECK: make sure when (-cond_ds) is specified, a Gaussian map is also specified */ if (cmd_ln_int32_r(config, "-cond_ds") > 0 && kb->kbcore->gs == NULL) E_FATAL ("Conditional Down Sampling require the use of Gaussian Selection map\n"); /* MEMORY ALLOCATION : Word best score and exit */ /* Open hypseg file if specified */ kb->matchsegfp = kb->matchfp = NULL; kb->matchsegfp = file_open(cmd_ln_str_r(config, "-hypseg")); kb->matchfp = file_open(cmd_ln_str_r(config, "-hyp")); if (cmd_ln_exists_r(config, "-hmmdump")) kb->hmmdumpfp = cmd_ln_int32_r(config, "-hmmdump") ? stderr : NULL; /* STRUCTURE INITIALIZATION : The search data structure, done only after kb is initialized kb is acted as a clipboard. */ if (cmd_ln_exists_r(config, "-op_mode")) { /* -op_mode, if set (i.e. not -1), takes precedence over -mode. */ if (cmd_ln_int32_r(config, "-op_mode") != -1) kb->op_mode = cmd_ln_int32_r(config, "-op_mode"); else kb->op_mode = srch_mode_str_to_index(cmd_ln_str_r(config, "-mode")); E_INFO("SEARCH MODE INDEX %d\n", kb->op_mode); if ((kb->srch = (srch_t *) srch_init(kb, kb->op_mode)) == NULL) { E_FATAL("Search initialization failed. Forced exit\n"); } if (REPORT_KB) { srch_report(kb->srch); } } }
int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) { char const *lmfile, *lmctl = NULL; if (config && config != ps->config) { cmd_ln_free_r(ps->config); ps->config = config; } #ifndef _WIN32_WCE /* Set up logging. */ if (cmd_ln_str_r(ps->config, "-logfn")) err_set_logfile(cmd_ln_str_r(ps->config, "-logfn")); #endif err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug")); ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir"); ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir"); /* Fill in some default arguments. */ ps_init_defaults(ps); /* Free old searches (do this before other reinit) */ ps_free_searches(ps); /* Free old acmod. */ acmod_free(ps->acmod); ps->acmod = NULL; /* Free old dictionary (must be done after the two things above) */ dict_free(ps->dict); ps->dict = NULL; /* Logmath computation (used in acmod and search) */ if (ps->lmath == NULL || (logmath_get_base(ps->lmath) != (float64)cmd_ln_float32_r(ps->config, "-logbase"))) { if (ps->lmath) logmath_free(ps->lmath); ps->lmath = logmath_init ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0, cmd_ln_boolean_r(ps->config, "-bestpath")); } /* Acoustic model (this is basically everything that * uttproc.c, senscr.c, and others used to do) */ if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL) return -1; /* Make the acmod's feature buffer growable if we are doing two-pass search. */ if (cmd_ln_boolean_r(ps->config, "-fwdflat") && cmd_ln_boolean_r(ps->config, "-fwdtree")) acmod_set_grow(ps->acmod, TRUE); if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) { /* Initialize an auxiliary phone loop search, which will run in * "parallel" with FSG or N-Gram search. */ if ((ps->phone_loop = phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL) return -1; ps->searches = glist_add_ptr(ps->searches, ps->phone_loop); } /* Dictionary and triphone mappings (depends on acmod). */ /* FIXME: pass config, change arguments, implement LTS, etc. */ if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL) return -1; /* Determine whether we are starting out in FSG or N-Gram search mode. */ if (cmd_ln_str_r(ps->config, "-fsg") || cmd_ln_str_r(ps->config, "-jsgf")) { ps_search_t *fsgs; if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) return -1; if ((fsgs = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL) return -1; fsgs->pls = ps->phone_loop; ps->searches = glist_add_ptr(ps->searches, fsgs); ps->search = fsgs; } else if ((lmfile = cmd_ln_str_r(ps->config, "-lm")) || (lmctl = cmd_ln_str_r(ps->config, "-lmctl"))) { ps_search_t *ngs; if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) return -1; if ((ngs = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p)) == NULL) return -1; ngs->pls = ps->phone_loop; ps->searches = glist_add_ptr(ps->searches, ngs); ps->search = ngs; } /* Otherwise, we will initialize the search whenever the user * decides to load an FSG or a language model. */ else { if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) return -1; } /* Initialize performance timer. */ ps->perf.name = "decode"; ptmr_init(&ps->perf); return 0; }