int main(int _argc, char **_argv) { char *ctrlfn; char *cfgfn; cmd_ln_t *config = NULL; print_appl_info(_argv[0]); if (_argc != 4) { printf("\nUSAGE: %s <ctrlfile> <rawdir> <cfgfile>\n", _argv[0]); return -1; } ctrlfn = _argv[1]; rawdirfn = _argv[2]; cfgfn = _argv[3]; if ((config = cmd_ln_parse_file_r(config, S3_DECODE_ARG_DEFS, cfgfn, TRUE)) == NULL) E_FATAL("Bad configuration file %s.\n", cfgfn); if (s3_decode_init(&decoder, config) != S3_DECODE_SUCCESS) E_FATAL("Failed to initialize live-decoder.\n"); fe = fe_init_auto_r(config); st = decoder.kb.stat; ptmr_init(&(st->tm)); if (ctrlfn) { /* When -ctlfile is speicified, corpus.c will look at -ctl_lm and -ctl_mllr to get the corresponding LM and MLLR for the utterance */ st->tm = ctl_process(ctrlfn, cmd_ln_str_r(config, "-ctl_lm"), cmd_ln_str_r(config, "-ctl_mllr"), cmd_ln_int32_r(config, "-ctloffset"), cmd_ln_int32_r(config, "-ctlcount"), utt_livepretend, &(decoder.kb)); } else { E_FATAL("control file is not specified.\n"); } stat_report_corpus(decoder.kb.stat); s3_decode_close(&decoder); fe_free(fe); return 0; }
int main(int argc, char *argv[]) { print_appl_info(argv[0]); cmd_ln_appl_enter(argc, argv, "default.arg", defn); unlimit(); config = cmd_ln_get(); logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1, cmd_ln_int32_r(config, "-log3table")); E_INFO("Value of base %f \n", cmd_ln_float32_r(config, "-logbase")); models_init(); ptmr_init(&tm_utt); if ((inmatchsegfp = fopen(cmd_ln_str_r(config, "-inhypseg"), "r")) == NULL) E_ERROR("fopen(%s,r) failed\n", cmd_ln_str_r(config, "-inhypseg")); if ((outconfmatchsegfp = fopen(cmd_ln_str_r(config, "-output"), "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", cmd_ln_str_r(config, "-output")); if (cmd_ln_str_r(config, "-ctl")) { ctl_process(cmd_ln_str_r(config, "-ctl"), cmd_ln_str_r(config, "-ctl_lm"), NULL, cmd_ln_int32_r(config, "-ctloffset"), cmd_ln_int32_r(config, "-ctlcount"), utt_confidence, NULL); } else { E_FATAL("-ctl is not specified\n"); } #if (! WIN32) system("ps auxwww | grep s3dag"); #endif fclose(outconfmatchsegfp); fclose(inmatchsegfp); models_free(); logmath_free(logmath); cmd_ln_free_r(config); return 0; }
/*
 * N-best (A*) search driver.
 *
 * Reads options through cmd_ln_appl_enter(), initializes log math and the
 * models, records the -nbestdir option in the file-scope variable
 * nbestdir and runs ctl_process() over the control file (-ctl) with
 * utt_astar as the per-utterance callback.  A missing -ctl is fatal.
 *
 * Returns 0 on success.
 */
int
main(int32 argc, char *argv[])
{
    const char *ctl_path;

    cmd_ln_appl_enter(argc, argv, "default.arg", defn);
    config = cmd_ln_get();

    logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"),
                         1,
                         cmd_ln_int32_r(config, "-log3table"));

    /* Read in input databases */
    models_init();

    ptmr_init(&tm_utt);

    nbestdir = cmd_ln_str_r(config, "-nbestdir");

    ctl_path = cmd_ln_str_r(config, "-ctl");
    if (!ctl_path) {
        E_FATAL("-ctl is not specified\n");
    }
    else {
        ctl_process(ctl_path,
                    cmd_ln_str_r(config, "-ctl_lm"),
                    NULL,
                    cmd_ln_int32_r(config, "-ctloffset"),
                    cmd_ln_int32_r(config, "-ctlcount"),
                    utt_astar,
                    NULL);
    }

    models_free();
    logmath_free(logmath);

#if (! WIN32)
    /* Prints process information via the shell before exiting. */
    system("ps aguxwww | grep s3astar");
#endif

    cmd_ln_free_r(config);
    return 0;
}
/**
 * Re-initialize a decoder, optionally switching to a new configuration.
 *
 * @param ps     Decoder to re-initialize.
 * @param config New configuration, or NULL (or the decoder's current
 *               configuration) to keep using ps->config.  A new
 *               configuration is retained with cmd_ln_retain() and the
 *               previous one is released with cmd_ln_free_r().
 * @return 0 on success, -1 if any model or search fails to initialize.
 *
 * All options are read through ps->config.  Earlier code read several of
 * them (-lw, -keyphrase, -kws, -fsg, -jsgf, -lmname) through the `config`
 * parameter, which may legitimately be NULL.  The language weight is held
 * in a float32; it used to be stored in an int32, which truncated
 * fractional values.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyphrase;
    float32 lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search): rebuilt only when
     * absent or when the configured log base changed. */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search.
         * NOTE(review): ps->dict is still NULL at this point (it is
         * rebuilt just below) -- confirm phone_loop_search_init() does
         * not need it. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ckd_salloc(ps_search_name(ps->phone_loop)),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    lw = cmd_ln_float32_r(ps->config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
        if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        if (!fsg)
            return -1;
        if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Or load a JSGF grammar */
    if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
        if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
        if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    /* An N-gram LM becomes the default search only when -allphone is off. */
    if ((path = cmd_ln_str_r(ps->config, "-lm"))
        && !cmd_ln_boolean_r(ps->config, "-allphone")) {
        if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    /* LM control file: register one search per LM in the set, then
     * activate the one named by -lmname. */
    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        for (lmset_it = ngram_model_set_iter(lmset);
             lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
                ngram_model_free(lm);
                ngram_model_set_iter_free(lmset_it);
                return -1;
            }
            ngram_model_free(lm);
        }

        name = cmd_ln_str_r(ps->config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else {
            E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
            return -1;
        }
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/*
 * K-means vector quantization of `rows` input vectors of `cols` columns
 * each into `vqrows` codewords.
 *
 *   data     input vectors, data[0..rows-1][0..cols-1]
 *   epsilon  relative squared-error improvement below which iteration
 *            stops (must be > 0)
 *   maxiter  iteration limit (must be >= 0)
 *   mean     output codewords, mean[0..vqrows-1][0..cols-1]
 *   map      output labels, map[i] = codeword assigned to data[i]
 *
 * Codewords are seeded from randomly chosen, distinct input rows.
 * Returns the total squared error of the last labelling pass.
 */
float64
vector_vqgen(float32 **data, int32 rows, int32 cols, int32 vqrows,
             float64 epsilon, int32 maxiter, float32 **mean, int32 *map)
{
    static uint32 seed = 1;
    int32 row, col, cw, pick, iter;
    float64 total_err, last_err = 0, dist, scale;
    bitvec_t taken;
    int32 *n_mapped;
    float32 *fallback;
    ptmr_t timer;

    assert((rows >= vqrows) && (maxiter >= 0) && (epsilon > 0.0));

    taken = bitvec_alloc(rows);

    ptmr_init(&timer);
    ptmr_start(&timer);

    /* Seed the random number generator. */
#ifndef WIN32
    srandom(seed);
    seed ^= random();
#else
    srand((unsigned) time(NULL));
#endif

    /* Copy vqrows distinct, randomly chosen input rows into mean[]. */
    for (cw = 0; cw < vqrows; cw++) {
#ifndef WIN32
        pick = (random() & (int32) 0x7fffffff) % rows;
#else
        pick = (rand() & (int32) 0x7fffffff) % rows;
#endif
        /*
         * Probe forward (wrapping at the end) to the next row not yet
         * taken.  Fewer than vqrows <= rows rows are taken at this
         * point, so a free row always exists.
         */
        while (bitvec_is_set(taken, pick)) {
            ++pick;
            if (pick >= rows)
                pick = 0;
        }
        bitvec_set(taken, pick);

        /* Rows with identical content are not detected here. */
        memcpy((void *) (mean[cw]), (void *) (data[pick]),
               cols * sizeof(float32));
    }
    bitvec_free(taken);

    n_mapped = (int32 *) ckd_calloc(vqrows, sizeof(int32));

    /* Codewords left without any data in an iteration are replaced by
     * this vector, computed once from the initial codewords. */
    fallback = (float32 *) ckd_calloc(cols, sizeof(float32));
    vector_mean(fallback, mean, vqrows, cols);

    iter = 0;
    for (;;) {
        /* Label every input vector and accumulate the squared error. */
        total_err = 0.0;
        for (row = 0; row < rows; row++) {
            map[row] = vector_vqlabel(data[row], mean, vqrows, cols, &dist);
            total_err += dist;
        }
        ptmr_stop(&timer);

        if (iter > 0) {
            E_INFO("Iter %4d: %.1fs CPU; sqerr= %e; delta= %e\n",
                   iter, timer.t_cpu, total_err,
                   (last_err - total_err) / last_err);
        }
        else {
            E_INFO("Iter %4d: %.1fs CPU; sqerr= %e\n",
                   iter, timer.t_cpu, total_err);
        }

        /* Stop on a perfect fit, on the iteration limit, or when the
         * relative improvement drops below epsilon. */
        if (total_err == 0.0)
            break;
        if (iter >= maxiter - 1)
            break;
        if ((iter > 0) && (((last_err - total_err) / last_err) < epsilon))
            break;

        last_err = total_err;
        ptmr_start(&timer);

        /* Re-estimate: clear the accumulators ... */
        for (cw = 0; cw < vqrows; cw++) {
            for (col = 0; col < cols; col++)
                mean[cw][col] = 0.0;
            n_mapped[cw] = 0;
        }
        /* ... sum the members of every codeword ... */
        for (row = 0; row < rows; row++) {
            vector_accum(mean[map[row]], data[row], cols);
            n_mapped[map[row]]++;
        }
        /* ... and normalize.  A single member needs no scaling. */
        for (cw = 0; cw < vqrows; cw++) {
            if (n_mapped[cw] > 1) {
                scale = 1.0 / (float64) (n_mapped[cw]);
                for (col = 0; col < cols; col++)
                    mean[cw][col] =
                        (float32) ((float64) mean[cw][col] * (float64) scale);
            }
            else if (n_mapped[cw] == 0) {
                E_ERROR("Iter %d: mean[%d] unmapped\n", iter, cw);
                memcpy(mean[cw], fallback, cols * sizeof(float32));
            }
        }

        iter++;
    }

    ckd_free(n_mapped);
    ckd_free(fallback);

    return total_err;
}
/*
 * Initialize the decoder knowledge base `kb` from the global command-line
 * options.
 *
 * ARCHAN: to allow backward compatibility, -lm and -lmctlfn coexist.  This
 * makes the current implementation more complicated than necessary: most
 * steps below have one branch for an LM set (lmset) and one for a single
 * LM (lm).
 *
 * Steps, in order: build the kbcore; verify the start/finish words exist
 * in the dictionary and LM(s); unlink them between dictionary and LM;
 * allocate the activity arrays; build the left-context list; build the
 * unigram and filler lexical trees; set up beams and pruning parameters;
 * allocate the feature buffer, Viterbi history and HMM histogram; open
 * the -hypseg and -hyp output files if requested.
 *
 * Fix relative to the earlier code: in the LM-set branch the "flatten
 * probabilities" loop used `i`, which is also the index of the enclosing
 * per-LM loop.  With -treeugprob == 0 that left i == n, so the following
 * lextree build indexed ugtreeMulti[] and lmset[] with the wrong value.
 * The inner loop now uses `j`.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              cmd_ln_str("-feat"),
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-lmctlfn"),
                              cmd_ln_str("-lmdumpdir"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_str("-senmgau"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-gs"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if (kb->kbcore == NULL) {
        E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset = kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
                E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD, lmset[i].name);
        }
    }
    else if (lm) {
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
            lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

            for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
            for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
        }
    }
    else if (lm) {
        /* Single-LM case: the dictionary-to-LM map lives in kbcore. */
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build set of all possible left contexts: the last phone of every
     * dictionary word unless it is a filler phone, plus silence.  The
     * list is terminated with BAD_S3CIPID. */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical trees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lmset) {
        /* n_lextree trees per LM, stored LM-major in ugtreeMulti. */
        kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
        /* Just allocate pointers */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            E_INFO("Creating Unigram Table for lm %d name %s\n", i, lmset[i].name);

            /* Reset the shared word table before reusing it for this LM. */
            for (j = 0; j < dict_size(dict); j++) {
                wp[j].wid = -1;
                wp[j].prob = -1;
            }
            n = lm_ug_wordprob (lmset[i].lm, dict, MAX_NEG_INT32, wp);
            E_INFO("Size of word table after unigram + words in class: %d.\n", n);
            if (n < 1)
                E_FATAL("%d active words in %s\n", n, lmset[i].name);
            n = wid_wordprob2alt(dict, wp, n);
            E_INFO("Size of word table after adding alternative prons: %d.\n", n);

            if (cmd_ln_int32("-treeugprob") == 0) {
                /* Flatten all initial probabilities.  Must not use `i`
                 * here: it indexes the enclosing per-LM loop. */
                for (j = 0; j < n; j++)
                    wp[j].prob = -1;
            }

            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
                lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
                E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
                       kb->n_lextree, i, lmset[i].name,
                       lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
            }
        }
    }
    else if (lm) {
        E_INFO("Creating Unigram Table\n");
        n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n", n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;        /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }
    /* NOTE(review): if neither lmset nor lm is set, kb->ugtree stays NULL
     * and is dereferenced further down -- presumably kbcore_init()
     * guarantees one of them; confirm. */

    /* Create filler lextrees */
    /* ARCHAN: only one set of filler trees is built even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree,
           lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        if (lmset) {
            E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    /* Optimization-related parameters */
    kb->ds_ratio = cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n", kb->ds_ratio);

    kb->rec_bstcid = -1;
    kb->skip_count = 0;

    kb->cond_ds = cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n", kb->cond_ds);

    if (kb->cond_ds > 0 && kb->kbcore->gs == NULL)
        E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs = cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n", kb->gs4gs);

    kb->svq4svq = cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n", kb->svq4svq);

    kb->ci_pbeam = -1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n", kb->ci_pbeam);
    /* NOTE(review): the threshold tested here (10000000) differs from the
     * one quoted in the message (1000000); the message is left as is. */
    if (kb->ci_pbeam > 10000000) {
        E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }

    kb->wend_beam = -1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n", kb->wend_beam);

    kb->pl_window = cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n", kb->pl_window);

    kb->pl_window_start = 0;

    kb->pl_beam = logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n", kb->pl_beam);

    /* Count the leading senones that map to themselves in cd2cisen. */
    for (cisencnt = 0; cisencnt == mdef->cd2cisen[cisencnt]; cisencnt++)
        ;

    kb->cache_ci_senscr = (int32**)ckd_calloc_2d(kb->pl_window, cisencnt, sizeof(int32));
    kb->cache_best_list = (int32*)ckd_calloc(kb->pl_window, sizeof(int32));
    kb->phn_heur_list = (int32*)ckd_calloc(mdef_n_ciphone (mdef), sizeof(int32));

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* HMM histogram: one bin per hmm_hist_binsize tree nodes, sized from
     * the first unigram tree plus the first filler tree. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if (lmset)
        n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
        n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified; failure is logged, not fatal. */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    /* Open hyp file if specified; failure is logged, not fatal. */
    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/*
 * Initialize the decoder knowledge base `kb` from the global command-line
 * options (single-LM variant).
 *
 * Steps, in order: build the kbcore; verify the start/finish words exist
 * in the dictionary and LM and unlink them between the two; allocate the
 * activity arrays; build the active word list and the left-context list;
 * build the unigram and filler lexical trees; set up the beams, feature
 * buffer, Viterbi history and HMM histogram; open the -hypseg output file
 * if requested.
 *
 * The result of kbcore_init() is checked for NULL before it is
 * dereferenced, matching the multi-LM variant of this routine.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              "1s_c_d_dd",      /* Hack!! Hardwired constant for -feat argument */
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if (kb->kbcore == NULL)
        E_FATAL("Initialization of kb failed\n");

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
        E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
        kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
        kbcore->dict2lmwid[w] = BAD_S3LMWID;

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    E_INFO("Building lextrees\n");

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
    n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp);
    if (n < 1)
        E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */

    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
        for (i = 0; i < n; i++)
            wp[i].prob = -1;    /* Flatten all initial probabilities */
    }

    /* Build set of all possible left contexts: the last phone of every
     * dictionary word unless it is a filler phone, plus silence.  The
     * list is terminated with BAD_S3CIPID. */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    /* Create the desired no. of unigram lextrees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
        lextree_type (kb->ugtree[i]) = 0;
    }
    bitvec_free (lc_active);
    ckd_free ((void *) lc);

    /* Create filler lextrees; the word table is reused for the filler
     * words and their fill penalties. */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }
    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);

    E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n",
           kb->n_lextree, lextree_n_node(kb->ugtree[0]), lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* HMM histogram: one bin per hmm_hist_binsize tree nodes, sized from
     * the first unigram tree plus the first filler tree. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified; failure is logged, not fatal. */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef WIN32
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/**
 * Re-initialize a decoder, optionally switching to a new configuration.
 *
 * @param ps     Decoder to re-initialize.
 * @param config New configuration, or NULL (or the decoder's current
 *               configuration) to keep ps->config.  A new configuration
 *               is stored directly after the old one is released with
 *               cmd_ln_free_r().
 * @return 0 on success, -1 if the acoustic model, phone loop, dictionary,
 *         dict2pid mapping or the initial search fails to initialize.
 *
 * The initial search is an FSG search when -fsg or -jsgf is given, an
 * N-gram search when -lm or -lmctl is given, and otherwise none.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    float64 logbase;
    int want_fsg;
    int want_ngram;
    ps_search_t *initial = NULL;

    /* Adopt the new configuration, if one was supplied. */
    if (config != NULL && ps->config != config) {
        cmd_ln_free_r(ps->config);
        ps->config = config;
    }

#ifndef _WIN32_WCE
    /* Set up logging. */
    if (cmd_ln_str_r(ps->config, "-logfn") != NULL)
        err_set_logfile(cmd_ln_str_r(ps->config, "-logfn"));
#endif
    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));

    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Tear down in this order: searches, acoustic model, dictionary. */
    ps_free_searches(ps);

    acmod_free(ps->acmod);
    ps->acmod = NULL;

    dict_free(ps->dict);
    ps->dict = NULL;

    /* Log math is kept unless it is missing or the log base changed. */
    logbase = (float64) cmd_ln_float32_r(ps->config, "-logbase");
    if (ps->lmath != NULL && logmath_get_base(ps->lmath) != logbase) {
        logmath_free(ps->lmath);
        ps->lmath = NULL;
    }
    if (ps->lmath == NULL) {
        ps->lmath = logmath_init(logbase, 0,
                                 cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model. */
    ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL);
    if (ps->acmod == NULL)
        return -1;

    /* Make the feature buffer growable for two-pass search. */
    if (cmd_ln_boolean_r(ps->config, "-fwdflat")
        && cmd_ln_boolean_r(ps->config, "-fwdtree")) {
        acmod_set_grow(ps->acmod, TRUE);
    }

    /* Auxiliary phone loop search, run alongside FSG or N-gram search. */
    ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
    if (ps->pl_window != 0) {
        ps->phone_loop =
            phone_loop_search_init(ps->config, ps->acmod, ps->dict);
        if (ps->phone_loop == NULL)
            return -1;
        ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
    }

    /* Dictionary (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    ps->dict = dict_init(ps->config, ps->acmod->mdef);
    if (ps->dict == NULL)
        return -1;

    /* Decide on the initial search mode; FSG takes precedence. */
    want_fsg = (cmd_ln_str_r(ps->config, "-fsg") != NULL)
        || (cmd_ln_str_r(ps->config, "-jsgf") != NULL);
    want_ngram = !want_fsg
        && ((cmd_ln_str_r(ps->config, "-lm") != NULL)
            || (cmd_ln_str_r(ps->config, "-lmctl") != NULL));

    /* The triphone mapping is needed in every mode. */
    ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
    if (ps->d2p == NULL)
        return -1;

    if (want_fsg)
        initial = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
    else if (want_ngram)
        initial = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);

    /* With neither, a search is set up later, when the user loads an
     * FSG or a language model. */
    if (want_fsg || want_ngram) {
        if (initial == NULL)
            return -1;
        initial->pls = ps->phone_loop;
        ps->searches = glist_add_ptr(ps->searches, initial);
        ps->search = initial;
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/*
 * Repeatedly process a single utterance file, `count` times, each time
 * waiting for the file's modification time to change first.
 *
 *   uttfile  path of the utterance file that is watched and processed
 *   count    number of times to process it
 *   func     per-utterance callback; may be NULL, in which case only the
 *            waiting and timing are performed
 *   kb       opaque pointer handed to func
 *
 * The utterance id passed to func is "<basename>_<8-digit pass number>".
 * It is formatted with snprintf() and the basename is clipped so that the
 * pass number always fits; an unbounded sprintf() could overflow uttid
 * for long file names.
 *
 * Returns the timer used for the passes (reset after each pass).
 */
ptmr_t
ctl_process_utt(const char *uttfile, int32 count,
                void (*func) (void *kb, utt_res_t * ur, int32 sf, int32 ef, char *uttid),
                void *kb)
{
    char utterance_file[16384];
    char uttid[4096];
    const char *base;
    int32 i, c;
    int32 ts, newts;
    ptmr_t tm;
    utt_res_t *ur;

    ptmr_init(&tm);
    ur = new_utt_res();
    base = path2basename(uttfile);

    ts = -1;
    for (c = 0; c < count; c++) {
        /* Wait for uttfile to change from previous iteration; poll once
         * per second and log only on the first miss. */
        for (i = 0;; i++) {
            newts = stat_mtime(uttfile);
            if ((newts >= 0) && (newts != ts))
                break;

            if (i == 0)
                E_INFO("Waiting for %s, count %d, c %d\n", uttfile, count, c);
            SLEEP_SEC(1);
        }
        ts = newts;

        /* Form uttid.  Reserve 12 bytes for '_', up to 10 digits and the
         * terminator so a long basename cannot push the counter out. */
        snprintf(uttid, sizeof(uttid), "%.*s_%08d",
                 (int) (sizeof(uttid) - 12), base, c);

        /* The callback API takes a non-const file name: hand it a copy. */
        strncpy(utterance_file, uttfile, sizeof(utterance_file) - 1);
        utterance_file[sizeof(utterance_file) - 1] = 0;

        /* Process this utterance */
        ptmr_start(&tm);
        if (func) {
            utt_res_set_uttfile(ur, utterance_file);
            (*func) (kb, ur, 0, -1, uttid);
        }
        ptmr_stop(&tm);

        E_INFO
            ("%s: %6.1f sec CPU, %6.1f sec Clk;  TOT: %8.1f sec CPU, %8.1f sec Clk\n\n",
             uttid, tm.t_cpu, tm.t_elapsed, tm.t_tot_cpu, tm.t_tot_elapsed);

        ptmr_reset(&tm);
    }

    if (ur)
        free_utt_res(ur);

    return tm;
}
ptmr_t ctl_process(const char *ctlfile, const char *ctllmfile, const char *ctlmllrfile, int32 nskip, int32 count, void (*func) (void *kb, utt_res_t * ur, int32 sf, int32 ef, char *uttid), void *kb) { FILE *fp; FILE *ctllmfp; FILE *ctlmllrfp; char uttfile[16384], uttid[4096]; char lmname[4096]; char regmatname[4096], cb2mllrname[4096]; char tmp[4096]; int32 sf, ef; utt_res_t *ur; ptmr_t tm; kb_t *k; k = (kb_t *) kb; ctllmfp = NULL; ctlmllrfp = NULL; ur = new_utt_res(); if (ctlfile) { if ((fp = fopen(ctlfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlfile); } else fp = stdin; if (ctllmfile) { E_INFO("LM is used in this session\n"); if ((ctllmfp = fopen(ctllmfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctllmfile); } if (ctlmllrfile) { E_INFO("MLLR is used in this session\n"); if ((ctlmllrfp = fopen(ctlmllrfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlmllrfile); } ptmr_init(&tm); if (nskip > 0) { E_INFO("Skipping %d entries at the beginning of %s\n", nskip, ctlfile); for (; nskip > 0; --nskip) { if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) { fclose(fp); return tm; } /*This checks the size of the control file of the lm in batch mode */ if (ctllmfile) { if (ctl_read_entry(ctllmfp, lmname, &sf, &ef, tmp) < 0) { fclose(ctllmfp); E_ERROR ("An LM control file is specified but LM cannot be read when skipping the %d-th sentence\n", nskip); return tm; } } /*This checks the size of the control file of the mllr in batch mode */ if (ctlmllrfile) { if (ctl_read_entry(ctlmllrfp, regmatname, &sf, &ef, tmp) < 0) { fclose(ctlmllrfp); E_ERROR ("A MLLR control file is specified but MLLR cannot be read when skipping the %d-th sentence\n", nskip); return tm; } } } } for (; count > 0; --count) { int32 tmp1, tmp2; if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) break; /*This checks the size of the control file in batch mode */ if (ctllmfile) { if (ctl_read_entry(ctllmfp, lmname, &tmp1, &tmp2, tmp) < 0) { fclose(ctllmfp); E_ERROR 
("LM control file is specified but LM cannot be read when counting the %d-th sentence\n", count); break; } } if (ctlmllrfile) { if (ctl_read_entry (ctlmllrfp, regmatname, &tmp1, &tmp2, cb2mllrname) < 0) { E_ERROR ("MLLR control file is specified but MLLR cannot be read when counting the %d-th sentence\n", count); break; } if (tmp2 == -1) strcpy(cb2mllrname, ".1cls."); } /* Process this utterance */ ptmr_start(&tm); if (func) { utt_res_set_uttfile(ur, uttfile); if (ctllmfile) utt_res_set_lmname(ur, lmname); if (ctlmllrfile) { utt_res_set_regmatname(ur, regmatname); utt_res_set_cb2mllrname(ur, cb2mllrname); } (*func) (kb, ur, sf, ef, uttid); } ptmr_stop(&tm); E_INFO ("%s: %6.1f sec CPU, %6.1f sec Clk; TOT: %8.1f sec CPU, %8.1f sec Clk\n\n", uttid, tm.t_cpu, tm.t_elapsed, tm.t_tot_cpu, tm.t_tot_elapsed); ptmr_reset(&tm); } if (fp) fclose(fp); if (ctllmfp) fclose(ctllmfp); if (ctlmllrfp) fclose(ctlmllrfp); if (ur) ckd_free(ur); return tm; }
ptmr_t ctl_process(char *ctlfile, char *ctlmllrfile, int32 nskip, int32 count, void (*func) (void *kb, char *uttfile, int32 sf, int32 ef, char *uttid), void *kb) { FILE *fp, *mllrfp; char uttfile[16384], uttid[4096]; char regmatfile[4096], cb2mllrfile[4096]; int32 sf, ef; ptmr_t tm; mllrfp = NULL; E_INFO("Batch mode recognition without dynamic LM\n"); if (ctlfile) { if ((fp = fopen(ctlfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlfile); } else fp = stdin; if (ctlmllrfile) { if ((mllrfp = fopen(ctlmllrfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlmllrfile); } ptmr_init(&tm); if (nskip > 0) { E_INFO("Skipping %d entries at the beginning of %s\n", nskip, ctlfile); for (; nskip > 0; --nskip) { if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) { fclose(fp); return tm; } } if (ctlmllrfile) { for (; nskip > 0; --nskip) { if (ctl_read_entry(fp, regmatfile, &sf, &ef, cb2mllrfile) < 0) { E_ERROR ("MLLR cannot be read when skipping the %d-th sentence\n", nskip); fclose(fp); return tm; } } } } for (; count > 0; --count) { if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) break; if (ctlmllrfile) { int32 tmp1, tmp2; if (ctl_read_entry (mllrfp, regmatfile, &tmp1, &tmp2, cb2mllrfile) < 0) { E_ERROR ("MLLR cannot be read when counting the %d-th sentence\n", count); break; } if (tmp2 == -1) strcpy(cb2mllrfile, ".1cls."); } /* Process this utterance */ ptmr_start(&tm); if (func) { if (ctlmllrfile) kb_setmllr(regmatfile, cb2mllrfile, kb); (*func) (kb, uttfile, sf, ef, uttid); } ptmr_stop(&tm); E_INFO ("%s: %6.1f sec CPU, %6.1f sec Clk; TOT: %8.1f sec CPU, %8.1f sec Clk\n\n", uttid, tm.t_cpu, tm.t_elapsed, tm.t_tot_cpu, tm.t_tot_elapsed); ptmr_reset(&tm); } if (fp) fclose(fp); return tm; }