int32 main(int32 argc, char *argv[]) { FILE *fpout; mgau_model_t *mgau; int32 **subvec; int32 max_datarows, datarows, datacols, svqrows, svqcols; float32 **data, **vqmean; int32 *datamap, *vqmap; float64 sqerr; int32 stdev; int32 i, j, v, m, c; cmd_ln_t *config; logmath_t *logmath; print_appl_info(argv[0]); cmd_ln_appl_enter(argc, argv, "default.arg", arg); unlimit(); config = cmd_ln_get(); logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1, cmd_ln_int32_r(config, "-log3table")); /*Report Progress, use log table */ /* Load means/vars but DO NOT precompute variance inverses or determinants */ mgau = mgau_init(cmd_ln_str_r(config, "-mean"), cmd_ln_str_r(config, "-var"), 0.0 /* no varfloor */ , cmd_ln_str_r(config, "-mixw"), cmd_ln_float32_r(config, "-mixwfloor"), FALSE, /* No precomputation */ ".cont.", MIX_INT_FLOAT_COMP, logmath); /* Parse subvector spec argument; subvec is null terminated; subvec[x] is -1 terminated */ subvec = parse_subvecs(cmd_ln_str_r(config, "-svspec")); if (cmd_ln_str_r(config, "-subvq")) { if ((fpout = fopen(cmd_ln_str_r(config, "-subvq"), "w")) == NULL) { E_ERROR_SYSTEM("Failed to open output file '%s'", fpout); return 1; } } else fpout = stdout; /* Echo command line to output file */ for (i = 0; i < argc - 1; i++) fprintf(fpout, "# %s \\\n", argv[i]); fprintf(fpout, "# %s\n#\n", argv[argc - 1]); /* Print input and output configurations to output file */ for (v = 0; subvec[v]; v++); /* No. of subvectors */ svqrows = cmd_ln_int32_r(config, "-svqrows"); fprintf(fpout, "VQParam %d %d -> %d %d\n", mgau_n_mgau(mgau), mgau_max_comp(mgau), v, svqrows); for (v = 0; subvec[v]; v++) { for (i = 0; subvec[v][i] >= 0; i++); fprintf(fpout, "Subvector %d length %d ", v, i); for (i = 0; subvec[v][i] >= 0; i++) fprintf(fpout, " %2d", subvec[v][i]); fprintf(fpout, "\n"); } fflush(fpout); /* * datamap[] for identifying non-0 input vectors that take part in the clustering process: * datamap[m*max_mean + c] = row index of data[][] containing the copy. * vqmap[] for mapping vq input data to vq output. */ max_datarows = mgau_n_mgau(mgau) * mgau_max_comp(mgau); datamap = (int32 *) ckd_calloc(max_datarows, sizeof(int32)); vqmap = (int32 *) ckd_calloc(max_datarows, sizeof(int32)); stdev = cmd_ln_int32_r(config, "-stdev"); /* Copy and cluster each subvector */ for (v = 0; subvec[v]; v++) { E_INFO("Clustering subvector %d\n", v); for (datacols = 0; subvec[v][datacols] >= 0; datacols++); /* Input subvec length */ svqcols = datacols * 2; /* subvec length after concatenating mean + var */ /* Allocate input/output data areas */ data = (float32 **) ckd_calloc_2d(max_datarows, svqcols, sizeof(float32)); vqmean = (float32 **) ckd_calloc_2d(svqrows, svqcols, sizeof(float32)); /* Make a copy of the subvectors from the input data, and initialize maps */ for (i = 0; i < max_datarows; i++) datamap[i] = -1; datarows = 0; for (m = 0; m < mgau_n_mgau(mgau); m++) { /* For each mixture m */ for (c = 0; c < mgau_n_comp(mgau, m); c++) { /* For each component c in m */ if (vector_is_zero (mgau_var(mgau, m, c), mgau_veclen(mgau))) { E_INFO("Skipping mgau %d comp %d\n", m, c); continue; } for (i = 0; i < datacols; i++) { /* Copy specified dimensions, mean+var */ data[datarows][i * 2] = mgau->mgau[m].mean[c][subvec[v][i]]; data[datarows][i * 2 + 1] = (!stdev) ? mgau->mgau[m]. var[c][subvec[v][i]] : sqrt(mgau->mgau[m]. var[c][subvec[v][i]]); } datamap[m * mgau_max_comp(mgau) + c] = datarows++; } } E_INFO("Sanity check: input data[0]:\n"); vector_print(stderr, data[0], svqcols); for (i = 0; i < max_datarows; i++) vqmap[i] = -1; #if 0 { int32 **in; printf("Input data: %d x %d\n", datarows, svqcols); in = (int32 **) data; for (i = 0; i < datarows; i++) { printf("%8d:", i); for (j = 0; j < svqcols; j++) printf(" %08x", in[i][j]); printf("\n"); } for (i = 0; i < datarows; i++) { printf("%15d:", i); for (j = 0; j < svqcols; j++) printf(" %15.7e", data[i][j]); printf("\n"); } fflush(stdout); } #endif /* VQ the subvector copy built above */ sqerr = vector_vqgen(data, datarows, svqcols, svqrows, cmd_ln_float64_r(config, "-eps"), cmd_ln_int32_r(config, "-iter"), vqmean, vqmap, cmd_ln_int32_r(config, "-seed")); /* Output VQ */ fprintf(fpout, "Codebook %d Sqerr %e\n", v, sqerr); for (i = 0; i < svqrows; i++) { if (stdev) { /* Convert clustered stdev back to var */ for (j = 1; j < svqcols; j += 2) vqmean[i][j] *= vqmean[i][j]; } vector_print(fpout, vqmean[i], svqcols); } fprintf(fpout, "Map %d\n", v); for (i = 0; i < max_datarows; i += mgau_max_comp(mgau)) { for (j = 0; j < mgau_max_comp(mgau); j++) { if (datamap[i + j] < 0) fprintf(fpout, " -1"); else fprintf(fpout, " %d", vqmap[datamap[i + j]]); } fprintf(fpout, "\n"); } fflush(fpout); /* Cleanup */ ckd_free_2d((void **) data); ckd_free_2d((void **) vqmean); } subvecs_free(subvec); ckd_free(datamap); ckd_free(vqmap); mgau_free(mgau); fprintf(fpout, "End\n"); fclose(fpout); logmath_free(logmath); cmd_ln_free_r(config); exit(0); }
/*ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists. This makes the current implmentation more complicated than necessary. */ void kb_init (kb_t *kb) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; lmset_t *lmset; s3cipid_t sil, ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; char *str; int32 cisencnt; int32 j; /* Initialize the kb structure to zero, just in case */ memset(kb, 0, sizeof(*kb)); kb->kbcore = NULL; kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"), cmd_ln_str("-feat"), cmd_ln_str("-cmn"), cmd_ln_str("-varnorm"), cmd_ln_str("-agc"), cmd_ln_str("-mdef"), cmd_ln_str("-dict"), cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ cmd_ln_str("-lm"), cmd_ln_str("-lmctlfn"), cmd_ln_str("-lmdumpdir"), cmd_ln_str("-fillpen"), cmd_ln_str("-senmgau"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw"), cmd_ln_str("-mean"), cmd_ln_str("-var"), cmd_ln_float32("-varfloor"), cmd_ln_str("-mixw"), cmd_ln_float32("-mixwfloor"), cmd_ln_str("-subvq"), cmd_ln_str("-gs"), cmd_ln_str("-tmat"), cmd_ln_float32("-tmatfloor")); if(kb->kbcore==NULL){ E_FATAL("Initialization of kb failed\n"); } kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); lmset=kbcore_lmset(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if(lmset){ for(i=0;i<kbcore_nlm(kbcore);i++){ if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm))) E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name); } }else if(lm){ if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); } /* Check that HMM topology restrictions are not violated */ if (tmat_chk_1skip (kbcore->tmat) < 0) E_FATAL("Tmat contains arcs skipping more than 1 state\n"); /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ if(lmset){ for(i=0;i<kbcore_nlm(kbcore);i++){ lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID; lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID; } }else if(lm){ /* No LM is set at this point*/ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; } sil = mdef_silphone (kbcore_mdef (kbcore)); if (NOT_S3CIPID(sil)) E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE); kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32)); kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32)); /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; E_INFO("Building lextrees\n"); /* Get the number of lexical tree*/ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */ /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); if(lmset){ kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *)); /* Just allocate pointers*/ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for(i=0;i<kbcore_nlm(kbcore);i++){ E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name); n=0; for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */ wp[j].wid=-1; wp[j].prob=-1; } n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d.\n",n); if (n < 1) E_FATAL("%d active words in %s\n", n,lmset[i].name); n = wid_wordprob2alt(dict,wp,n); E_INFO("Size of word table after adding alternative prons: %d.\n",n); if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } for (j = 0; j < kb->n_lextree; j++) { kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0; E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n", kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j])); } } }else if (lm){ E_INFO("Creating Unigram Table\n"); n=0; n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d\n",n); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Create the desired no. of unigram lextrees */ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0])); } /* Create filler lextrees */ /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); ckd_free ((void *) lc); bitvec_free (lc_active); E_INFO("Lextrees(%d), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { if(lmset){ E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n"); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->beam = beam_init (cmd_ln_float64("-subvqbeam"), cmd_ln_float64("-beam"), cmd_ln_float64("-pbeam"), cmd_ln_float64("-wbeam")); E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n", kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq); /*Sections of optimization related parameters*/ kb->ds_ratio=cmd_ln_int32("-ds"); E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio); kb->rec_bstcid=-1; kb->skip_count=0; kb->cond_ds=cmd_ln_int32("-cond_ds"); E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds); if(kb->cond_ds>0&&kb->kbcore->gs==NULL) E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n"); kb->gs4gs=cmd_ln_int32("-gs4gs"); E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs); kb->svq4svq=cmd_ln_int32("-svq4svq"); E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq); kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam")); E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam); if(kb->ci_pbeam>10000000){ E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n"); } kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam")); E_INFO("Word-end pruning beam: %d\n",kb->wend_beam); kb->pl_window=cmd_ln_int32("-pl_window"); E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window); kb->pl_window_start=0; kb->pl_beam=logs3(cmd_ln_float32("-pl_beam")); E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam); for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++) ; kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32)); kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32)); kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32)); if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL) E_FATAL("feat_array_alloc() failed\n"); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); ptmr_init (&(kb->tm_sen)); ptmr_init (&(kb->tm_srch)); ptmr_init (&(kb->tm_ovrhd)); kb->tot_fr = 0; kb->tot_sen_eval = 0.0; kb->tot_gau_eval = 0.0; kb->tot_hmm_eval = 0.0; kb->tot_wd_exit = 0.0; kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); if(lmset) n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; else n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ /* Open hypseg file if specified */ str = cmd_ln_str("-hypseg"); kb->matchsegfp = NULL; if (str) { #ifdef SPEC_CPU_WINDOWS if ((kb->matchsegfp = fopen(str, "wt")) == NULL) #else if ((kb->matchsegfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } str = cmd_ln_str("-hyp"); kb->matchfp = NULL; if (str) { #ifdef SPEC_CPU_WINDOWS if ((kb->matchfp = fopen(str, "wt")) == NULL) #else if ((kb->matchfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } }
kbcore_t *kbcore_init (float64 logbase, char *feattype, char *cmn, char *varnorm, char *agc, char *mdeffile, char *dictfile, char *fdictfile, char *compsep, char *lmfile, char *fillpenfile, float64 silprob, float64 fillprob, float64 langwt, float64 inspen, float64 uw, char *meanfile, char *varfile, float64 varfloor, char *mixwfile, float64 mixwfloor, char *subvqfile, char *tmatfile, float64 tmatfloor) { kbcore_t *kb; E_INFO("Initializing core models:\n"); kb = (kbcore_t *) ckd_calloc (1, sizeof(kbcore_t)); kb->fcb = NULL; kb->mdef = NULL; kb->dict = NULL; kb->dict2pid = NULL; kb->lm = NULL; kb->fillpen = NULL; kb->dict2lmwid = NULL; kb->mgau = NULL; kb->svq = NULL; kb->tmat = NULL; logs3_init (logbase); if (feattype) { if ((kb->fcb = feat_init (feattype, cmn, varnorm, agc)) == NULL) E_FATAL("feat_init(%s) failed\n", feattype); if (feat_n_stream(kb->fcb) != 1) E_FATAL("#Feature streams(%d) != 1\n", feat_n_stream(kb->fcb)); } if (mdeffile) { if ((kb->mdef = mdef_init (mdeffile)) == NULL) E_FATAL("mdef_init(%s) failed\n", mdeffile); } if (dictfile) { if (! compsep) compsep = ""; else if ((compsep[0] != '\0') && (compsep[1] != '\0')) { E_FATAL("Compound word separator(%s) must be empty or single character string\n", compsep); } if ((kb->dict = dict_init (kb->mdef, dictfile, fdictfile, compsep[0])) == NULL) E_FATAL("dict_init(%s,%s,%s) failed\n", dictfile, fdictfile ? fdictfile : "", compsep); } if (lmfile) { if ((kb->lm = lm_read (lmfile, langwt, inspen, uw)) == NULL) E_FATAL("lm_read(%s, %e, %e, %e) failed\n", lmfile, langwt, inspen, uw); } if (fillpenfile || (lmfile && kb->dict)) { if (! kb->dict) /* Sic */ E_FATAL("No dictionary for associating filler penalty file(%s)\n", fillpenfile); if ((kb->fillpen = fillpen_init (kb->dict, fillpenfile, silprob, fillprob, langwt, inspen)) == NULL) E_FATAL("fillpen_init(%s) failed\n", fillpenfile); } if (meanfile) { if ((! varfile) || (! mixwfile)) E_FATAL("Varfile or mixwfile not specified along with meanfile(%s)\n", meanfile); kb->mgau = mgau_init (meanfile, varfile, varfloor, mixwfile, mixwfloor, TRUE); if (kb->mgau == NULL) E_FATAL("gauden_init(%s, %s, %e) failed\n", meanfile, varfile, varfloor); if (subvqfile) { if ((kb->svq = subvq_init (subvqfile, varfloor, -1, kb->mgau)) == NULL) E_FATAL("subvq_init (%s, %e, -1) failed\n", subvqfile, varfloor); } } if (tmatfile) { if ((kb->tmat = tmat_init (tmatfile, tmatfloor)) == NULL) E_FATAL("tmat_init (%s, %e) failed\n", tmatfile, tmatfloor); } if (kb->dict && kb->lm) { /* Initialize dict2lmwid */ if ((kb->dict2lmwid = wid_dict_lm_map (kb->dict, kb->lm)) == NULL) E_FATAL("Dict/LM word-id mapping failed\n"); } if (kb->mdef && kb->dict) { /* Initialize dict2pid */ kb->dict2pid = dict2pid_build (kb->mdef, kb->dict); } /* ***************** Verifications ***************** */ E_INFO("Verifying models consistency:\n"); if (kb->fcb && kb->mgau) { /* Verify feature streams against gauden codebooks */ if (feat_stream_len(kb->fcb, 0) != mgau_veclen(kb->mgau)) E_FATAL("Feature streamlen(%d) != mgau streamlen(%d)\n", feat_stream_len(kb->fcb, 0), mgau_veclen(kb->mgau)); } if (kb->mdef && kb->mgau) { /* Verify senone parameters against model definition parameters */ if (kb->mdef->n_sen != mgau_n_mgau(kb->mgau)) E_FATAL("Mdef #senones(%d) != mgau #senones(%d)\n", kb->mdef->n_sen, mgau_n_mgau(kb->mgau)); } if (kb->mdef && kb->tmat) { /* Verify transition matrices parameters against model definition parameters */ if (kb->mdef->n_tmat != kb->tmat->n_tmat) E_FATAL("Mdef #tmat(%d) != tmatfile(%d)\n", kb->mdef->n_tmat, kb->tmat->n_tmat); if (kb->mdef->n_emit_state != kb->tmat->n_state) E_FATAL("Mdef #states(%d) != tmat #states(%d)\n", kb->mdef->n_emit_state, kb->tmat->n_state); } return kb; }
void kb_init (kb_t *kb) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; s3cipid_t sil, ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; char *str; /* Initialize the kb structure to zero, just in case */ memset(kb, 0, sizeof(*kb)); kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"), "1s_c_d_dd", /* Hack!! Hardwired constant for -feat argument */ cmd_ln_str("-cmn"), cmd_ln_str("-varnorm"), cmd_ln_str("-agc"), cmd_ln_str("-mdef"), cmd_ln_str("-dict"), cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ cmd_ln_str("-lm"), cmd_ln_str("-fillpen"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw"), cmd_ln_str("-mean"), cmd_ln_str("-var"), cmd_ln_float32("-varfloor"), cmd_ln_str("-mixw"), cmd_ln_float32("-mixwfloor"), cmd_ln_str("-subvq"), cmd_ln_str("-tmat"), cmd_ln_float32("-tmatfloor")); kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); /* Check that HMM topology restrictions are not violated */ if (tmat_chk_1skip (kbcore->tmat) < 0) E_FATAL("Tmat contains arcs skipping more than 1 state\n"); /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; sil = mdef_silphone (kbcore_mdef (kbcore)); if (NOT_S3CIPID(sil)) E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE); E_INFO("Building lextrees\n"); kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32)); kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32)); /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; /* Create the desired no. of unigram lextrees */ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } bitvec_free (lc_active); ckd_free ((void *) lc); /* Create filler lextrees */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0]), lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->beam = beam_init (cmd_ln_float64("-subvqbeam"), cmd_ln_float64("-beam"), cmd_ln_float64("-pbeam"), cmd_ln_float64("-wbeam")); E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n", kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq); if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL) E_FATAL("feat_array_alloc() failed\n"); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); ptmr_init (&(kb->tm_sen)); ptmr_init (&(kb->tm_srch)); kb->tot_fr = 0; kb->tot_sen_eval = 0.0; kb->tot_gau_eval = 0.0; kb->tot_hmm_eval = 0.0; kb->tot_wd_exit = 0.0; kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ /* Open hypseg file if specified */ str = cmd_ln_str("-hypseg"); kb->matchsegfp = NULL; if (str) { #ifdef WIN32 if ((kb->matchsegfp = fopen(str, "wt")) == NULL) #else if ((kb->matchsegfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } }
/* Update kb w/ new dictionary and new LM. * assumes: single-LM kbcore (before & after) * requires: updating kbcore * Lucian Galescu, 08/11/2005 */ void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; s3cipid_t ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; /*** clean up ***/ vithist_t *vithist = kb->vithist; if (kb->fillertree) ckd_free ((void *)kb->fillertree); if (kb->hmm_hist) ckd_free ((void *)kb->hmm_hist); /* vithist */ if (vithist) { ckd_free ((void *) vithist->entry); ckd_free ((void *) vithist->frame_start); ckd_free ((void *) vithist->bestscore); ckd_free ((void *) vithist->bestvh); ckd_free ((void *) vithist->lms2vh_root); ckd_free ((void *) kb->vithist); } /*** re-initialize ***/ kb->kbcore = kbcore_update_lm(kb->kbcore, dictfile, cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ lmfile, cmd_ln_str("-fillpen"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw")); if(kb->kbcore==NULL){ E_FATAL("Updating kbcore failed\n"); } kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if(lm){ if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); } /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ if(lm){ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; } /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; E_INFO("Building lextrees\n"); /* Get the number of lexical tree*/ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */ /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); if (lm) { E_INFO("Creating Unigram Table\n"); n=0; n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d\n",n); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Create the desired no. of unigram lextrees */ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0])); } /* Create filler lextrees */ /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); ckd_free ((void *) lc); bitvec_free (lc_active); E_INFO("Lextrees(%d), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ }