static int32 phmm_link(allphone_search_t * allphs) { s3cipid_t ci, rc; phmm_t *p, *p2; int32 *rclist; int32 i, n_link; plink_t *l; bin_mdef_t *mdef; phmm_t **ci_phmm; mdef = ((ps_search_t *) allphs)->acmod->mdef; ci_phmm = allphs->ci_phmm; rclist = (int32 *) ckd_calloc(mdef->n_ciphone + 1, sizeof(int32)); /* Create successor links between PHMM nodes */ n_link = 0; for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = ci_phmm[ci]; p; p = p->next) { /* Build rclist for p */ i = 0; for (rc = 0; rc < mdef->n_ciphone; rc++) { if (bitvec_is_set(p->rc, rc)) rclist[i++] = rc; } rclist[i] = BAD_S3CIPID; /* For each rc in rclist, transition to PHMMs for rc if left context = ci */ for (i = 0; IS_S3CIPID(rclist[i]); i++) { for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->next) { if (bitvec_is_set(p2->lc, ci)) { /* transition from p to p2 */ l = (plink_t *) ckd_calloc(1, sizeof(*l)); l->phmm = p2; l->next = p->succlist; p->succlist = l; n_link++; } } } } } ckd_free(rclist); return n_link; }
static void get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win) { int32 f, sf, ef; ps_latnode_t *node; if (!ngs->fwdtree) { ngs->st.n_fwdflat_word_transition += ngs->n_expand_words; return; } sf = frm - win; if (sf < 0) sf = 0; ef = frm + win; if (ef > ngs->n_frame) ef = ngs->n_frame; bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs)); ngs->n_expand_words = 0; for (f = sf; f < ef; f++) { for (node = ngs->frm_wordlist[f]; node; node = node->next) { if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) { ngs->expand_word_list[ngs->n_expand_words++] = node->wid; bitvec_set(ngs->expand_word_flag, node->wid); } } } ngs->expand_word_list[ngs->n_expand_words] = -1; ngs->st.n_fwdflat_word_transition += ngs->n_expand_words; }
int32 bitvec_count_set (bitvec_t vec, int32 len) { int32 n, i; for (i = 0, n = 0; i < len; i++) if (bitvec_is_set (vec, i)) n++; return n; }
int main(int argc, char *argv[]) { bitvec_t *bv; int i, j; clock_t c; TEST_ASSERT(bv = bitvec_alloc(199)); bitvec_set(bv,198); bitvec_set(bv,0); bitvec_set(bv,42); bitvec_set(bv,43); bitvec_set(bv,44); TEST_ASSERT(bitvec_is_set(bv,198)); TEST_ASSERT(bitvec_is_set(bv,0)); TEST_ASSERT(bitvec_is_set(bv,42)); TEST_ASSERT(bitvec_is_set(bv,43)); TEST_ASSERT(bitvec_is_set(bv,44)); TEST_EQUAL(5, bitvec_count_set(bv, 199)); bitvec_clear(bv, 43); TEST_EQUAL(0, bitvec_is_set(bv,43)); c = clock(); for (j = 0; j < 1000000; ++j) bitvec_count_set(bv, 199); c = clock() - c; printf("1000000 * 199 bitvec_count_set in %.2f sec\n", (double)c / CLOCKS_PER_SEC); bitvec_free(bv); bv = bitvec_alloc(1314); c = clock(); for (j = 0; j < 50000; ++j) for (i = 0; i < 1314; ++i) bitvec_set(bv, i); c = clock() - c; printf("50000 * 1314 bitvec_set in %.2f sec\n", (double)c / CLOCKS_PER_SEC); bitvec_free(bv); /* Test realloc */ bv = bitvec_alloc(13); for (i = 1; i < 13; i+=2) bitvec_set(bv, i); printf("Bits set %d\n", bitvec_count_set(bv, 13)); TEST_EQUAL(6, bitvec_count_set(bv, 13)); bv = bitvec_realloc(bv, 13, 2000); for (i = 0; i < 2000; i++) { /* printf("%d %d\n", i, bitvec_is_set(bv, i) != 0); */ } printf("Bits set after realloc %d\n", bitvec_count_set(bv, 2000)); TEST_EQUAL(6, bitvec_count_set(bv, 2000)); bitvec_free(bv); return 0; }
static int32 senactive_to_mgauactive (acoustic_t *am) { int32 n, s; gauden_t *gau; senone_t *sen; sen = am->sen; gau = am->gau; bitvec_clear_all(am->gauden_active, gau->n_mgau); n = 0; for (s = 0; s < sen->n_sen; s++) { if (bitvec_is_set (am->sen_active, s)) { bitvec_set (am->gauden_active, sen->sen2mgau[s]); n++; } } return n; }
int gs_display(char *file, gs_t * gs) { int32 i; int32 code_id; int32 m_id, s_id, c_id; float32 tmp; bitvec_t *bv; E_INFO("Reading gaussian selector map: %s\n", file); gs = (gs_t *) ckd_calloc(1, sizeof(gs_t)); if ((gs->fp = fopen(file, "rb")) == NULL) E_FATAL("fopen(%s,rb) failed\n", file); gs->n_mgau = gs_fread_int32(gs); E_INFO("The number of mixtures of gaussian: %d\n", gs->n_mgau); gs->n_feat = gs_fread_int32(gs); E_INFO("The number of features stream: %d\n", gs->n_feat); gs->n_density = gs_fread_int32(gs); E_INFO("The number of density: %d\n", gs->n_density); gs->n_code = gs_fread_int32(gs); E_INFO("The number of code word: %d\n", gs->n_code); gs->n_featlen = gs_fread_int32(gs); E_INFO("The feature length: %d\n", gs->n_featlen); gs->n_mbyte = bitvec_size(gs->n_density) * sizeof(bitvec_t); E_INFO("The number of byte to read: %d\n", gs->n_mbyte); /* allocate the bit vector here */ bv = bitvec_alloc(gs->n_density); /* for(i=0;i<gs->n_code;i++) */ for (code_id = 0; code_id < gs->n_code; code_id++) { printf("Code idx: %d\n", code_id); for (c_id = 0; c_id < gs->n_featlen; c_id++) { tmp = gs_fread_float32(gs); printf("%f ", tmp); } printf("\n"); for (m_id = 0; m_id < gs->n_mgau; m_id++) { for (s_id = 0; s_id < gs->n_feat; s_id++) { /*The writer currently doesn't support the byte order */ gs_fread_bitvec_t(bv, gs); printf("%d %d ", m_id, s_id); for (i = 0; i < gs->n_density; i++) { if (bitvec_is_set(bv, i)) { printf("%d ", i); } } printf("\n"); } } } printf("\n"); /* bitvec_free(bv); */ /* destroy the bit vector here */ gs_free(gs); return 1; }
float64 vector_vqgen (float32 **data, int32 rows, int32 cols, int32 vqrows, float64 epsilon, int32 maxiter, float32 **mean, int32 *map) { int32 i, j, r, it; static uint32 seed = 1; float64 sqerr, prev_sqerr=0, t; bitvec_t sel; int32 *count; float32 *gmean; ptmr_t tm; assert ((rows >= vqrows) && (maxiter >= 0) && (epsilon > 0.0)); sel = bitvec_alloc (rows); ptmr_init (&tm); ptmr_start (&tm); /* Pick a random initial set of centroids */ #ifndef WIN32 /* RAH */ srandom (seed); seed ^= random(); #else /* RAH */ srand ((unsigned) time(NULL)); /* RAH */ #endif for (i = 0; i < vqrows; i++) { /* Find r = a random, previously unselected row from the input */ #ifndef WIN32 /* RAH */ r = (random() & (int32)0x7fffffff) % rows; #else /* RAH */ r = (rand() & (int32)0x7fffffff) % rows; /* RAH */ #endif /* RAH */ while (bitvec_is_set (sel, r)) { /* BUG: possible infinite loop!! */ if (++r >= rows) r = 0; } bitvec_set (sel, r); memcpy ((void *)(mean[i]), (void *)(data[r]), cols * sizeof(float32)); /* BUG: What if two randomly selected rows are identical in content?? */ } bitvec_free (sel); count = (int32 *) ckd_calloc (vqrows, sizeof(int32)); /* In k-means, unmapped means in any iteration are a problem. Replace them with gmean */ gmean = (float32 *) ckd_calloc (cols, sizeof(float32)); vector_mean (gmean, mean, vqrows, cols); for (it = 0;; it++) { /* Iterations of k-means algorithm */ /* Find the current data->mean mappings (labels) */ sqerr = 0.0; for (i = 0; i < rows; i++) { map[i] = vector_vqlabel (data[i], mean, vqrows, cols, &t); sqerr += t; } ptmr_stop(&tm); if (it == 0) E_INFO("Iter %4d: %.1fs CPU; sqerr= %e\n", it, tm.t_cpu, sqerr); else E_INFO("Iter %4d: %.1fs CPU; sqerr= %e; delta= %e\n", it, tm.t_cpu, sqerr, (prev_sqerr-sqerr)/prev_sqerr); /* Check if exit condition satisfied */ if ((sqerr == 0.0) || (it >= maxiter-1) || ((it > 0) && ( ((prev_sqerr - sqerr) / prev_sqerr) < epsilon )) ) break; prev_sqerr = sqerr; ptmr_start(&tm); /* Update (reestimate) means */ for (i = 0; i < vqrows; i++) { for (j = 0; j < cols; j++) mean[i][j] = 0.0; count[i] = 0; } for (i = 0; i < rows; i++) { vector_accum (mean[map[i]], data[i], cols); count[map[i]]++; } for (i = 0; i < vqrows; i++) { if (count[i] > 1) { t = 1.0 / (float64)(count[i]); for (j = 0; j < cols; j++) /* mean[i][j] *= t; */ /* RAH, compiler was complaining about this, */ mean[i][j] = (float32) ((float64) mean[i][j] * (float64) t); /* */ } else if (count[i] == 0) { E_ERROR("Iter %d: mean[%d] unmapped\n", it, i); memcpy (mean[i], gmean, cols * sizeof(float32)); } } } ckd_free (count); ckd_free (gmean); return sqerr; }
/*ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists. This makes the current implmentation more complicated than necessary. */ void kb_init (kb_t *kb) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; lmset_t *lmset; s3cipid_t sil, ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; char *str; int32 cisencnt; int32 j; /* Initialize the kb structure to zero, just in case */ memset(kb, 0, sizeof(*kb)); kb->kbcore = NULL; kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"), cmd_ln_str("-feat"), cmd_ln_str("-cmn"), cmd_ln_str("-varnorm"), cmd_ln_str("-agc"), cmd_ln_str("-mdef"), cmd_ln_str("-dict"), cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ cmd_ln_str("-lm"), cmd_ln_str("-lmctlfn"), cmd_ln_str("-lmdumpdir"), cmd_ln_str("-fillpen"), cmd_ln_str("-senmgau"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw"), cmd_ln_str("-mean"), cmd_ln_str("-var"), cmd_ln_float32("-varfloor"), cmd_ln_str("-mixw"), cmd_ln_float32("-mixwfloor"), cmd_ln_str("-subvq"), cmd_ln_str("-gs"), cmd_ln_str("-tmat"), cmd_ln_float32("-tmatfloor")); if(kb->kbcore==NULL){ E_FATAL("Initialization of kb failed\n"); } kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); lmset=kbcore_lmset(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if(lmset){ for(i=0;i<kbcore_nlm(kbcore);i++){ if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm))) E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name); } }else if(lm){ if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); } /* Check that HMM topology restrictions are not violated */ if (tmat_chk_1skip (kbcore->tmat) < 0) E_FATAL("Tmat contains arcs skipping more than 1 state\n"); /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ if(lmset){ for(i=0;i<kbcore_nlm(kbcore);i++){ lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID; lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID; } }else if(lm){ /* No LM is set at this point*/ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; } sil = mdef_silphone (kbcore_mdef (kbcore)); if (NOT_S3CIPID(sil)) E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE); kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32)); kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32)); /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; E_INFO("Building lextrees\n"); /* Get the number of lexical tree*/ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */ /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); if(lmset){ kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *)); /* Just allocate pointers*/ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for(i=0;i<kbcore_nlm(kbcore);i++){ E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name); n=0; for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */ wp[j].wid=-1; wp[j].prob=-1; } n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d.\n",n); if (n < 1) E_FATAL("%d active words in %s\n", n,lmset[i].name); n = wid_wordprob2alt(dict,wp,n); E_INFO("Size of word table after adding alternative prons: %d.\n",n); if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } for (j = 0; j < kb->n_lextree; j++) { kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0; E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n", kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j])); } } }else if (lm){ E_INFO("Creating Unigram Table\n"); n=0; n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d\n",n); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Create the desired no. of unigram lextrees */ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0])); } /* Create filler lextrees */ /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); ckd_free ((void *) lc); bitvec_free (lc_active); E_INFO("Lextrees(%d), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { if(lmset){ E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n"); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->beam = beam_init (cmd_ln_float64("-subvqbeam"), cmd_ln_float64("-beam"), cmd_ln_float64("-pbeam"), cmd_ln_float64("-wbeam")); E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n", kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq); /*Sections of optimization related parameters*/ kb->ds_ratio=cmd_ln_int32("-ds"); E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio); kb->rec_bstcid=-1; kb->skip_count=0; kb->cond_ds=cmd_ln_int32("-cond_ds"); E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds); if(kb->cond_ds>0&&kb->kbcore->gs==NULL) E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n"); kb->gs4gs=cmd_ln_int32("-gs4gs"); E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs); kb->svq4svq=cmd_ln_int32("-svq4svq"); E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq); kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam")); E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam); if(kb->ci_pbeam>10000000){ E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n"); } kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam")); E_INFO("Word-end pruning beam: %d\n",kb->wend_beam); kb->pl_window=cmd_ln_int32("-pl_window"); E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window); kb->pl_window_start=0; kb->pl_beam=logs3(cmd_ln_float32("-pl_beam")); E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam); for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++) ; kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32)); kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32)); kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32)); if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL) E_FATAL("feat_array_alloc() failed\n"); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); ptmr_init (&(kb->tm_sen)); ptmr_init (&(kb->tm_srch)); ptmr_init (&(kb->tm_ovrhd)); kb->tot_fr = 0; kb->tot_sen_eval = 0.0; kb->tot_gau_eval = 0.0; kb->tot_hmm_eval = 0.0; kb->tot_wd_exit = 0.0; kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); if(lmset) n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; else n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ /* Open hypseg file if specified */ str = cmd_ln_str("-hypseg"); kb->matchsegfp = NULL; if (str) { #ifdef SPEC_CPU_WINDOWS if ((kb->matchsegfp = fopen(str, "wt")) == NULL) #else if ((kb->matchsegfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } str = cmd_ln_str("-hyp"); kb->matchfp = NULL; if (str) { #ifdef SPEC_CPU_WINDOWS if ((kb->matchfp = fopen(str, "wt")) == NULL) #else if ((kb->matchfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } }
void kb_init (kb_t *kb) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; s3cipid_t sil, ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; char *str; /* Initialize the kb structure to zero, just in case */ memset(kb, 0, sizeof(*kb)); kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"), "1s_c_d_dd", /* Hack!! Hardwired constant for -feat argument */ cmd_ln_str("-cmn"), cmd_ln_str("-varnorm"), cmd_ln_str("-agc"), cmd_ln_str("-mdef"), cmd_ln_str("-dict"), cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ cmd_ln_str("-lm"), cmd_ln_str("-fillpen"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw"), cmd_ln_str("-mean"), cmd_ln_str("-var"), cmd_ln_float32("-varfloor"), cmd_ln_str("-mixw"), cmd_ln_float32("-mixwfloor"), cmd_ln_str("-subvq"), cmd_ln_str("-tmat"), cmd_ln_float32("-tmatfloor")); kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); /* Check that HMM topology restrictions are not violated */ if (tmat_chk_1skip (kbcore->tmat) < 0) E_FATAL("Tmat contains arcs skipping more than 1 state\n"); /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; sil = mdef_silphone (kbcore_mdef (kbcore)); if (NOT_S3CIPID(sil)) E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE); E_INFO("Building lextrees\n"); kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32)); kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32)); /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; /* Create the desired no. of unigram lextrees */ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } bitvec_free (lc_active); ckd_free ((void *) lc); /* Create filler lextrees */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0]), lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->beam = beam_init (cmd_ln_float64("-subvqbeam"), cmd_ln_float64("-beam"), cmd_ln_float64("-pbeam"), cmd_ln_float64("-wbeam")); E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n", kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq); if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL) E_FATAL("feat_array_alloc() failed\n"); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); ptmr_init (&(kb->tm_sen)); ptmr_init (&(kb->tm_srch)); kb->tot_fr = 0; kb->tot_sen_eval = 0.0; kb->tot_gau_eval = 0.0; kb->tot_hmm_eval = 0.0; kb->tot_wd_exit = 0.0; kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ /* Open hypseg file if specified */ str = cmd_ln_str("-hypseg"); kb->matchsegfp = NULL; if (str) { #ifdef WIN32 if ((kb->matchsegfp = fopen(str, "wt")) == NULL) #else if ((kb->matchsegfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } }
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx) { int16 const *senscr; int32 nf, i, j; int32 *nawl; /* Activate our HMMs for the current frame if need be. */ if (!ps_search_acmod(ngs)->compallsen) compute_fwdflat_sen_active(ngs, frame_idx); /* Compute GMM scores for the current frame. */ senscr = acmod_score(ps_search_acmod(ngs), &frame_idx); ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active; /* Mark backpointer table for current frame. */ ngram_search_mark_bptable(ngs, frame_idx); /* If the best score is equal to or worse than WORST_SCORE, * recognition has failed, don't bother to keep trying. */ if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE) return 0; /* Renormalize if necessary */ if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) { E_INFO("Renormalizing Scores at frame %d, best score %d\n", frame_idx, ngs->best_score); fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score); } ngs->best_score = WORST_SCORE; hmm_context_set_senscore(ngs->hmmctx, senscr); /* Evaluate HMMs */ fwdflat_eval_chan(ngs, frame_idx); /* Prune HMMs and do phone transitions. */ fwdflat_prune_chan(ngs, frame_idx); /* Do word transitions. */ fwdflat_word_transition(ngs, frame_idx); /* Create next active word list */ nf = frame_idx + 1; nawl = ngs->active_word_list[nf & 0x1]; for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { if (bitvec_is_set(ngs->word_active, ngs->fwdflat_wordlist[i])) { *(nawl++) = ngs->fwdflat_wordlist[i]; j++; } } for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) { if (bitvec_is_set(ngs->word_active, i)) { *(nawl++) = i; j++; } } if (!ngs->fwdtree) ++ngs->n_frame; ngs->n_active_word[nf & 0x1] = j; /* Return the number of frames processed. */ return 1; }
/** * Find all active words in backpointer table and sort by frame. */ static void build_fwdflat_wordlist(ngram_search_t *ngs) { int32 i, f, sf, ef, wid, nwd; bptbl_t *bp; ps_latnode_t *node, *prevnode, *nextnode; /* No tree-search, use statically allocated wordlist. */ if (!ngs->fwdtree) return; memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist)); /* Scan the backpointer table for all active words and record * their exit frames. */ for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) { sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1; ef = bp->frame; wid = bp->wid; /* Anything that can be transitioned to in the LM can go in * the word list. */ if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(ps_search_dict(ngs), wid))) continue; /* Look for it in the wordlist. */ for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid); node = node->next); /* Update last end frame. */ if (node) node->lef = ef; else { /* New node; link to head of list */ node = listelem_malloc(ngs->latnode_alloc); node->wid = wid; node->fef = node->lef = ef; node->next = ngs->frm_wordlist[sf]; ngs->frm_wordlist[sf] = node; } } /* Eliminate "unlikely" words, for which there are too few end points */ for (f = 0; f < ngs->n_frame; f++) { prevnode = NULL; for (node = ngs->frm_wordlist[f]; node; node = nextnode) { nextnode = node->next; /* Word has too few endpoints */ if ((node->lef - node->fef < ngs->min_ef_width) || /* Word is </s> and doesn't actually end in last frame */ ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) { if (!prevnode) ngs->frm_wordlist[f] = nextnode; else prevnode->next = nextnode; listelem_free(ngs->latnode_alloc, node); } else prevnode = node; } } /* Form overall wordlist for 2nd pass */ nwd = 0; bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); for (f = 0; f < ngs->n_frame; f++) { for (node = ngs->frm_wordlist[f]; node; node = node->next) { if (!bitvec_is_set(ngs->word_active, node->wid)) { bitvec_set(ngs->word_active, node->wid); ngs->fwdflat_wordlist[nwd++] = node->wid; } } } ngs->fwdflat_wordlist[nwd] = -1; E_INFO("Utterance vocabulary contains %d words\n", nwd); }
/* Update kb w/ new dictionary and new LM. * assumes: single-LM kbcore (before & after) * requires: updating kbcore * Lucian Galescu, 08/11/2005 */ void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; s3cipid_t ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; /*** clean up ***/ vithist_t *vithist = kb->vithist; if (kb->fillertree) ckd_free ((void *)kb->fillertree); if (kb->hmm_hist) ckd_free ((void *)kb->hmm_hist); /* vithist */ if (vithist) { ckd_free ((void *) vithist->entry); ckd_free ((void *) vithist->frame_start); ckd_free ((void *) vithist->bestscore); ckd_free ((void *) vithist->bestvh); ckd_free ((void *) vithist->lms2vh_root); ckd_free ((void *) kb->vithist); } /*** re-initialize ***/ kb->kbcore = kbcore_update_lm(kb->kbcore, dictfile, cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ lmfile, cmd_ln_str("-fillpen"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw")); if(kb->kbcore==NULL){ E_FATAL("Updating kbcore failed\n"); } kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if(lm){ if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); } /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ if(lm){ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; } /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; E_INFO("Building lextrees\n"); /* Get the number of lexical tree*/ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */ /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); if (lm) { E_INFO("Creating Unigram Table\n"); n=0; n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d\n",n); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Create the desired no. of unigram lextrees */ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0])); } /* Create filler lextrees */ /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); ckd_free ((void *) lc); bitvec_free (lc_active); E_INFO("Lextrees(%d), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ }
int32 acoustic_eval (acoustic_t *am, int32 frm) { senone_t *sen; gauden_t *gau; int32 m, f, s, best, bestgau; int32 i, j, k; float32 **fv; float32 **mfc; sen = am->sen; gau = am->gau; if (am->mfc) { mfc = am->mfc + frm; am->fcb->compute_feat(am->fcb, mfc, am->feat[0]); fv = am->feat[0]; } else { fv = am->feat[frm]; } /* Identify the active mixture Gaussians */ if (senactive_to_mgauactive(am) == 0) E_FATAL("No states active\n"); /* Since we're going to accumulate into senscr for each feature stream */ memset (am->senscr, 0, sen->n_sen * sizeof(int32)); /* Evaluate all senones; FOR NOW NOT YET OPTIMIZED TO JUST THE ACTIVE ONES */ best = MAX_NEG_INT32; for (m = 0; m < gau->n_mgau; m++) { if (bitvec_is_set (am->gauden_active, m)) { for (f = 0; f < gau->n_feat; f++) { k = gauden_dist (gau, m, f, fv[f], am->dist); if (am->dist_valid) { /* Determine set of active mgau components based on pruning beam */ bestgau = MAX_NEG_INT32; for (i = 0; i < k; i++) if (am->dist[i] > bestgau) bestgau = am->dist[i]; j = 0; for (i = 0; i < k; i++) { if (am->dist[i] >= bestgau + am->mgaubeam) { am->dist_valid[j++] = i; } } am->n_dist_valid = j; k = j; am->tot_dist_valid += j; am->tot_mgau_eval += 1; } #if 1 for (i = 0; i < sen->mgau2sen[m].n_sen; i++) { s = sen->mgau2sen[m].sen[i]; if (bitvec_is_set (am->sen_active, s)) am->senscr[s] += senone_eval (sen, s, f, am->dist, am->dist_valid, k); } #else senone_eval_all (sen, m, f, am->dist, am->dist_valid, k, am->senscr); #endif } /* Find the best senone score so far */ for (i = 0; i < sen->mgau2sen[m].n_sen; i++) { s = sen->mgau2sen[m].sen[i]; if (bitvec_is_set (am->sen_active, s) && (am->senscr[s] > best)) best = am->senscr[s]; } } } /* CD-CI likelihood-interpolation (later) */ /* Normalize senone score by subtracting the best */ for (s = 0; s < sen->n_sen; s++) if (bitvec_is_set (am->sen_active, s)) am->senscr[s] -= best; return best; }