/*
 * Populate the fields shared by every search implementation.
 *
 * search : structure to fill in
 * vt     : function table stored in search->vt
 * config : configuration stored in search->config
 * acmod  : acoustic model stored in search->acmod
 * dict   : dictionary; retained via dict_retain() when non-NULL
 * d2p    : dict2pid mapping; retained via dict2pid_retain() when non-NULL
 *
 * When no dictionary is given the cached word ids are set to -1 and
 * n_words to 0.
 */
void
ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
               cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
               dict2pid_t *d2p)
{
    search->vt = vt;
    search->config = config;
    search->acmod = acmod;

    search->d2p = NULL;
    if (d2p != NULL)
        search->d2p = dict2pid_retain(d2p);

    if (dict == NULL) {
        /* No dictionary: mark every cached word id as invalid. */
        search->dict = NULL;
        search->start_wid = search->finish_wid = search->silence_wid = -1;
        search->n_words = 0;
        return;
    }

    /* Take a reference and cache the ids of the special words. */
    search->dict = dict_retain(dict);
    search->start_wid = dict_startwid(dict);
    search->finish_wid = dict_finishwid(dict);
    search->silence_wid = dict_silwid(dict);
    search->n_words = dict_size(dict);
}
/*
 * Prepare the Viterbi history for a new utterance.
 *
 * The history must be empty on entry (checked with assertions).  A single
 * dummy <s> entry is allocated to act as the root of the utterance, and the
 * per-frame bookkeeping for frame 0 is initialised.
 *
 * Always returns 0.
 */
int32
vithist_utt_begin(vithist_t *vh, kbcore_t *kbc)
{
    lm_t *lm = kbcore_lm(kbc);
    dict_t *dict = kbcore_dict(kbc);
    vithist_entry_t *root;

    assert (vh->n_entry == 0);
    assert (vh->entry[0] == NULL);
    assert (vh->lwidlist == NULL);

    /* Dummy <s> entry: no real frames, no scores, no predecessor. */
    root = vithist_entry_alloc(vh);
    root->wid = dict_startwid(dict);
    root->sf = -1;
    root->ef = -1;
    root->ascr = 0;
    root->lscr = 0;
    root->score = 0;
    root->pred = -1;
    root->type = 0;
    root->valid = 1;

    /* LM state of the root: <s> followed by an invalid LM word id. */
    root->lmstate.lm3g.lwid[0] = lm_startwid(lm);
    root->lmstate.lm3g.lwid[1] = BAD_S3LMWID;

    /* Frame 0 starts at entry 1, i.e. right after the root entry. */
    vh->n_frm = 0;
    vh->frame_start[0] = 1;
    vh->bestscore[0] = MAX_NEG_INT32;
    vh->bestvh[0] = -1;

    return 0;
}
/*
 * Write a one-line hypothesis ("match") for an utterance.
 *
 * fp    : output stream; nothing is done when NULL
 * hyp   : list of srch_hyp_t segments; "(null)" is emitted when the list
 *         is empty (s3.0 convention)
 * uttid : utterance id, printed in parentheses at the end of the line
 * dict  : dictionary used to filter words and look up base word strings
 * hdr   : optional prefix, printed verbatim when non-NULL
 *
 * Zero-width segments (sf == ef) are skipped.  Filler words, <s> and </s>
 * are never printed, but still count as segments; if no non-zero-width
 * segment was seen a single blank is written before the utterance id.
 */
void
match_write(FILE * fp, glist_t hyp, char *uttid, dict_t * dict, char *hdr)
{
    gnode_t *node;
    int n_timed = 0;

    if (!fp)
        return;

    if (!hyp)                   /* Following s3.0 convention */
        fprintf(fp, "(null)");

    fprintf(fp, "%s", (hdr ? hdr : ""));

    for (node = hyp; node != NULL; node = gnode_next(node)) {
        srch_hyp_t *seg = (srch_hyp_t *) gnode_ptr(node);

        /* FSG outputs zero-width hyps; ignore them entirely. */
        if (seg->sf == seg->ef)
            continue;
        n_timed++;

        /* Fillers and the sentence anchors are not part of the output. */
        if (dict_filler_word(dict, seg->id)
            || (seg->id == dict_finishwid(dict))
            || (seg->id == dict_startwid(dict)))
            continue;

        fprintf(fp, "%s ",
                dict_wordstr(dict, dict_basewid(dict, seg->id)));
    }

    if (n_timed == 0)
        fprintf(fp, " ");

    fprintf(fp, "(%s)\n", uttid);
    fflush(fp);
}
/*
 * Replace the dictionary and dict2pid mapping held by a search object.
 *
 * search : search object to update
 * dict   : new dictionary (may be NULL); retained via dict_retain()
 * d2p    : new dict2pid mapping (may be NULL); retained via
 *          dict2pid_retain()
 *
 * The new objects are retained BEFORE the previously held ones are
 * released.  Releasing first would free the object when the caller passes
 * in the very dict/d2p that the search already holds as its last
 * reference, and the subsequent retain would then touch freed memory.
 *
 * When dict is NULL the cached word ids are set to -1 and n_words to 0.
 */
void
ps_search_base_reinit(ps_search_t *search, dict_t *dict,
                      dict2pid_t *d2p)
{
    /* Remember what we currently hold so it can be released last. */
    dict_t *old_dict = search->dict;
    dict2pid_t *old_d2p = search->d2p;

    /* FIXME: _retain() should just return NULL if passed NULL. */
    if (dict) {
        search->dict = dict_retain(dict);
        search->start_wid = dict_startwid(dict);
        search->finish_wid = dict_finishwid(dict);
        search->silence_wid = dict_silwid(dict);
        search->n_words = dict_size(dict);
    }
    else {
        search->dict = NULL;
        search->start_wid = search->finish_wid = search->silence_wid = -1;
        search->n_words = 0;
    }

    if (d2p)
        search->d2p = dict2pid_retain(d2p);
    else
        search->d2p = NULL;

    /* Drop the old references, in the same order as before. */
    dict_free(old_dict);
    dict2pid_free(old_d2p);
}
/*
 * ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists.  This
 * makes the current implementation more complicated than necessary.
 */
/*
 * Initialise the decoder knowledge base from command-line arguments.
 *
 * kb : structure to initialise; it is zeroed first.
 *
 * Steps, in order:
 *   - build the kbcore (models, dictionary, LM or LM set);
 *   - verify that <s> and </s> exist in the dictionary and in every LM;
 *   - unlink <s>/</s> between dictionary and LM(s);
 *   - allocate the active-senone / senone-sequence flag arrays;
 *   - collect the set of possible left-context phones;
 *   - build the unigram lextrees (one group per LM when an LM set is used)
 *     and the filler lextrees;
 *   - set up scores, beams, the pruning/optimisation parameters, the
 *     feature buffer, the Viterbi history, timers, the HMM histogram and
 *     the optional -hypseg / -hyp output files.
 *
 * Fatal configuration problems are reported through E_FATAL.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              cmd_ln_str("-feat"),
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-lmctlfn"),
                              cmd_ln_str("-lmdumpdir"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_str("-senmgau"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-gs"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
        E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset=kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lmset){
        for(i=0;i<kbcore_nlm(kbcore);i++){
            if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
                E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
        }
    }else if(lm){
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if(lmset){
        for(i=0;i<kbcore_nlm(kbcore);i++){
            lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
            lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

            for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
            for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
        }
    }else if(lm){ /* No LM is set at this point*/
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        /* The last phone of each pronunciation is a candidate left context. */
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;     /* terminator */

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if(lmset){
        kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
        /* Just allocate pointers*/
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

        for(i=0;i<kbcore_nlm(kbcore);i++){
            E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
            n=0;
            for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */
                wp[j].wid=-1;
                wp[j].prob=-1;
            }
            n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp);
            E_INFO("Size of word table after unigram + words in class: %d.\n",n);
            if (n < 1)
                E_FATAL("%d active words in %s\n", n,lmset[i].name);
            n = wid_wordprob2alt(dict,wp,n);
            E_INFO("Size of word table after adding alternative prons: %d.\n",n);
            if (cmd_ln_int32("-treeugprob") == 0) {
                /*
                 * Flatten all initial probabilities.  This loop must not
                 * use i: i is the index of the LM currently being processed
                 * and is needed below to place the trees in ugtreeMulti.
                 */
                for (j = 0; j < n; j++)
                    wp[j].prob = -1;
            }

            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
                lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
                E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
                       kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
            }
        }
    }else if (lm){
        E_INFO("Creating Unigram Table\n");
        n=0;
        n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n",n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;        /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree, lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        if(lmset){
            E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    /*Sections of optimization related parameters*/
    kb->ds_ratio=cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);

    kb->rec_bstcid=-1;
    kb->skip_count=0;

    kb->cond_ds=cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);

    if(kb->cond_ds>0&&kb->kbcore->gs==NULL)
        E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs=cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq=cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if(kb->ci_pbeam>10000000){
        E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }

    kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window=cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

    kb->pl_window_start=0;

    kb->pl_beam=logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    /* Count the leading senones that map to themselves in cd2cisen. */
    for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++)
        ;

    kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    /* Histogram size is derived from the node count of the first trees. */
    if(lmset)
        n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
        n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    /* Open hyp file if specified */
    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/*
 * Initialise the decoder knowledge base (single-LM variant).
 *
 * kb : structure to initialise; it is zeroed first.
 *
 * Builds the kbcore from command-line arguments (the feature type is
 * hardwired to "1s_c_d_dd"), checks that <s>/</s> are known to both the
 * dictionary and the LM, unlinks them from the LM, builds the unigram and
 * filler lextrees, and sets up scores, beams, the feature buffer, the
 * Viterbi history, timers, the HMM histogram and the optional -hypseg
 * output file.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t sil_phone, phone;
    s3wid_t wid;
    int32 k, n_wp, n_ctx;
    wordprob_t *wordtab;
    s3cipid_t *left_ctx;
    bitvec_t ctx_seen;
    char *path;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              "1s_c_d_dd",      /* Hack!! Hardwired constant for -feat argument */
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",               /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));

    kbcore = kb->kbcore;
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    /* Both sentence anchors must be known to the dictionary and the LM. */
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) {
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    }
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) {
        E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0) {
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    }

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

    wid = dict_startwid(dict);
    while (IS_S3WID(wid)) {
        kbcore->dict2lmwid[wid] = BAD_S3LMWID;
        wid = dict_nextalt(dict, wid);
    }
    wid = dict_finishwid(dict);
    while (IS_S3WID(wid)) {
        kbcore->dict2lmwid[wid] = BAD_S3LMWID;
        wid = dict_nextalt(dict, wid);
    }

    sil_phone = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil_phone)) {
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    }

    E_INFO("Building lextrees\n");

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build active word list */
    wordtab = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
    n_wp = lm_ug_wordprob (lm, MAX_NEG_INT32, wordtab);
    if (n_wp < 1) {
        E_FATAL("%d active words\n", n_wp);
    }
    /* Add alternative pronunciations */
    n_wp = wid_wordprob2alt (dict, wordtab, n_wp);

    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
        /* Flatten all initial probabilities */
        k = 0;
        while (k < n_wp) {
            wordtab[k].prob = -1;
            k++;
        }
    }

    /* Build set of all possible left contexts */
    left_ctx = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    ctx_seen = bitvec_alloc (mdef_n_ciphone (mdef));
    for (wid = 0; wid < dict_size (dict); wid++) {
        /* The final phone of each pronunciation is a candidate context. */
        phone = dict_pron (dict, wid, dict_pronlen(dict, wid) - 1);
        if (mdef_is_fillerphone (mdef, (int)phone))
            continue;
        bitvec_set (ctx_seen, phone);
    }
    phone = mdef_silphone(mdef);
    bitvec_set (ctx_seen, phone);

    n_ctx = 0;
    for (phone = 0; phone < mdef_n_ciphone(mdef); phone++) {
        if (bitvec_is_set (ctx_seen, phone)) {
            left_ctx[n_ctx] = phone;
            n_ctx++;
        }
    }
    left_ctx[n_ctx] = BAD_S3CIPID;      /* terminator */

    /* Create the desired no. of unigram lextrees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (k = 0; k < kb->n_lextree; k++) {
        kb->ugtree[k] = lextree_build (kbcore, wordtab, n_wp, left_ctx);
        lextree_type (kb->ugtree[k]) = 0;
    }
    bitvec_free (ctx_seen);
    ckd_free ((void *) left_ctx);

    /* Create filler lextrees; the word table is reused for the fillers. */
    n_wp = 0;
    for (k = dict_filler_start(dict); k <= dict_filler_end(dict); k++) {
        if (!dict_filler_word(dict, k))
            continue;
        wordtab[n_wp].wid = k;
        wordtab[n_wp].prob = fillpen (kbcore->fillpen, k);
        n_wp++;
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (k = 0; k < kb->n_lextree; k++) {
        kb->fillertree[k] = lextree_build (kbcore, wordtab, n_wp, NULL);
        lextree_type (kb->fillertree[k]) = -1;
    }
    ckd_free ((void *) wordtab);

    E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n",
           kb->n_lextree,
           lextree_n_node(kb->ugtree[0]),
           lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        for (k = 0; k < kb->n_lextree; k++) {
            fprintf (stderr, "UGTREE %d\n", k);
            lextree_dump (kb->ugtree[k], dict, stderr);
        }
        for (k = 0; k < kb->n_lextree; k++) {
            fprintf (stderr, "FILLERTREE %d\n", k);
            lextree_dump (kb->fillertree[k], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES);
    if (kb->feat == NULL) {
        E_FATAL("feat_array_alloc() failed\n");
    }

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    /* Timers and running totals. */
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* Histogram size is derived from the node count of the first trees. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n_wp = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n_wp /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n_wp+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n_wp+1, sizeof(int32));        /* Really no need for +1 */

    /* Open hypseg file if specified */
    path = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (path) {
#ifdef WIN32
        kb->matchsegfp = fopen(path, "wt");
#else
        kb->matchsegfp = fopen(path, "w");
#endif
        if (kb->matchsegfp == NULL)
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", path);
    }
}
/*
 * Update kb w/ new dictionary and new LM.
 *   assumes:  single-LM kbcore (before & after)
 *   requires: updating kbcore
 * Lucian Galescu, 08/11/2005
 *
 * kb       : knowledge base to update in place
 * dictfile : dictionary file handed to kbcore_update_lm()
 * lmfile   : LM file handed to kbcore_update_lm()
 *
 * The filler-tree pointer array, the HMM histogram and the Viterbi
 * history are released, the kbcore is updated, and the lextrees, scores,
 * Viterbi history and histogram are then rebuilt from the new models.
 *
 * NOTE(review): only the fillertree pointer array is freed here; the
 * lextrees it points to, the previous kb->ugtree array and the previous
 * kb->ascr are overwritten without being released in this function --
 * confirm whether that is intended.
 */
void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t phone;
    s3wid_t wid;
    int32 k, n_wp, n_ctx;
    wordprob_t *wordtab;
    s3cipid_t *left_ctx;
    bitvec_t ctx_seen;

    /*** clean up ***/
    vithist_t *old_vh = kb->vithist;

    if (kb->fillertree != NULL)
        ckd_free ((void *)kb->fillertree);
    if (kb->hmm_hist != NULL)
        ckd_free ((void *)kb->hmm_hist);

    /* vithist: release its arrays, then the structure itself */
    if (old_vh != NULL) {
        ckd_free ((void *) old_vh->entry);
        ckd_free ((void *) old_vh->frame_start);
        ckd_free ((void *) old_vh->bestscore);
        ckd_free ((void *) old_vh->bestvh);
        ckd_free ((void *) old_vh->lms2vh_root);
        ckd_free ((void *) kb->vithist);
    }

    /*** re-initialize ***/
    kb->kbcore = kbcore_update_lm(kb->kbcore,
                                  dictfile,
                                  cmd_ln_str("-fdict"),
                                  "",   /* Hack!! Hardwired constant for -compsep argument */
                                  lmfile,
                                  cmd_ln_str("-fillpen"),
                                  cmd_ln_float32("-silprob"),
                                  cmd_ln_float32("-fillprob"),
                                  cmd_ln_float32("-lw"),
                                  cmd_ln_float32("-wip"),
                                  cmd_ln_float32("-uw"));
    if (kb->kbcore == NULL) {
        E_FATAL("Updating kbcore failed\n");
    }

    kbcore = kb->kbcore;
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) {
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    }

    if (lm) {
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) {
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
        }
    }

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if (lm) {
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

        wid = dict_startwid(dict);
        while (IS_S3WID(wid)) {
            kbcore->dict2lmwid[wid] = BAD_S3LMWID;
            wid = dict_nextalt(dict, wid);
        }
        wid = dict_finishwid(dict);
        while (IS_S3WID(wid)) {
            kbcore->dict2lmwid[wid] = BAD_S3LMWID;
            wid = dict_nextalt(dict, wid);
        }
    }

    /* Build set of all possible left contexts */
    left_ctx = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    ctx_seen = bitvec_alloc (mdef_n_ciphone (mdef));
    for (wid = 0; wid < dict_size (dict); wid++) {
        /* The final phone of each pronunciation is a candidate context. */
        phone = dict_pron (dict, wid, dict_pronlen(dict, wid) - 1);
        if (mdef_is_fillerphone (mdef, (int)phone))
            continue;
        bitvec_set (ctx_seen, phone);
    }
    phone = mdef_silphone(mdef);
    bitvec_set (ctx_seen, phone);

    n_ctx = 0;
    for (phone = 0; phone < mdef_n_ciphone(mdef); phone++) {
        if (bitvec_is_set (ctx_seen, phone)) {
            left_ctx[n_ctx] = phone;
            n_ctx++;
        }
    }
    left_ctx[n_ctx] = BAD_S3CIPID;      /* terminator */

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wordtab = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lm) {
        E_INFO("Creating Unigram Table\n");
        n_wp = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wordtab);
        E_INFO("Size of word table after unigram + words in class: %d\n",n_wp);
        if (n_wp < 1) {
            E_FATAL("%d active words\n", n_wp);
        }
        /* Add alternative pronunciations */
        n_wp = wid_wordprob2alt (dict, wordtab, n_wp);

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            /* Flatten all initial probabilities */
            k = 0;
            while (k < n_wp) {
                wordtab[k].prob = -1;
                k++;
            }
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (k = 0; k < kb->n_lextree; k++) {
            kb->ugtree[k] = lextree_build (kbcore, wordtab, n_wp, left_ctx);
            lextree_type (kb->ugtree[k]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n_wp = 0;
    for (k = dict_filler_start(dict); k <= dict_filler_end(dict); k++) {
        if (!dict_filler_word(dict, k))
            continue;
        wordtab[n_wp].wid = k;
        wordtab[n_wp].prob = fillpen (kbcore->fillpen, k);
        n_wp++;
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (k = 0; k < kb->n_lextree; k++) {
        kb->fillertree[k] = lextree_build (kbcore, wordtab, n_wp, NULL);
        lextree_type (kb->fillertree[k]) = -1;
    }
    ckd_free ((void *) wordtab);
    ckd_free ((void *) left_ctx);
    bitvec_free (ctx_seen);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree, lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        for (k = 0; k < kb->n_lextree; k++) {
            fprintf (stderr, "UGTREE %d\n", k);
            lextree_dump (kb->ugtree[k], dict, stderr);
        }
        for (k = 0; k < kb->n_lextree; k++) {
            fprintf (stderr, "FILLERTREE %d\n", k);
            lextree_dump (kb->fillertree[k], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    /* Histogram size is derived from the node count of the first trees. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n_wp = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n_wp /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n_wp+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n_wp+1, sizeof(int32));        /* Really no need for +1 */
}
/*
 * Write the word lattice (DAG) derived from the Viterbi history.
 * The file header must have been written BEFORE this function is called.
 *
 * vh     : Viterbi history to convert
 * hyp    : best hypothesis (list of hyp_t); its segments are never pruned
 * dict   : dictionary, used for word strings and the <s>/</s> ids
 * oldfmt : non-zero selects "FROM-NODEID TO-NODEID ASCORE" edges,
 *          zero selects "FROM-NODEID ENDFRAME ASCORE" edges
 * fp     : output stream
 *
 * Valid history entries are grouped into dag nodes keyed by
 * <start-frame, word-id>.  Nodes with a single end frame are dropped
 * unless they belong to the hypothesis or are the <s>/</s> anchors.
 */
void vithist_dag_write (vithist_t *vh, glist_t hyp, dict_t *dict, int32 oldfmt, FILE *fp)
{
    glist_t *by_sf;             /* per start frame: list of dagnodes */
    vithist_entry_t *ve, *rival;
    gnode_t *it, *it2, *it3;
    dagnode_t *dn, *succ;
    int32 sf, ef, n_node;
    int32 frm, id;
    hyp_t *seg;

    by_sf = (glist_t *) ckd_calloc (vh->n_frm+1, sizeof(glist_t));

    /* This range includes the dummy <s> and </s> entries */
    for (id = 0; id < vh->n_entry; id++) {
        ve = vh->entry[VITHIST_ID2BLK(id)] + VITHIST_ID2BLKOFFSET(id);
        if (! ve->valid)
            continue;

        /*
         * The initial <s> entry (at 0) is a dummy, with start/end frame = -1.
         * But the old S3 code treats it like a real word, so we have to
         * reintroduce it in the dag file with a start time of 0.  And shift
         * the start time of words starting at frame 0 up by 1.
         * MAJOR HACK!!
         */
        if (ve->sf > 0) {
            sf = ve->sf;
            ef = ve->ef;
        }
        else {
            assert (ve->sf >= -1);
            assert ((ve->ef == -1) || (ve->ef > 1));

            sf = ve->sf + 1;
            ef = (ve->ef < 0) ? 0 : ve->ef;
        }

        /* Look for an existing node for this <start-frame, word>. */
        it = by_sf[sf];
        while (it) {
            dn = (dagnode_t *) gnode_ptr(it);
            if (dn->wid == ve->wid)
                break;
            it = gnode_next(it);
        }

        if (it) {
            /* Known node: just extend its last end frame. */
            dn->lef = ef;
        }
        else {
            dn = (dagnode_t *) ckd_calloc (1, sizeof(dagnode_t));
            dn->wid = ve->wid;
            dn->fef = ef;
            dn->lef = ef;
            dn->seqid = -1;     /* Initially all invalid, selected ones validated below */
            dn->velist = NULL;

            by_sf[sf] = glist_add_ptr (by_sf[sf], (void *) dn);
        }

        /*
         * Check if an entry already exists under dn->velist (generated by a
         * different LM context); retain only the best scoring one.
         */
        it = dn->velist;
        while (it) {
            rival = (vithist_entry_t *) gnode_ptr (it);
            if (rival->ef == ve->ef)
                break;
            it = gnode_next(it);
        }
        if (!it)
            dn->velist = glist_add_ptr (dn->velist, (void *) ve);
        else if (ve->score > rival->score)
            gnode_ptr(it) = (void *)ve;
    }

    /*
     * Validate segments with >1 end times; if only 1 end time, can be pruned.
     * But keep segments in the original hypothesis, regardless; mark them
     * first.
     */
    for (it = hyp; it; it = gnode_next(it)) {
        seg = (hyp_t *) gnode_ptr (it);
        for (it2 = by_sf[seg->sf]; it2; it2 = gnode_next(it2)) {
            dn = (dagnode_t *) gnode_ptr (it2);
            if (seg->id == dn->wid)
                dn->seqid = 0;  /* Do not discard (prune) this dagnode */
        }
    }

    /* Validate startwid and finishwid nodes */
    dn = (dagnode_t *) gnode_ptr(by_sf[0]);
    assert (dn->wid == dict_startwid(dict));
    dn->seqid = 0;
    dn = (dagnode_t *) gnode_ptr(by_sf[vh->n_frm]);
    assert (dn->wid == dict_finishwid(dict));
    dn->seqid = 0;

    /*
     * Now prune dagnodes with only 1 end frame if not validated above.
     * Survivors are numbered from the last frame backwards.
     */
    n_node = 0;
    for (frm = vh->n_frm; frm >= 0; --frm) {
        for (it = by_sf[frm]; it; it = gnode_next(it)) {
            dn = (dagnode_t *) gnode_ptr(it);
            if ((dn->lef > dn->fef) || (dn->seqid >= 0)) {
                dn->seqid = n_node;
                n_node++;
            }
            else {
                dn->seqid = -1; /* Flag: discard */
            }
        }
    }

    /* Write nodes info; the header should have been written before this function is called */
    fprintf (fp, "Nodes %d (NODEID WORD STARTFRAME FIRST-ENDFRAME LAST-ENDFRAME)\n", n_node);
    for (frm = vh->n_frm; frm >= 0; --frm) {
        for (it = by_sf[frm]; it; it = gnode_next(it)) {
            dn = (dagnode_t *) gnode_ptr(it);
            if (dn->seqid < 0)
                continue;
            fprintf (fp, "%d %s %d %d %d\n",
                     dn->seqid, dict_wordstr(dict, dn->wid), frm, dn->fef, dn->lef);
        }
    }
    fprintf (fp, "#\n");

    fprintf (fp, "Initial %d\nFinal %d\n", n_node-1, 0);
    fprintf (fp, "#\n");

    fprintf (fp, "BestSegAscr 0 (NODEID ENDFRAME ASCORE)\n");
    fprintf (fp, "#\n");

    /* Edges */
    if (oldfmt)
        fprintf (fp, "Edges (FROM-NODEID TO-NODEID ASCORE)\n");
    else
        fprintf (fp, "Edges (FROM-NODEID ENDFRAME ASCORE)\n");

    for (frm = vh->n_frm-1; frm >= 0; --frm) {
        for (it = by_sf[frm]; it; it = gnode_next(it)) {
            dn = (dagnode_t *) gnode_ptr(it);

            /* Look for transitions from this dagnode to later ones, if not discarded */
            if (dn->seqid < 0)
                continue;

            for (it2 = dn->velist; it2; it2 = gnode_next(it2)) {
                ve = (vithist_entry_t *) gnode_ptr (it2);

                /* Successors start in the frame right after this entry ends. */
                sf = (ve->ef < 0) ? 1 : (ve->ef + 1);

                for (it3 = by_sf[sf]; it3; it3 = gnode_next(it3)) {
                    succ = (dagnode_t *) gnode_ptr(it3);
                    if (succ->seqid < 0)
                        continue;

                    if (oldfmt) {
                        /* One edge per surviving successor. */
                        fprintf (fp, "%d %d %d\n", dn->seqid, succ->seqid, ve->ascr);
                    }
                    else {
                        /* One edge per end frame, if any successor survives. */
                        fprintf (fp, "%d %d %d\n", dn->seqid, sf-1, ve->ascr);
                        break;
                    }
                }
            }
        }
    }
    fprintf (fp, "End\n");

    /* Free dagnodes structure */
    for (frm = 0; frm <= vh->n_frm; frm++) {
        for (it = by_sf[frm]; it; it = gnode_next(it)) {
            dn = (dagnode_t *) gnode_ptr(it);
            glist_free (dn->velist);
            ckd_free ((void *) dn);
        }
        glist_free (by_sf[frm]);
    }
    ckd_free ((void *) by_sf);
}
/*
 * Write a per-word table for a hypothesis, followed by a TOTAL line.
 *
 * fp       : output stream; nothing is done when NULL
 * hyp      : list of srch_hyp_t segments
 * uttid    : utterance id printed on every line
 * LBL      : label used on the header and TOTAL lines
 * lbl      : label used on the per-word lines
 * senscale : per-frame scaling values, or NULL; when given, the acoustic
 *            scores are reported with the scale added back ("UnNorm")
 * dict     : dictionary (must be non-NULL)
 *
 * <s>, </s>, segments with a negative word id and zero-width segments are
 * left out of both the table and the totals.
 */
void
match_detailed(FILE * fp, glist_t hyp, char *uttid, char *LBL, char *lbl,
               int32 * senscale, dict_t * dict)
{
    int32 ascr, lscr;
    int32 scl;
    gnode_t *node;
    srch_hyp_t *seg;

    if (!fp)
        return;

    ascr = 0;
    lscr = 0;
    scl = 0;
    assert(dict);

    /* Column headings; only the acoustic score label depends on senscale. */
    fprintf(fp, "%s:%s> %20s %5s %5s %12s %10s %10s %10s\n", LBL,
            uttid, "WORD", "SFrm", "EFrm",
            senscale ? "AScr(UnNorm)" : "AScr(Norm)",
            "LMScore", "AScr+LScr", "AScale");

    for (node = hyp; node != NULL; node = gnode_next(node)) {
        seg = (srch_hyp_t *) gnode_ptr(node);

        /* The sentence anchors are not reported. */
        if (seg->id == dict_finishwid(dict) || seg->id == dict_startwid(dict))
            continue;

        /* Neither are invalid ids or zero-width segments. */
        if (seg->id < 0 || (seg->sf == seg->ef))
            continue;

        scl = compute_scale(seg->sf, seg->ef, senscale);

        if (senscale) {
            fprintf(fp, "%s:%s> %20s %5d %5d %12d %10d %10d %10d \n",
                    lbl, uttid, dict_wordstr(dict, seg->id), seg->sf,
                    seg->ef, seg->ascr + scl, seg->lscr,
                    seg->ascr + scl + seg->lscr, scl);
        }
        else {
            fprintf(fp, "%s:%s> %20s %5d %5d %12d %10d %10d %10d\n",
                    lbl, uttid, dict_wordstr(dict, seg->id), seg->sf,
                    seg->ef, seg->ascr, seg->lscr,
                    seg->ascr + seg->lscr, scl);
        }

        /* Accumulate totals; the scale is only added when in use. */
        ascr += seg->ascr;
        if (senscale)
            ascr += scl;
        lscr += seg->lscr;
    }

    fprintf(fp, "%s:%s> %20s %5s %5s %12d %10d\n", LBL, uttid,
            "TOTAL", "", "", ascr, lscr);
}