/*
 * Write one hypothesis line to fp in the form
 *     <hdr><word> <word> ... (<uttid>)
 *
 * Only segments with sf != ef are considered (FSG search emits zero-width
 * segments).  Filler words and the start/finish words are not printed.
 * If no non-zero-width segment was seen, a single blank is printed so the
 * "(uttid)" suffix is still separated.  A NULL hyp additionally prints
 * "(null)" first, following the s3.0 convention.  Does nothing if fp is NULL.
 */
void
match_write(FILE * fp, glist_t hyp, char *uttid, dict_t * dict, char *hdr)
{
    gnode_t *node;
    srch_hyp_t *seg;
    int n_seg = 0;

    if (fp == NULL)
        return;

    if (hyp == NULL)            /* Following s3.0 convention */
        fprintf(fp, "(null)");

    fprintf(fp, "%s", (hdr ? hdr : ""));

    for (node = hyp; node; node = gnode_next(node)) {
        seg = (srch_hyp_t *) gnode_ptr(node);

        /* FSG outputs zero-width hyps; ignore them entirely */
        if (seg->sf == seg->ef)
            continue;
        n_seg++;

        if (dict_filler_word(dict, seg->id))
            continue;
        if (seg->id == dict_finishwid(dict))
            continue;
        if (seg->id == dict_startwid(dict))
            continue;

        fprintf(fp, "%s ",
                dict_wordstr(dict, dict_basewid(dict, seg->id)));
    }

    if (n_seg == 0)
        fprintf(fp, " ");
    fprintf(fp, "(%s)\n", uttid);
    fflush(fp);
}
/*
 * Fill in the common part of a search object.
 *
 * vt, config and acmod are stored as given.  dict and d2p, when non-NULL,
 * are stored via dict_retain() / dict2pid_retain(); when NULL the
 * corresponding field is set to NULL.  The cached word ids and word count
 * are taken from dict, or set to -1 / 0 when there is no dictionary.
 */
void
ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt,
               cmd_ln_t *config, acmod_t *acmod, dict_t *dict,
               dict2pid_t *d2p)
{
    search->vt = vt;
    search->config = config;
    search->acmod = acmod;

    search->d2p = NULL;
    if (d2p)
        search->d2p = dict2pid_retain(d2p);

    if (!dict) {
        /* No dictionary: mark every cached word id as invalid. */
        search->dict = NULL;
        search->silence_wid = -1;
        search->finish_wid = -1;
        search->start_wid = -1;
        search->n_words = 0;
        return;
    }

    search->dict = dict_retain(dict);
    search->start_wid = dict_startwid(dict);
    search->finish_wid = dict_finishwid(dict);
    search->silence_wid = dict_silwid(dict);
    search->n_words = dict_size(dict);
}
/*
 * Replace the dictionary and dict2pid held by a search object.
 *
 * The new objects (when non-NULL) are retained BEFORE the previously held
 * ones are released.  Releasing first is unsafe when the caller passes the
 * very object the search already holds: if that was the last reference,
 * dict_free()/dict2pid_free() would destroy it and the following
 * *_retain() would operate on freed memory.
 *
 * The cached word ids and word count are refreshed from the new dictionary,
 * or reset to -1 / 0 when dict is NULL.
 */
void
ps_search_base_reinit(ps_search_t *search, dict_t *dict,
                      dict2pid_t *d2p)
{
    dict_t *old_dict = search->dict;
    dict2pid_t *old_d2p = search->d2p;

    /* FIXME: _retain() should just return NULL if passed NULL. */
    if (dict) {
        search->dict = dict_retain(dict);
        search->start_wid = dict_startwid(dict);
        search->finish_wid = dict_finishwid(dict);
        search->silence_wid = dict_silwid(dict);
        search->n_words = dict_size(dict);
    }
    else {
        search->dict = NULL;
        search->start_wid = search->finish_wid = search->silence_wid = -1;
        search->n_words = 0;
    }

    if (d2p)
        search->d2p = dict2pid_retain(d2p);
    else
        search->d2p = NULL;

    /* Drop the old references only now that the new ones are secured. */
    dict_free(old_dict);
    dict2pid_free(old_d2p);
}
/*
 * Fetch the current hypothesis from the search module and cache it in
 * _decode as (a) a NULL-terminated array of segment pointers and (b) a
 * single string of the non-filler, non-finish words separated by blanks.
 *
 * Any previously cached hypothesis is released first via
 * s3_decode_free_hyps().  On success the list nodes are freed but the
 * segment objects are kept (they are now referenced from hyp_segs).
 *
 * Returns S3_DECODE_SUCCESS, S3_DECODE_ERROR_NULL_POINTER (NULL decoder),
 * S3_DECODE_ERROR_INTERNAL (no hypothesis list) or
 * S3_DECODE_ERROR_OUT_OF_MEMORY (allocation failure; everything obtained
 * here, including the segment objects, is released).
 */
int
s3_decode_record_hyps(s3_decode_t * _decode, int _end_utt)
{
    glist_t hyp_list;
    gnode_t *node;
    srch_hyp_t *hyp;
    srch_hyp_t **segs;
    dict_t *dict;
    srch_t *srch;
    char *text;
    char *tail;
    int32 k;
    int n_seg = 0;
    int text_len = 0;
    int finish_wid;

    if (_decode == NULL)
        return S3_DECODE_ERROR_NULL_POINTER;

    s3_decode_free_hyps(_decode);

    dict = kbcore_dict(_decode->kbcore);
    srch = (srch_t *) _decode->kb.srch;
    hyp_list = srch_get_hyp(srch);
    if (hyp_list == NULL) {
        E_WARN("Failed to retrieve viterbi history.\n");
        return S3_DECODE_ERROR_INTERNAL;
    }

    /* Pass 1: count the segments and the bytes needed for the string
     * (each printed word contributes its length plus one separator). */
    finish_wid = dict_finishwid(dict);
    node = hyp_list;
    while (node != NULL) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        n_seg++;
        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {
            text_len +=
                strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) + 1;
        }
        node = gnode_next(node);
    }
    if (text_len == 0)
        text_len = 1;           /* room for the lone terminator */

    /* Zero-filled storage: the string stays NUL-terminated while it grows */
    text = (char *) ckd_calloc(text_len, sizeof(char));
    segs = (srch_hyp_t **) ckd_calloc(n_seg + 1, sizeof(srch_hyp_t *));
    if (segs == NULL || text == NULL) {
        E_WARN("Failed to allocate storage for hypothesis.\n");
        if (segs != NULL)
            ckd_free(segs);
        if (text != NULL)
            ckd_free(text);
        for (node = hyp_list; node != NULL; node = gnode_next(node)) {
            hyp = (srch_hyp_t *) gnode_ptr(node);
            if (hyp != NULL)
                ckd_free(hyp);
        }
        glist_free(hyp_list);
        return S3_DECODE_ERROR_OUT_OF_MEMORY;
    }

    /* Pass 2: record each segment and append the printable words */
    tail = text;
    k = 0;
    for (node = hyp_list; node != NULL; node = gnode_next(node)) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        segs[k++] = hyp;
        hyp->word = dict_wordstr(dict, dict_basewid(dict, hyp->id));

        if (dict_filler_word(dict, hyp->id) || hyp->id == finish_wid)
            continue;

        strcpy(tail, dict_wordstr(dict, dict_basewid(dict, hyp->id)));
        tail += strlen(tail);
        *tail++ = ' ';
    }

    glist_free(hyp_list);

    /* The last byte is either the trailing blank or the only byte */
    text[text_len - 1] = '\0';
    segs[n_seg] = 0;

    _decode->hyp_frame_num = _decode->num_frames_decoded;
    _decode->hyp_segs = segs;
    _decode->hyp_str = text;

    return S3_DECODE_SUCCESS;
}
/*
 * Retrieve the (partial or final) hypothesis of the current utterance.
 *
 * hyp_str  - if non-NULL, receives a string of the recognised words
 *            (fillers and the finish word excluded) separated by blanks.
 * hyp_segs - if non-NULL, receives an array of segment pointers.  The
 *            array is allocated with one extra zeroed slot, so it is
 *            NULL-terminated.
 *
 * If the decoder is in LD_STATE_ENDED the results cached in kb are returned
 * unchanged.  Otherwise the cached results are released with kb_freehyps()
 * and rebuilt from a Viterbi backtrace.  Always returns 0.
 *
 * Fixes relative to the previous revision:
 *  - The fill pass now applies the same filler / finish-word filter as the
 *    sizing pass.  Previously every word was appended while only the
 *    filtered ones had been counted, overrunning the string buffer.
 *  - The string is terminated relative to the write position, so nothing is
 *    written at index -1 when no word was emitted.
 *  - The segment array gets a terminating slot (and is never allocated with
 *    zero elements).
 *
 * NOTE(review): the counts are accumulated into kb->hyp_seglen and
 * kb->hyp_strlen as before; this presumes kb_freehyps() resets both to 0 --
 * confirm against its definition.
 */
int
ld_utt_hyps(live_decoder_t *decoder, char **hyp_str, hyp_t ***hyp_segs)
{
    int32 id;
    int32 i = 0;
    glist_t hyp_list;
    gnode_t *node;
    hyp_t *hyp;
    dict_t *dict;
    char *hyp_strptr = NULL;
    kb_t *kb = &decoder->kb;

    if (decoder->ld_state == LD_STATE_ENDED) {
        /* Utterance already finalised: hand back the cached results. */
        if (hyp_segs) {
            *hyp_segs = kb->hyp_segs;
        }
        if (hyp_str) {
            *hyp_str = kb->hyp_str;
        }
        return 0;
    }
    else {
        kb_freehyps(kb);
    }

    dict = kbcore_dict (decoder->kbcore);
    id = vithist_partialutt_end(kb->vithist, decoder->kbcore);
    if (id >= 0) {
        hyp_list = vithist_backtrace(kb->vithist, id);

        /* record the segment length and the overall string length */
        for (node = hyp_list; node; node = gnode_next(node)) {
            hyp = (hyp_t *)gnode_ptr(node);
            if (hyp_segs) {
                kb->hyp_seglen++;
            }
            if (hyp_str) {
                if (!dict_filler_word(dict, hyp->id) &&
                    hyp->id != dict_finishwid(dict)) {
                    kb->hyp_strlen +=
                        strlen(dict_wordstr(dict,
                                            dict_basewid(dict, hyp->id))) + 1;
                }
            }
        }

        /* allocate array to hold the segments and/or decoded string;
         * both are zero-filled and carry one spare terminating element */
        if (hyp_segs) {
            kb->hyp_segs =
                (hyp_t **)ckd_calloc(kb->hyp_seglen + 1, sizeof(hyp_t *));
        }
        if (hyp_str) {
            kb->hyp_str = (char *)ckd_calloc(kb->hyp_strlen + 1, sizeof(char));
            hyp_strptr = kb->hyp_str;
        }

        /* iterate thru to fill in the array of segments and/or decoded string */
        i = 0;
        for (node = hyp_list; node; node = gnode_next(node), i++) {
            hyp = (hyp_t *)gnode_ptr(node);
            if (hyp_segs) {
                kb->hyp_segs[i] = hyp;
            }
            /* Must mirror the filter used while sizing the buffer above. */
            if (hyp_str &&
                !dict_filler_word(dict, hyp->id) &&
                hyp->id != dict_finishwid(dict)) {
                strcat(hyp_strptr,
                       dict_wordstr(dict, dict_basewid(dict, hyp->id)));
                hyp_strptr += strlen(hyp_strptr);
                strcat(hyp_strptr, " ");
                hyp_strptr += 1;
            }
        }
        glist_free(hyp_list);

        /* Replace the trailing separator, if any word was written. */
        if (hyp_str && hyp_strptr > kb->hyp_str) {
            hyp_strptr[-1] = '\0';
        }
    }

    if (hyp_segs) {
        *hyp_segs = kb->hyp_segs;
    }
    if (hyp_str) {
        *hyp_str = kb->hyp_str;
    }

    return 0;
}
/*
 * ARCHAN: to allow backward compatibility, -lm and -lmctlfn coexist.  This
 * makes the current implementation more complicated than necessary.
 *
 * Initialize the knowledge base `kb` from the command-line configuration:
 *   1. zero the structure and build the kbcore (models, dictionary, LM(s));
 *   2. check that <s>/</s> exist in the dictionary and in every LM, check
 *      the transition matrices, and unlink <s>/</s> between dictionary and
 *      LM so they cannot be recognised;
 *   3. allocate the active-senone / active-sseq arrays;
 *   4. compute the set of possible left-context CI phones;
 *   5. build the unigram lextrees (one set per LM when an LM set is in
 *      use) and the filler lextrees;
 *   6. set up beams, the fast-GMM parameters, feature buffers, Viterbi
 *      history, timers and the HMM histogram;
 *   7. open the -hypseg and -hyp output files when requested.
 *
 * Fix relative to the previous revision: in the LM-set branch the loop that
 * flattens the unigram probabilities used `i`, which is also the index of
 * the enclosing per-LM loop.  After flattening, `i` was left equal to the
 * word count, so the lextrees were stored at a wrong (out-of-range) slot of
 * ugtreeMulti, lmset[i] was read out of range and the LM loop stopped early.
 * The flattening loop now uses `j`.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              cmd_ln_str("-feat"),
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-lmctlfn"),
                              cmd_ln_str("-lmdumpdir"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_str("-senmgau"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-gs"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
        E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset=kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lmset){
        for(i=0;i<kbcore_nlm(kbcore);i++){
            if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
                E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
        }
    }else if(lm){
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if(lmset){
        for(i=0;i<kbcore_nlm(kbcore);i++){
            lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
            lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

            for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
            for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
        }
    }else if(lm){ /* No LM is set at this point*/
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build set of all possible left contexts: the last phone of every
     * word unless it is a filler phone, plus the silence phone. */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if(lmset){
        kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
        /* Just allocate pointers*/
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

        for(i=0;i<kbcore_nlm(kbcore);i++){
            E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
            n=0;
            for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */
                wp[j].wid=-1;
                wp[j].prob=-1;
            }
            n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp);
            E_INFO("Size of word table after unigram + words in class: %d.\n",n);
            if (n < 1)
                E_FATAL("%d active words in %s\n", n,lmset[i].name);
            n = wid_wordprob2alt(dict,wp,n);
            E_INFO("Size of word table after adding alternative prons: %d.\n",n);
            if (cmd_ln_int32("-treeugprob") == 0) {
                /* Must not use `i` here: it indexes the current LM. */
                for (j = 0; j < n; j++)
                    wp[j].prob = -1;    /* Flatten all initial probabilities */
            }

            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
                lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
                E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
                       kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
            }
        }
    }else if (lm){
        E_INFO("Creating Unigram Table\n");
        n=0;
        n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n",n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;        /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree,
           lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        if(lmset){
            E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    /*Sections of optimization related parameters*/
    kb->ds_ratio=cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);

    kb->rec_bstcid=-1;
    kb->skip_count=0;

    kb->cond_ds=cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);

    if(kb->cond_ds>0&&kb->kbcore->gs==NULL)
        E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs=cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq=cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if(kb->ci_pbeam>10000000){
        E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }

    kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window=cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

    kb->pl_window_start=0;

    kb->pl_beam=logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    /* Count the leading senones that map onto themselves in cd2cisen;
     * the count sizes the per-frame score cache allocated below. */
    for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++)
        ;

    kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if(lmset)
        n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
        n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    /* Open hyp file if specified */
    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/*
 * Initialize the knowledge base `kb` from the command-line configuration
 * (single-LM variant with a hardwired "1s_c_d_dd" feature type).
 *
 * In order, the function:
 *   - zeroes *kb and builds kb->kbcore via kbcore_init();
 *   - aborts (E_FATAL) if the start/finish words are missing from the
 *     dictionary or the LM, or if tmat_chk_1skip() reports a violation;
 *   - unlinks <s> and </s> between dictionary and LM;
 *   - allocates the active-senone / sseq / composite-sseq arrays;
 *   - builds the unigram word list, the left-context phone list, and from
 *     them kb->n_lextree unigram lextrees and as many filler lextrees;
 *   - creates ascr, beam, feature buffer, Viterbi history, timers and the
 *     HMM histogram;
 *   - opens the -hypseg output file when that option is set.
 *
 * NOTE(review): the result of kbcore_init() and the LM pointer are used
 * without a NULL check here -- confirm that kbcore_init() cannot return
 * NULL and that an LM is always present in this configuration.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              "1s_c_d_dd",      /* Hack!! Hardwired constant for -feat argument */
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    /* Both sentence anchors must exist in the dictionary and in the LM */
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
        E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    /* ... including every alternative pronunciation of the two anchors */
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
        kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
        kbcore->dict2lmwid[w] = BAD_S3LMWID;

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    E_INFO("Building lextrees\n");

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
    n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp);
    if (n < 1)
        E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */

    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
        for (i = 0; i < n; i++)
            wp[i].prob = -1;    /* Flatten all initial probabilities */
    }

    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    /* Mark the last phone of every word, unless it is a filler phone */
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    /* Silence is always a possible left context */
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    /* Turn the bit vector into a BAD_S3CIPID-terminated list */
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    /* Create the desired no. of unigram lextrees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
        lextree_type (kb->ugtree[i]) = 0;
    }
    bitvec_free (lc_active);
    ckd_free ((void *) lc);

    /* Create filler lextrees (wp is reused to hold the filler words) */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }
    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);

    E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n",
           kb->n_lextree, lextree_n_node(kb->ugtree[0]), lextree_n_node(kb->fillertree[0]));

    /* Optional debugging dump of every lextree to stderr */
    if (cmd_ln_int32("-lextreedump")) {
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    /* Timers and running totals start from zero */
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* Histogram with one bin per hmm_hist_binsize lextree nodes */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef WIN32
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/*
 * Compute the left and right context CIphone sets for each state.
 * (Needed for building the phone HMM net using cross-word triphones.  Invoke
 * after computing null transitions closure.)
 *
 * On return fsg->lc[s] and fsg->rc[s] are lists of CI phone ids terminated
 * by -1.  Aborts (E_FATAL) when the FSG has more than 127 CI phones, since
 * the ids are stored as int8.
 *
 * Fix relative to the previous revision: fsg is now asserted non-NULL
 * before fsg->dict / fsg->mdef are read; previously the assertion came
 * after the dereference and could never catch a NULL fsg.
 */
static void
word_fsg_lc_rc(word_fsg_t * fsg)
{
    int32 s, d, i, j;
    int32 n_ci;
    gnode_t *gn;
    word_fsglink_t *l;
    int32 silcipid;
    int32 endwid;
    int32 len;
    dict_t *dict;
    mdef_t *mdef;

    assert(fsg);
    dict = fsg->dict;
    mdef = fsg->mdef;
    assert(dict);
    assert(mdef);

    endwid = dict_basewid(dict, dict_finishwid(dict));

    silcipid = mdef_silphone(mdef);
    assert(silcipid >= 0);
    E_INFO("Value of silcipid %d\n", silcipid);

    n_ci = fsg->n_ciphone;
    E_INFO("No of CI phones %d\n", n_ci);
    if (n_ci > 127) {
        E_FATAL
            ("#phones(%d) > 127; cannot use int8** for word_fsg_t.{lc,rc}\n",
             n_ci);
    }

    /*
     * fsg->lc[s] = set of left context CIphones for state s.  Similarly, rc[s]
     * for right context CIphones.  (One extra column holds the terminator
     * once the flags are converted to a list at the end.)
     */
    fsg->lc = (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8));
    fsg->rc = (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8));

    for (s = 0; s < fsg->n_state; s++) {
        for (d = 0; d < fsg->n_state; d++) {
            for (gn = fsg->trans[s][d]; gn; gn = gnode_next(gn)) {
                l = (word_fsglink_t *) gnode_ptr(gn);
                assert(l->wid >= 0);

                /*
                 * Add the first CIphone of l->wid to the rclist of state s, and
                 * the last CIphone to lclist of state d.
                 * (Filler phones are a pain to deal with.  There is no direct
                 * marking of a filler phone; but only filler words are supposed to
                 * use such phones, so we use that fact.  HACK!!  FRAGILE!!)
                 */
                if (dict_filler_word(dict, l->wid) || (l->wid == endwid)) {
                    /* Filler phone; use silence phone as context */
                    fsg->rc[s][silcipid] = 1;
                    fsg->lc[d][silcipid] = 1;
                }
                else {
                    len = dict_pronlen(dict, l->wid);
                    fsg->rc[s][dict_pron(dict, l->wid, 0)] = 1;
                    fsg->lc[d][dict_pron(dict, l->wid, len - 1)] = 1;
                }
            }
        }

        /*
         * Add SIL phone to the lclist and rclist of each state.  Strictly
         * speaking, only needed at start and final states, respectively, but
         * all states considered since the user may change the start and final
         * states.  In any case, most applications would have a silence self
         * loop at each state, hence these would be needed anyway.
         */
        fsg->lc[s][silcipid] = 1;
        fsg->rc[s][silcipid] = 1;
    }

    /*
     * Propagate lc and rc lists past null transitions.  (Since FSG contains
     * null transitions closure, no need to worry about a chain of successive
     * null transitions.  Right??)
     */
    for (s = 0; s < fsg->n_state; s++) {
        for (d = 0; d < fsg->n_state; d++) {
            l = fsg->null_trans[s][d];
            if (l) {
                /*
                 * lclist(d) |= lclist(s), because all the words ending up at s, can
                 * now also end at d, becoming the left context for words leaving d.
                 */
                for (i = 0; i < n_ci; i++)
                    fsg->lc[d][i] |= fsg->lc[s][i];
                /*
                 * Similarly, rclist(s) |= rclist(d), because all the words leaving d
                 * can equivalently leave s, becoming the right context for words
                 * ending up at s.
                 */
                for (i = 0; i < n_ci; i++)
                    fsg->rc[s][i] |= fsg->rc[d][i];
            }
        }
    }

    /*
     * Convert the bit-vector representation into a list.  This is done in
     * place: the write index j never exceeds the read index i, so a flag is
     * always read before its slot is overwritten.
     */
    for (s = 0; s < fsg->n_state; s++) {
        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (fsg->lc[s][i]) {
                fsg->lc[s][j] = i;
                j++;
            }
        }
        fsg->lc[s][j] = -1;     /* Terminate the list */

        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (fsg->rc[s][i]) {
                fsg->rc[s][j] = i;
                j++;
            }
        }
        fsg->rc[s][j] = -1;     /* Terminate the list */
    }
}
/* Update kb w/ new dictionary and new LM.
 * assumes: single-LM kbcore (before & after)
 * requires: updating kbcore
 * Lucian Galescu, 08/11/2005
 *
 * Steps performed:
 *   - free the filler-tree pointer array, the HMM histogram and the
 *     Viterbi history arrays currently held by kb;
 *   - rebuild kb->kbcore through kbcore_update_lm() with the given
 *     dictionary and LM files (E_FATAL on failure);
 *   - repeat the sanity checks and the <s>/</s> unlinking done at init time;
 *   - rebuild the left-context list, the unigram lextrees (only if an LM is
 *     present) and the filler lextrees;
 *   - re-create ascr, the Viterbi history and the HMM histogram.
 *
 * NOTE(review): only the kb->fillertree pointer array is freed here; the
 * lextrees it pointed to, the previous kb->ugtree (array and trees) and the
 * previous kb->ascr are not released in this function -- confirm whether
 * they are owned and freed elsewhere, otherwise they leak on every update.
 * NOTE(review): when lm is NULL kb->ugtree is not rebuilt, yet
 * kb->ugtree[0] is still read for the dump and the histogram size below --
 * confirm that a NULL LM cannot occur here.
 */
void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;

    /*** clean up ***/
    vithist_t *vithist = kb->vithist;

    if (kb->fillertree)
        ckd_free ((void *)kb->fillertree);
    if (kb->hmm_hist)
        ckd_free ((void *)kb->hmm_hist);

    /* vithist: release its arrays, then the object itself */
    if (vithist) {
        ckd_free ((void *) vithist->entry);
        ckd_free ((void *) vithist->frame_start);
        ckd_free ((void *) vithist->bestscore);
        ckd_free ((void *) vithist->bestvh);
        ckd_free ((void *) vithist->lms2vh_root);
        ckd_free ((void *) kb->vithist);
    }

    /*** re-initialize ***/
    kb->kbcore = kbcore_update_lm(kb->kbcore,
                                  dictfile,
                                  cmd_ln_str("-fdict"),
                                  "",   /* Hack!! Hardwired constant for -compsep argument */
                                  lmfile,
                                  cmd_ln_str("-fillpen"),
                                  cmd_ln_float32("-silprob"),
                                  cmd_ln_float32("-fillprob"),
                                  cmd_ln_float32("-lw"),
                                  cmd_ln_float32("-wip"),
                                  cmd_ln_float32("-uw"));
    if(kb->kbcore==NULL){
        E_FATAL("Updating kbcore failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    /* Both sentence anchors must exist in the dictionary and in the LM */
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lm){
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if(lm){
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    /* Mark the last phone of every word, unless it is a filler phone */
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    /* Silence is always a possible left context */
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    /* Turn the bit vector into a BAD_S3CIPID-terminated list */
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lm) {
        E_INFO("Creating Unigram Table\n");
        n=0;
        n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n",n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;        /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree,
           lextree_n_node(kb->fillertree[0]));

    /* Optional debugging dump of every lextree to stderr */
    if (cmd_ln_int32("-lextreedump")) {
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);

    /* kb->beam is reused as-is; it is not re-created by this function */
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    /* Histogram with one bin per hmm_hist_binsize lextree nodes */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */
}
/*
 * Dump the Viterbi history as a word lattice (DAG) in text form to fp.
 *
 * The file header must have been written BEFORE this function is called;
 * this routine emits the "Nodes", "Initial"/"Final", "BestSegAscr" and
 * "Edges" sections followed by "End".
 *
 *   vh      Viterbi history whose valid entries become the lattice.
 *   hyp     Best hypothesis (list of hyp_t); its segments are never pruned.
 *   dict    Dictionary used for word strings and the <s> / </s> word ids.
 *   oldfmt  Non-zero: edge lines are "FROM-NODEID TO-NODEID ASCORE";
 *           zero:     edge lines are "FROM-NODEID ENDFRAME ASCORE".
 *   fp      Output stream.
 */
void vithist_dag_write (vithist_t *vh, glist_t hyp, dict_t *dict, int32 oldfmt, FILE *fp)
{
    glist_t *by_start;          /* by_start[f]: dagnodes (one per word-id) starting at frame f */
    vithist_entry_t *entry, *prev;
    gnode_t *it, *vit, *sit;
    dagnode_t *node, *succ;
    hyp_t *seg;
    int32 start, end, next_start;
    int32 frm, id, n_node;

    by_start = (glist_t *) ckd_calloc (vh->n_frm+1, sizeof(glist_t));

    /*
     * Pass 1: group every valid history entry under its <start-frame, word-id>
     * dagnode.  The range of ids includes the dummy <s> and </s> entries.
     */
    for (id = 0; id < vh->n_entry; id++) {
        entry = vh->entry[VITHIST_ID2BLK(id)] + VITHIST_ID2BLKOFFSET(id);
        if (! entry->valid)
            continue;

        if (entry->sf > 0) {
            start = entry->sf;
            end = entry->ef;
        }
        else {
            /*
             * MAJOR HACK (kept from the original): the initial <s> entry is a
             * dummy with start/end frame = -1, but the old S3 code treats it as
             * a real word.  It is therefore reintroduced with start time 0, and
             * words starting at frame 0 are shifted up by 1.
             */
            assert (entry->sf >= -1);
            assert ((entry->ef == -1) || (entry->ef > 1));

            start = entry->sf + 1;
            end = (entry->ef < 0) ? 0 : entry->ef;
        }

        /* Look for an existing dagnode for this word at this start frame */
        node = NULL;
        for (it = by_start[start]; it; it = gnode_next(it)) {
            dagnode_t *cand = (dagnode_t *) gnode_ptr(it);

            if (cand->wid == entry->wid) {
                node = cand;
                break;
            }
        }

        if (node != NULL) {
            node->lef = end;
        }
        else {
            node = (dagnode_t *) ckd_calloc (1, sizeof(dagnode_t));
            node->wid = entry->wid;
            node->fef = end;
            node->lef = end;
            node->seqid = -1;   /* Invalid until selected by the passes below */
            node->velist = NULL;

            by_start[start] = glist_add_ptr (by_start[start], (void *) node);
        }

        /*
         * An entry with the same end frame may already be listed (generated by
         * a different LM context); keep only the better scoring of the two.
         */
        prev = NULL;
        for (vit = node->velist; vit; vit = gnode_next(vit)) {
            prev = (vithist_entry_t *) gnode_ptr (vit);
            if (prev->ef == entry->ef)
                break;
        }
        if (vit == NULL)
            node->velist = glist_add_ptr (node->velist, (void *) entry);
        else if (entry->score > prev->score)
            gnode_ptr(vit) = (void *) entry;
    }

    /*
     * Pass 2: dagnodes with a single end time can be pruned, except those on
     * the original hypothesis; mark the latter first so they survive.
     */
    for (it = hyp; it; it = gnode_next(it)) {
        seg = (hyp_t *) gnode_ptr (it);
        for (sit = by_start[seg->sf]; sit; sit = gnode_next(sit)) {
            node = (dagnode_t *) gnode_ptr (sit);
            if (node->wid == seg->id)
                node->seqid = 0;        /* Do not discard (prune) this dagnode */
        }
    }

    /* The start-word and finish-word nodes are always kept */
    node = (dagnode_t *) gnode_ptr(by_start[0]);
    assert (node->wid == dict_startwid(dict));
    node->seqid = 0;
    node = (dagnode_t *) gnode_ptr(by_start[vh->n_frm]);
    assert (node->wid == dict_finishwid(dict));
    node->seqid = 0;

    /*
     * Pass 3: assign sequence ids, walking from the last frame to the first.
     * A node is kept if it has more than one end frame or was marked above;
     * every other node is flagged with -1 (discard).
     */
    n_node = 0;
    for (frm = vh->n_frm; frm >= 0; --frm) {
        for (it = by_start[frm]; it; it = gnode_next(it)) {
            node = (dagnode_t *) gnode_ptr(it);
            if ((node->lef > node->fef) || (node->seqid >= 0))
                node->seqid = n_node++;
            else
                node->seqid = -1;
        }
    }

    /* Node section */
    fprintf (fp, "Nodes %d (NODEID WORD STARTFRAME FIRST-ENDFRAME LAST-ENDFRAME)\n", n_node);
    for (frm = vh->n_frm; frm >= 0; --frm) {
        for (it = by_start[frm]; it; it = gnode_next(it)) {
            node = (dagnode_t *) gnode_ptr(it);
            if (node->seqid < 0)
                continue;
            fprintf (fp, "%d %s %d %d %d\n",
                     node->seqid, dict_wordstr(dict, node->wid), frm, node->fef, node->lef);
        }
    }
    fprintf (fp, "#\n");

    fprintf (fp, "Initial %d\nFinal %d\n", n_node-1, 0);
    fprintf (fp, "#\n");

    fprintf (fp, "BestSegAscr 0 (NODEID ENDFRAME ASCORE)\n");
    fprintf (fp, "#\n");

    /* Edge section */
    if (oldfmt)
        fprintf (fp, "Edges (FROM-NODEID TO-NODEID ASCORE)\n");
    else
        fprintf (fp, "Edges (FROM-NODEID ENDFRAME ASCORE)\n");

    for (frm = vh->n_frm-1; frm >= 0; --frm) {
        for (it = by_start[frm]; it; it = gnode_next(it)) {
            node = (dagnode_t *) gnode_ptr(it);

            /* Discarded nodes contribute no edges */
            if (node->seqid < 0)
                continue;

            /* Each listed entry may link this node to nodes starting right after it ends */
            for (vit = node->velist; vit; vit = gnode_next(vit)) {
                entry = (vithist_entry_t *) gnode_ptr (vit);
                next_start = (entry->ef < 0) ? 1 : (entry->ef + 1);

                for (sit = by_start[next_start]; sit; sit = gnode_next(sit)) {
                    succ = (dagnode_t *) gnode_ptr(sit);
                    if (succ->seqid < 0)
                        continue;

                    if (oldfmt) {
                        /* Old format: one line per surviving successor */
                        fprintf (fp, "%d %d %d\n", node->seqid, succ->seqid, entry->ascr);
                    }
                    else {
                        /* New format: a single line as soon as any successor survives */
                        fprintf (fp, "%d %d %d\n", node->seqid, next_start-1, entry->ascr);
                        break;
                    }
                }
            }
        }
    }
    fprintf (fp, "End\n");

    /* Release the temporary dagnode structure */
    for (frm = 0; frm <= vh->n_frm; frm++) {
        for (it = by_start[frm]; it; it = gnode_next(it)) {
            node = (dagnode_t *) gnode_ptr(it);
            glist_free (node->velist);
            ckd_free ((void *) node);
        }
        glist_free (by_start[frm]);
    }
    ckd_free ((void *) by_start);
}
/*
 * Terminate the Viterbi history at the end of an utterance by appending a
 * final </s> entry that follows the best-scoring word exit.
 *
 * The candidates are the entries of the last frame that has any; each one is
 * scored as its own path score plus the LM trigram score of moving to </s>.
 * If that frame is not the last frame of the utterance, an entry for the
 * silence word is first added through vithist_rescore() and the function
 * calls itself again.
 *
 * Returns the history id of the new </s> entry, or -1 if no frame has any
 * entry.
 */
int32 vithist_utt_end (vithist_t *vh, kbcore_t *kbc)
{
    int32 last, id, blk, off;
    int32 first_id = 0, end_id = 0;
    int32 total, best_total, best_id, new_id;
    vithist_entry_t *cur, *fin, *best = 0;
    s3lmwid_t lm_end;
    lm_t *lm;
    dict_t *dict;

    /* Scan backwards for the last frame that owns entries: [first_id, end_id) */
    last = vh->n_frm - 1;
    while (last >= 0) {
        first_id = vh->frame_start[last];       /* First vithist entry in frame last */
        end_id = vh->frame_start[last+1];       /* First vithist entry in the next frame */
        if (first_id < end_id)
            break;
        --last;
    }
    if (last < 0)
        return -1;

    /* NOTE(review): the same message is logged a second time further down on this path. */
    if (last != vh->n_frm-1) {
        E_ERROR("No word exit in frame %d, using exits from frame %d\n", vh->n_frm-1, last);
    }

    /* Terminate in a final </s> node (make this optional?) */
    lm = kbcore_lm (kbc);
    lm_end = lm_finishwid (lm);

    /* Choose the exit whose path score plus transition into </s> is highest */
    best_total = MAX_NEG_INT32;
    best_id = -1;
    for (id = first_id; id < end_id; id++) {
        blk = VITHIST_ID2BLK (id);
        off = VITHIST_ID2BLKOFFSET (id);
        cur = vh->entry[blk] + off;

        total = cur->score;
        total += lm_tg_score (lm, cur->lmstate.lm3g.lwid[1], cur->lmstate.lm3g.lwid[0], lm_end);

        if (total > best_total) {
            best_total = total;
            best_id = id;
            best = cur;
        }
    }
    assert (best_id >= 0);

    dict = kbcore_dict (kbc);

    if (last != vh->n_frm-1) {
        E_ERROR("No word exit in frame %d, using exits from frame %d\n", vh->n_frm-1, last);

        /* Add a dummy silwid covering the remainder of the utterance */
        assert (vh->frame_start[vh->n_frm-1] == vh->frame_start[vh->n_frm]);
        vh->n_frm -= 1;
        vithist_rescore (vh, kbc, dict_silwid (dict), vh->n_frm, best->score, best_id, -1);
        vh->n_frm += 1;
        vh->frame_start[vh->n_frm] = vh->n_entry;

        /* Start over now that the last frame has an entry */
        return vithist_utt_end (vh, kbc);
    }

    /* Create the </s> entry; its id is the entry count before allocation */
    new_id = vh->n_entry;
    fin = vithist_entry_alloc (vh);

    fin->wid = dict_finishwid (dict);
    fin->sf = (best->ef == BAD_S3FRMID) ? 0 : best->ef + 1;
    fin->ef = vh->n_frm;
    fin->ascr = 0;
    fin->lscr = best_total - best->score;
    fin->score = best_total;
    fin->pred = best_id;
    fin->type = 0;
    fin->valid = 1;

    fin->lmstate.lm3g.lwid[0] = lm_end;
    fin->lmstate.lm3g.lwid[1] = fin->lmstate.lm3g.lwid[0];

    return new_id;
}
/**
 * Record the current hypothesis of a live decoder.
 *
 * Any previously recorded hypothesis is released with ld_free_hyps().  The
 * Viterbi history is then closed (fully when _end_utt is non-zero, partially
 * otherwise) and backtraced; the resulting segments and the space-separated
 * word string (filler words and the finish word left out) are stored in
 * _decoder->hyp_segs and _decoder->hyp_str, and _decoder->hyp_frame_num is
 * set to the number of frames decoded so far.
 *
 * @param _decoder  Live decoder; must not be NULL (asserted).
 * @param _end_utt  Non-zero to use vithist_utt_end(), zero to use
 *                  vithist_partialutt_end().
 * @return LD_SUCCESS on success, LD_ERROR_INTERNAL if the Viterbi history
 *         could not be retrieved, LD_ERROR_OUT_OF_MEMORY if storage for the
 *         hypothesis could not be allocated.
 */
int ld_record_hyps(live_decoder_t * _decoder, int _end_utt)
{
    int32 id;
    int32 i = 0;
    glist_t hyp_list = NULL;
    gnode_t *node;
    srch_hyp_t *hyp;
    char *hyp_strptr = 0;
    char *hyp_str = 0;
    srch_hyp_t **hyp_segs = 0;
    int hyp_seglen = 0;
    int hyp_strlen = 0;
    int finish_wid = 0;
    kb_t *kb = 0;
    dict_t *dict;
    int rv;

    assert(_decoder != NULL);

    ld_free_hyps(_decoder);

    kb = &_decoder->kb;
    dict = kbcore_dict(_decoder->kbcore);
    id = _end_utt
        ? vithist_utt_end(kb->vithist, _decoder->kbcore)
        : vithist_partialutt_end(kb->vithist, _decoder->kbcore);
    if (id < 0) {
        E_WARN("Failed to retrieve viterbi history.\n");
        return LD_ERROR_INTERNAL;
    }

    /** record the segment length and the overall string length */
    hyp_list = vithist_backtrace(kb->vithist, id, dict);
    finish_wid = dict_finishwid(dict);
    for (node = hyp_list; node != NULL; node = gnode_next(node)) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        hyp_seglen++;
        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {
            /* +1 for the separator; the last one becomes the terminating NUL */
            hyp_strlen +=
                strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) + 1;
        }
    }

    /* Always leave room for the terminating NUL, even with no words */
    if (hyp_strlen == 0) {
        hyp_strlen = 1;
    }

    /** allocate array to hold the segments and/or decoded string */
    hyp_str = (char *) ckd_calloc(hyp_strlen, sizeof(char));
    hyp_segs =
        (srch_hyp_t **) ckd_calloc(hyp_seglen + 1, sizeof(srch_hyp_t *));
    if (hyp_segs == NULL || hyp_str == NULL) {
        E_WARN("Failed to allocate storage for hypothesis.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_record_hyps_cleanup;
    }

    /** iterate thru to fill in the array of segments and/or decoded string */
    i = 0;
    hyp_strptr = hyp_str;
    for (node = hyp_list; node != NULL; node = gnode_next(node), i++) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        hyp_segs[i] = hyp;

        hyp->word = dict_wordstr(dict, dict_basewid(dict, hyp->id));
        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {
            /*
             * The buffer is zero-filled, so hyp_strptr always points at an
             * empty string and strcat() appends exactly at the write position.
             */
            strcat(hyp_strptr, hyp->word);
            hyp_strptr += strlen(hyp_strptr);
            *hyp_strptr = ' ';
            hyp_strptr += 1;
        }
    }

    /* Only the list nodes are released; the segments now live in hyp_segs */
    glist_free(hyp_list);

    /* Replace the trailing separator (if any) with the terminator */
    hyp_str[hyp_strlen - 1] = '\0';
    hyp_segs[hyp_seglen] = 0;
    _decoder->hyp_frame_num = _decoder->num_frames_decoded;
    _decoder->hyp_segs = hyp_segs;
    _decoder->hyp_str = hyp_str;

    return LD_SUCCESS;

  ld_record_hyps_cleanup:
    if (hyp_segs != NULL) {
        ckd_free(hyp_segs);
    }
    /* Free the string itself (this used to free hyp_segs a second time) */
    if (hyp_str != NULL) {
        ckd_free(hyp_str);
    }
    if (hyp_list != NULL) {
        /* Release the segments first, then the list nodes that held them */
        for (node = hyp_list; node != NULL; node = gnode_next(node)) {
            if ((hyp = (srch_hyp_t *) gnode_ptr(node)) != NULL) {
                ckd_free(hyp);
            }
        }
        glist_free(hyp_list);
    }

    return rv;
}
/*
 * Write a word-by-word breakdown of a hypothesis to fp.
 *
 * One header line is printed, then one line per word segment (the start and
 * finish words, segments with a negative word id and zero-width segments are
 * left out), then a TOTAL line with the summed acoustic and LM scores.
 *
 *   fp        Output stream; nothing is written when NULL.
 *   hyp       Hypothesis as a list of srch_hyp_t.
 *   uttid     Utterance id printed on every line.
 *   LBL       Label for the header and TOTAL lines.
 *   lbl       Label for the per-word lines.
 *   senscale  Scaling array handed to compute_scale().  When non-NULL the
 *             per-segment scale is added to the printed and summed acoustic
 *             scores and the column is titled "AScr(UnNorm)"; when NULL the
 *             scores are used as stored and the column is "AScr(Norm)".
 *   dict      Dictionary; must not be NULL (asserted).
 */
void match_detailed(FILE * fp, glist_t hyp, char *uttid, char *LBL, char *lbl, int32 * senscale, dict_t * dict)
{
    int32 total_ascr = 0;
    int32 total_lscr = 0;
    int32 seg_scale;
    gnode_t *it;
    srch_hyp_t *seg;

    if (fp == NULL)
        return;

    assert(dict);

    /* Only the title of the acoustic-score column depends on senscale */
    fprintf(fp, "%s:%s> %20s %5s %5s %12s %10s %10s %10s\n", LBL, uttid,
            "WORD", "SFrm", "EFrm",
            senscale ? "AScr(UnNorm)" : "AScr(Norm)",
            "LMScore", "AScr+LScr", "AScale");

    for (it = hyp; it; it = gnode_next(it)) {
        seg = (srch_hyp_t *) gnode_ptr(it);

        /* Sentence boundary words are not reported */
        if (seg->id == dict_finishwid(dict) || seg->id == dict_startwid(dict))
            continue;

        /* Neither are invalid word ids or zero-width segments */
        if (seg->id < 0 || (seg->sf == seg->ef))
            continue;

        /*
         * NOTE(review): compute_scale() is called even when senscale is NULL;
         * confirm that it tolerates a NULL array.
         */
        seg_scale = 0;
        seg_scale += compute_scale(seg->sf, seg->ef, senscale);

        if (senscale) {
            fprintf(fp, "%s:%s> %20s %5d %5d %12d %10d %10d %10d \n", lbl, uttid,
                    dict_wordstr(dict, seg->id), seg->sf, seg->ef,
                    seg->ascr + seg_scale, seg->lscr,
                    seg->ascr + seg_scale + seg->lscr, seg_scale);
        }
        else {
            fprintf(fp, "%s:%s> %20s %5d %5d %12d %10d %10d %10d\n", lbl, uttid,
                    dict_wordstr(dict, seg->id), seg->sf, seg->ef,
                    seg->ascr, seg->lscr,
                    seg->ascr + seg->lscr, seg_scale);
        }

        total_ascr += seg->ascr;
        if (senscale)
            total_ascr += seg_scale;
        total_lscr += seg->lscr;
    }

    fprintf(fp, "%s:%s> %20s %5s %5s %12d %10d\n", LBL, uttid,
            "TOTAL", "", "", total_ascr, total_lscr);
}