void vithist_rescore (vithist_t *vh, kbcore_t *kbc, s3wid_t wid, int32 ef, int32 score, int32 pred, int32 type) { vithist_entry_t *pve, tve; s3lmwid_t lwid; int32 se, fe; int32 i; assert (vh->n_frm == ef); pve = vh->entry[VITHIST_ID2BLK(pred)] + VITHIST_ID2BLKOFFSET(pred); /* Create a temporary entry with all the info currently available */ tve.wid = wid; tve.sf = pve->ef + 1; tve.ef = ef; tve.type = type; tve.valid = 1; tve.ascr = score - pve->score; if (pred == 0) { /* Special case for the initial <s> entry */ se = 0; fe = 1; } else { se = vh->frame_start[pve->ef]; fe = vh->frame_start[pve->ef + 1]; } if (dict_filler_word (kbcore_dict(kbc), wid)) { tve.lscr = fillpen (kbcore_fillpen(kbc), wid); tve.score = score + tve.lscr; tve.pred = pred; tve.lmstate.lm3g = pve->lmstate.lm3g; vithist_enter (vh, kbc, &tve); } else { lwid = kbcore_dict2lmwid (kbc, wid); tve.lmstate.lm3g.lwid[0] = lwid; for (i = se; i < fe; i++) { pve = vh->entry[VITHIST_ID2BLK(i)] + VITHIST_ID2BLKOFFSET(i); if (pve->valid) { tve.lscr = lm_tg_score (kbcore_lm(kbc), pve->lmstate.lm3g.lwid[1], pve->lmstate.lm3g.lwid[0], lwid); tve.score = pve->score + tve.ascr + tve.lscr; if ((tve.score - vh->wbeam) >= vh->bestscore[vh->n_frm]) { tve.pred = i; tve.lmstate.lm3g.lwid[1] = pve->lmstate.lm3g.lwid[0]; vithist_enter (vh, kbc, &tve); } } } } }
/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* LM */ lm_read ((char *) cmd_ln_access("-lmfn"), ""); /* Filler penalties */ fillpen_init ((char *) cmd_ln_access("-fillpenfn"), dict->filler_start, dict->filler_end); }
void match_write(FILE * fp, glist_t hyp, char *uttid, dict_t * dict, char *hdr) { gnode_t *gn; srch_hyp_t *h; int counter = 0; if (fp == NULL) return; if (hyp == NULL) /* Following s3.0 convention */ fprintf(fp, "(null)"); fprintf(fp, "%s", (hdr ? hdr : "")); /* for (gn = hyp; gn && (gnode_next(gn)); gn = gnode_next(gn)) { */ for (gn = hyp; gn; gn = gnode_next(gn)) { h = (srch_hyp_t *) gnode_ptr(gn); if (h->sf != h->ef) { /* FSG outputs zero-width hyps */ if ((!dict_filler_word(dict, h->id)) && (h->id != dict_finishwid(dict)) && (h->id != dict_startwid(dict))) fprintf(fp, "%s ", dict_wordstr(dict, dict_basewid(dict, h->id))); counter++; } } if (counter == 0) fprintf(fp, " "); fprintf(fp, "(%s)\n", uttid); fflush(fp); }
int dump_line(FILE * fp_output, seg_hyp_line_t * seg_hyp_line, dict_t * dict) { double CONFIDENCE_THRESHOLD; conf_srch_hyp_t *w; CONFIDENCE_THRESHOLD = cmd_ln_float64_r(config, "-conf_thr"); fprintf(fp_output, "%s ", seg_hyp_line->seq); fprintf(fp_output, "%d ", seg_hyp_line->cscore); fprintf(fp_output, "%d ", (int) seg_hyp_line->lmtype); for (w = seg_hyp_line->wordlist; w; w = w->next) { fprintf(fp_output, "%s ", w->sh.word); if (dict_filler_word(dict, w->sh.id) || w->sh.id == BAD_S3WID) fprintf(fp_output, "-UNK- "); else { /* fprintf(fp_output, "w->sh.cscr %d, CONFIDENCE_THRESHOLD %f\n", w->sh.cscr, CONFIDENCE_THRESHOLD); */ if (w->sh.cscr > CONFIDENCE_THRESHOLD) fprintf(fp_output, "-OK- "); else fprintf(fp_output, "-BAD- "); } fprintf(fp_output, "%d ", w->sh.cscr); fprintf(fp_output, "%3.1f ", w->lmtype); } fprintf(fp_output, "\n"); fflush(fp_output); return CONFIDENCE_SUCCESS; }
static int32 refline2wds (char *line, dagnode_t *ref, int32 *noov, char *uttid) { int32 i, n, k; s3wid_t w, wid[MAX_UTT_LEN]; n = 0; uttid[0] = '\0'; *noov = 0; if ((n = line2wid (dict, line, wid, MAX_UTT_LEN-1, 1, uttid)) < 0) E_FATAL("Error in parsing ref line: %s\n", line); wid[n++] = silwid; for (i = 0; i < n; i++) { if (dict_filler_word (dict, wid[i]) && (i < n-1)) E_FATAL("Filler word (%s) in ref: %s\n", dict_wordstr(dict, wid[i]), line); if (wid[i] >= oovbegin) { /* Perhaps one of a homophone pair */ w = hom_lookup (wid[i]); if (IS_WID(w)) wid[i] = w; if (wid[i] >= oovbegin) (*noov)++; } wid2dagnode (ref+i, i, wid[i]); } return n; }
void vithist_prune (vithist_t *vh, dict_t *dict, int32 frm, int32 maxwpf, int32 maxhist, int32 beam) { int32 se, fe, filler_done, th; vithist_entry_t *ve; heap_t h; s3wid_t *wid; int32 i; assert (frm >= 0); se = vh->frame_start[frm]; fe = vh->n_entry - 1; th = vh->bestscore[frm] + beam; h = heap_new (); wid = (s3wid_t *) ckd_calloc (maxwpf+1, sizeof(s3wid_t)); wid[0] = BAD_S3WID; for (i = se; i <= fe; i++) { ve = vh->entry[VITHIST_ID2BLK(i)] + VITHIST_ID2BLKOFFSET(i); heap_insert (h, (void *)ve, -(ve->score)); ve->valid = 0; } /* Mark invalid entries: beyond maxwpf words and below threshold */ filler_done = 0; while ((heap_pop (h, (void **)(&ve), &i) > 0) && (ve->score >= th) && (maxhist > 0)) { if (dict_filler_word (dict, ve->wid)) { /* Major HACK!! Keep only one best filler word entry per frame */ if (filler_done) continue; filler_done = 1; } /* Check if this word already valid (e.g., under a different history) */ for (i = 0; IS_S3WID(wid[i]) && (wid[i] != ve->wid); i++); if (NOT_S3WID(wid[i])) { /* New word; keep only if <maxwpf words already entered, even if >= thresh */ if (maxwpf > 0) { wid[i] = ve->wid; wid[i+1] = BAD_S3WID; --maxwpf; --maxhist; ve->valid = 1; } } else if (! vh->bghist) { --maxhist; ve->valid = 1; } } ckd_free ((void *) wid); heap_destroy (h); /* Garbage collect invalid entries */ vithist_frame_gc (vh, frm); }
/* Write hypothesis in old (pre-Nov95) NIST format */ void log_hypstr(FILE * fp, srch_hyp_t * hypptr, char *uttid, int32 exact, int32 scr, dict_t * dict) { srch_hyp_t *h; s3wid_t w; if (fp == NULL) return; if (!hypptr) /* HACK!! */ fprintf(fp, "(null)"); for (h = hypptr; h; h = h->next) { if (h->sf != h->ef) { /* Take care of abnormality caused by * FSG search or various different * reasons */ w = h->id; if (!exact) { w = dict_basewid(dict, w); if ((w != dict->startwid) && (w != dict->finishwid) && (!dict_filler_word(dict, w))) fprintf(fp, "%s ", dict_wordstr(dict, w)); } else fprintf(fp, "%s ", dict_wordstr(dict, w)); } } if (scr != 0) fprintf(fp, " (%s %d)\n", uttid, scr); else fprintf(fp, " (%s)\n", uttid); fflush(fp); }
glist_t srch_FLAT_FWD_bestpath_impl(void *srch, /**< A void pointer to a search structure */ dag_t * dag) { srch_t *s; srch_FLAT_FWD_graph_t *fwg; float32 bestpathlw; float64 lwf; srch_hyp_t *tmph, *bph; glist_t ghyp, rhyp; s = (srch_t *) srch; fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct; assert(fwg->lathist); bestpathlw = cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-bestpathlw"); lwf = bestpathlw ? (bestpathlw / cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-lw")) : 1.0; flat_fwd_dag_add_fudge_edges(fwg, dag, cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-dagfudge"), cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-min_endfr"), (void *) fwg->lathist, s->kbc->dict); /* Bypass filler nodes */ if (!dag->filler_removed) { /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */ if (dict_filler_word(s->kbc->dict, dag->end->wid)) dag->end->wid = s->kbc->dict->finishwid; if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) < 0) E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge); else dag->filler_removed = 1; } bph = dag_search(dag, s->uttid, lwf, dag->end, s->kbc->dict, s->kbc->lmset->cur_lm, s->kbc->fillpen); if (bph != NULL) { ghyp = NULL; for (tmph = bph; tmph; tmph = tmph->next) ghyp = glist_add_ptr(ghyp, (void *) tmph); rhyp = glist_reverse(ghyp); return rhyp; } else { return NULL; } }
/* * Scan the dictionary for compound words. This function should be called just after * loading the dictionary. For the moment, compound words in a compound word are * assumed to be separated by the given sep character, (underscore in the CMU dict). * Return value: #compound words found in dictionary. */ static int32 dict_build_comp (dict_t *d, char sep) /* Separator character */ { char wd[4096]; int32 w, cwid; dictword_t *wordp; int32 nc; /* # compound words in dictionary */ int32 i, j, l, n; nc = 0; for (w = 0; w < d->n_word; w++) { wordp = d->word + dict_basewid(d, w); strcpy (wd, wordp->word); l = strlen(wd); if ((wd[0] == sep) || (wd[l-1] == sep)) E_FATAL("Bad compound word %s: leading or trailing separator\n", wordp->word); /* Count no. of components in this word */ n = 1; for (i = 1; i < l-1; i++) /* 0 and l-1 already checked above */ if (wd[i] == sep) n++; if (n == 1) continue; /* Not a compound word */ nc++; if ((w == d->startwid) || (w == d->finishwid) || dict_filler_word (d, w)) E_FATAL("Compound special/filler word (%s) not allowed\n", wordp->word); /* Allocate and fill in component word info */ wordp->n_comp = n; wordp->comp = (s3wid_t *) ckd_calloc (n, sizeof(s3wid_t)); /* Parse word string into components */ n = 0; for (i = 0; i < l; i++) { for (j = i; (i < l) && (wd[i] != sep); i++); if (j == i) E_FATAL("Bad compound word %s: successive separators\n", wordp->word); wd[i] = '\0'; cwid = dict_wordid (d, wd+j); if (NOT_WID(cwid)) E_FATAL("Component word %s of %s not in dictionary\n", wd+j, wordp->word); wordp->comp[n] = cwid; n++; } } if (nc > 0) d->comp_head = dict_comp_head (d); return nc; }
glist_t srch_FLAT_FWD_nbest_impl(void *srch, /**< A void pointer to a search structure */ dag_t * dag) { srch_t *s; srch_FLAT_FWD_graph_t *fwg; float32 bestpathlw; float64 lwf; char str[2000]; s = (srch_t *) srch; fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct; assert(fwg->lathist); if (!(cmd_ln_exists_r(kbcore_config(fwg->kbcore), "-nbestdir") && cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestdir"))) return NULL; ctl_outfile(str, cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestdir"), cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestext"), (s->uttfile ? s->uttfile : s->uttid), s->uttid, cmd_ln_boolean_r(kbcore_config(fwg->kbcore), "-build_outdirs")); bestpathlw = cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-bestpathlw"); lwf = bestpathlw ? (bestpathlw / cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-lw")) : 1.0; flat_fwd_dag_add_fudge_edges(fwg, dag, cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-dagfudge"), cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-min_endfr"), (void *) fwg->lathist, s->kbc->dict); /* Bypass filler nodes */ if (!dag->filler_removed) { /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */ if (dict_filler_word(s->kbc->dict, dag->end->wid)) dag->end->wid = s->kbc->dict->finishwid; dag_remove_unreachable(dag); if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) < 0) E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge); } dag_compute_hscr(dag, kbcore_dict(s->kbc), kbcore_lm(s->kbc), lwf); dag_remove_bypass_links(dag); dag->filler_removed = 0; nbest_search(dag, str, s->uttid, lwf, kbcore_dict(s->kbc), kbcore_lm(s->kbc), kbcore_fillpen(s->kbc) ); return NULL; }
main (int32 argc, char *argv[]) { char *reffile, *mdeffile, *dictfile, *fdictfile, *homfile; if (argc == 1) { cmd_ln_print_help (stderr, arglist); exit(0); } cmd_ln_parse (arglist, argc, argv); if ((mdeffile = (char *) cmd_ln_access ("-mdef")) == NULL) E_FATAL("-mdef argument missing\n"); if ((dictfile = (char *) cmd_ln_access ("-dict")) == NULL) E_FATAL("-dict argument missing\n"); if ((fdictfile = (char *) cmd_ln_access ("-fdict")) == NULL) E_FATAL("-fdict argument missing\n"); if ((reffile = (char *) cmd_ln_access ("-ref")) == NULL) E_FATAL("-ref argument missing\n"); unlimit(); mdef = mdef_init (mdeffile); if (mdef->n_ciphone <= 0) E_FATAL("0 CIphones in %s\n", mdeffile); dict = dict_init (mdef, dictfile, fdictfile); oovbegin = dict->n_word; startwid = dict_wordid (dict, "<s>"); finishwid = dict_wordid (dict, "</s>"); silwid = dict_wordid (dict, (char *) cmd_ln_access("-sil")); assert (dict_filler_word (dict, silwid)); homlist = NULL; if ((homfile = (char *) cmd_ln_access ("-hom")) != NULL) homfile_load (homfile); process_reffile (reffile); #if (0 && (! WIN32)) fflush (stdout); fflush (stderr); system ("ps aguxwww | grep dpalign"); #endif exit(0); }
/* Write hypothesis in old (pre-Nov95) NIST format */ static void log_hypstr (FILE *fp, hyp_t *hypptr, char *uttid, int32 scr) { hyp_t *h; s3wid_t w; if (! hypptr) /* HACK!! */ fprintf (fp, "(null)"); for (h = hypptr; h; h = h->next) { w = dict_basewid (h->wid); if ((w != startwid) && (w != finishwid) && (! dict_filler_word (w))) fprintf (fp, "%s ", dict_wordstr(w)); } if (scr != 0) fprintf (fp, " (%s %d)\n", uttid, scr); else fprintf (fp, " (%s)\n", uttid); fflush (fp); }
static void process_reffile (char *reffile) { FILE *rfp, *afp; char line[16384], uttid[4096]; int32 i, k, nref; s3wid_t ref[MAX_UTT_LEN]; s3cipid_t ap[MAX_UTT_LEN]; int8 ap_err[MAX_UTT_LEN]; wseg_t *wseg; if ((rfp = fopen(reffile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", reffile); afp = stdin; /* DP file read in from stdin */ while (fgets(line, sizeof(line), rfp) != NULL) { if ((nref = line2wid (dict, line, ref, MAX_UTT_LEN-1, 0, uttid)) < 0) E_FATAL("Bad line in file %s: %s\n", reffile, line); /* Check for unknown words; remove filler words; terminate with BAD_WID */ k = 0; for (i = 0; i < nref; i++) { if (NOT_WID(ref[i])) E_FATAL("Unknown word at position %d in line: %s\n", i, line); if (! dict_filler_word (dict, ref[i])) ref[k++] = ref[i]; } ref[k++] = BAD_WID; nref = k; /* Build wseg map for DP line */ if (fgets (line, sizeof(line), afp) == NULL) E_FATAL("Unexpected EOF(DP-file)\n"); wseg = line2wseg (line, ref, ap, ap_err, MAX_UTT_LEN-1, uttid); pronerr (uttid, ref, nref, wseg, ap, ap_err); } fclose (rfp); }
/*ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists. This makes the current implmentation more complicated than necessary. */ void kb_init (kb_t *kb) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; lmset_t *lmset; s3cipid_t sil, ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; char *str; int32 cisencnt; int32 j; /* Initialize the kb structure to zero, just in case */ memset(kb, 0, sizeof(*kb)); kb->kbcore = NULL; kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"), cmd_ln_str("-feat"), cmd_ln_str("-cmn"), cmd_ln_str("-varnorm"), cmd_ln_str("-agc"), cmd_ln_str("-mdef"), cmd_ln_str("-dict"), cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ cmd_ln_str("-lm"), cmd_ln_str("-lmctlfn"), cmd_ln_str("-lmdumpdir"), cmd_ln_str("-fillpen"), cmd_ln_str("-senmgau"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw"), cmd_ln_str("-mean"), cmd_ln_str("-var"), cmd_ln_float32("-varfloor"), cmd_ln_str("-mixw"), cmd_ln_float32("-mixwfloor"), cmd_ln_str("-subvq"), cmd_ln_str("-gs"), cmd_ln_str("-tmat"), cmd_ln_float32("-tmatfloor")); if(kb->kbcore==NULL){ E_FATAL("Initialization of kb failed\n"); } kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); lmset=kbcore_lmset(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if(lmset){ for(i=0;i<kbcore_nlm(kbcore);i++){ if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm))) E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name); } }else if(lm){ if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); } /* Check that HMM topology restrictions are not violated */ if (tmat_chk_1skip (kbcore->tmat) < 0) E_FATAL("Tmat contains arcs skipping more than 1 state\n"); /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ if(lmset){ for(i=0;i<kbcore_nlm(kbcore);i++){ lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID; lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID; } }else if(lm){ /* No LM is set at this point*/ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; } sil = mdef_silphone (kbcore_mdef (kbcore)); if (NOT_S3CIPID(sil)) E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE); kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32)); kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32)); /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; E_INFO("Building lextrees\n"); /* Get the number of lexical tree*/ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */ /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); if(lmset){ kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *)); /* Just allocate pointers*/ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for(i=0;i<kbcore_nlm(kbcore);i++){ E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name); n=0; for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */ wp[j].wid=-1; wp[j].prob=-1; } n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d.\n",n); if (n < 1) E_FATAL("%d active words in %s\n", n,lmset[i].name); n = wid_wordprob2alt(dict,wp,n); E_INFO("Size of word table after adding alternative prons: %d.\n",n); if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } for (j = 0; j < kb->n_lextree; j++) { kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0; E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n", kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j])); } } }else if (lm){ E_INFO("Creating Unigram Table\n"); n=0; n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d\n",n); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Create the desired no. of unigram lextrees */ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0])); } /* Create filler lextrees */ /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); ckd_free ((void *) lc); bitvec_free (lc_active); E_INFO("Lextrees(%d), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { if(lmset){ E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n"); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->beam = beam_init (cmd_ln_float64("-subvqbeam"), cmd_ln_float64("-beam"), cmd_ln_float64("-pbeam"), cmd_ln_float64("-wbeam")); E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n", kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq); /*Sections of optimization related parameters*/ kb->ds_ratio=cmd_ln_int32("-ds"); E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio); kb->rec_bstcid=-1; kb->skip_count=0; kb->cond_ds=cmd_ln_int32("-cond_ds"); E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds); if(kb->cond_ds>0&&kb->kbcore->gs==NULL) E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n"); kb->gs4gs=cmd_ln_int32("-gs4gs"); E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs); kb->svq4svq=cmd_ln_int32("-svq4svq"); E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq); kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam")); E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam); if(kb->ci_pbeam>10000000){ E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n"); } kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam")); E_INFO("Word-end pruning beam: %d\n",kb->wend_beam); kb->pl_window=cmd_ln_int32("-pl_window"); E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window); kb->pl_window_start=0; kb->pl_beam=logs3(cmd_ln_float32("-pl_beam")); E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam); for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++) ; kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32)); kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32)); kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32)); if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL) E_FATAL("feat_array_alloc() failed\n"); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); ptmr_init (&(kb->tm_sen)); ptmr_init (&(kb->tm_srch)); ptmr_init (&(kb->tm_ovrhd)); kb->tot_fr = 0; kb->tot_sen_eval = 0.0; kb->tot_gau_eval = 0.0; kb->tot_hmm_eval = 0.0; kb->tot_wd_exit = 0.0; kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); if(lmset) n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; else n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ /* Open hypseg file if specified */ str = cmd_ln_str("-hypseg"); kb->matchsegfp = NULL; if (str) { #ifdef SPEC_CPU_WINDOWS if ((kb->matchsegfp = fopen(str, "wt")) == NULL) #else if ((kb->matchsegfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } str = cmd_ln_str("-hyp"); kb->matchfp = NULL; if (str) { #ifdef SPEC_CPU_WINDOWS if ((kb->matchfp = fopen(str, "wt")) == NULL) #else if ((kb->matchfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } }
main (int32 argc, char *argv[]) { kb_t kb; kbcore_t *kbcore; bitvec_t active; int32 w; cmd_ln_parse (arglist, argc, argv); unlimit(); kbcore = kbcore_init (cmd_ln_float32("-logbase"), cmd_ln_str("-feat"), cmd_ln_str("-mdef"), cmd_ln_str("-dict"), cmd_ln_str("-fdict"), cmd_ln_str("-compsep"), cmd_ln_str("-lm"), cmd_ln_str("-fillpen"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_str("-mean"), cmd_ln_str("-var"), cmd_ln_float32("-varfloor"), cmd_ln_str("-senmgau"), cmd_ln_str("-mixw"), cmd_ln_float32("-mixwfloor"), cmd_ln_str("-tmat"), cmd_ln_float32("-tmatfloor")); /* Here's the perfect candidate for inheritance */ kb.mdef = kbcore->mdef; kb.dict = kbcore->dict; kb.lm = kbcore->lm; kb.fillpen = kbcore->fillpen; kb.tmat = kbcore->tmat; kb.dict2lmwid = kbcore->dict2lmwid; if ((kb.am = acoustic_init (kbcore->fcb, kbcore->gau, kbcore->sen, cmd_ln_float32("-mgaubeam"), S3_MAX_FRAMES)) == NULL) { E_FATAL("Acoustic models initialization failed\n"); } kb.beam = logs3 (cmd_ln_float64("-beam")); kb.wordbeam = logs3 (cmd_ln_float64("-wordbeam")); kb.wordmax = cmd_ln_int32("-wordmax"); /* Mark the active words and build lextree */ active = bitvec_alloc (dict_size (kb.dict)); bitvec_clear_all (active, dict_size(kb.dict)); for (w = 0; w < dict_size(kb.dict); w++) { if (IS_LMWID(kb.dict2lmwid[w]) || dict_filler_word (kb.dict, w)) bitvec_set (active, w); } kb.lextree_root = lextree_build (kb.dict, kb.mdef, active, cmd_ln_int32("-flatdepth")); kb.vithist = (glist_t *) ckd_calloc (S3_MAX_FRAMES+2, sizeof(glist_t)); kb.vithist++; /* Allow for dummy frame -1 for start word */ kb.lextree_active = NULL; kb.wd_last_sf = (int32 *) ckd_calloc (dict_size(kb.dict), sizeof(int32)); kb.tm = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t)); kb.tm_search = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t)); ctl_process (cmd_ln_str("-ctl"), cmd_ln_int32("-ctloffset"), cmd_ln_int32("-ctlcount"), decode_utt, &kb); exit(0); }
/* Decode the given mfc file and write result to given directory */ static void utt_astar(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid) { char dagfile[1024], nbestfile[1024]; const char *latdir; const char *latext; const char *nbestext; dag_t *dag; int32 nfrm; if (ur->lmname) lmset_set_curlm_wname(lmset, ur->lmname); latdir = cmd_ln_str_r(config, "-inlatdir"); latext = cmd_ln_str_r(config, "-latext"); nbestext = cmd_ln_str_r(config, "-nbestext"); if (latdir) { build_output_uttfile(dagfile, latdir, uttid, ur->uttfile); strcat(dagfile, "."); strcat(dagfile, latext); } else sprintf(dagfile, "%s.%s", uttid, latext); ptmr_reset(&tm_utt); ptmr_start(&tm_utt); nfrm = 0; if ((dag = dag_load(dagfile, cmd_ln_int32_r(config, "-maxedge"), cmd_ln_float32_r(config, "-logbase"), cmd_ln_int32_r(config, "-dagfudge"), dict, fpen, config, logmath)) != NULL) { if (dict_filler_word(dict, dag->end->wid)) dag->end->wid = dict->finishwid; dag_remove_unreachable(dag); if (dag_bypass_filler_nodes(dag, 1.0, dict, fpen) < 0) { E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge); goto search_done; } dag_compute_hscr(dag, dict, lmset->cur_lm, 1.0); dag_remove_bypass_links(dag); E_INFO("%5d frames, %6d nodes, %8d edges, %8d bypass\n", dag->nfrm, dag->nnode, dag->nlink, dag->nbypass); nfrm = dag->nfrm; build_output_uttfile(nbestfile, nbestdir, uttid, ur->uttfile); strcat(nbestfile, "."); strcat(nbestfile, nbestext); nbest_search(dag, nbestfile, uttid, 1.0, dict, lmset->cur_lm, fpen); lm_cache_stats_dump(lmset->cur_lm); lm_cache_reset(lmset->cur_lm); } else E_ERROR("Dag load (%s) failed\n", uttid); search_done: dag_destroy(dag); ptmr_stop(&tm_utt); printf("%s: TMR: %5d Frm", uttid, nfrm); if (nfrm > 0) { printf(" %6.2f xEl", tm_utt.t_elapsed * 100.0 / nfrm); printf(" %6.2f xCPU", tm_utt.t_cpu * 100.0 / nfrm); } printf("\n"); fflush(stdout); }
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t * mdef) { FILE *fp, *fp2; int32 n; lineiter_t *li; dict_t *d; s3cipid_t sil; char const *dictfile = NULL, *fillerfile = NULL; if (config) { dictfile = cmd_ln_str_r(config, "-dict"); fillerfile = cmd_ln_str_r(config, "-fdict"); } /* * First obtain #words in dictionary (for hash table allocation). * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate * all the required memory in one go. */ fp = NULL; n = 0; if (dictfile) { if ((fp = fopen(dictfile, "r")) == NULL) E_FATAL_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile); for (li = lineiter_start(fp); li; li = lineiter_next(li)) { if (li->buf[0] != '#') n++; } rewind(fp); } fp2 = NULL; if (fillerfile) { if ((fp2 = fopen(fillerfile, "r")) == NULL) E_FATAL_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile); for (li = lineiter_start(fp2); li; li = lineiter_next(li)) { if (li->buf[0] != '#') n++; } rewind(fp2); } /* * Allocate dict entries. HACK!! Allow some extra entries for words not in file. * Also check for type size restrictions. */ d = (dict_t *) ckd_calloc(1, sizeof(dict_t)); /* freed in dict_free() */ d->refcnt = 1; d->max_words = (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID; if (n >= MAX_S3WID) E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n, MAX_S3WID); E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n", d->max_words, sizeof(dictword_t), d->max_words * sizeof(dictword_t) / 1024); d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t)); /* freed in dict_free() */ d->n_word = 0; if (mdef) d->mdef = bin_mdef_retain(mdef); /* Create new hash table for word strings; case-insensitive word strings */ if (config && cmd_ln_exists_r(config, "-dictcase")) d->nocase = cmd_ln_boolean_r(config, "-dictcase"); d->ht = hash_table_new(d->max_words, d->nocase); /* Digest main dictionary file */ if (fp) { E_INFO("Reading main dictionary: %s\n", dictfile); dict_read(fp, d); fclose(fp); E_INFO("%d words read\n", d->n_word); } /* Now the filler dictionary file, if it exists */ d->filler_start = d->n_word; if (fillerfile) { E_INFO("Reading filler dictionary: %s\n", fillerfile); dict_read(fp2, d); fclose(fp2); E_INFO("%d words read\n", d->n_word - d->filler_start); } if (mdef) sil = bin_mdef_silphone(mdef); else sil = 0; if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) { dict_add_word(d, S3_START_WORD, &sil, 1); } if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) { dict_add_word(d, S3_FINISH_WORD, &sil, 1); } if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) { dict_add_word(d, S3_SILENCE_WORD, &sil, 1); } d->filler_end = d->n_word - 1; /* Initialize distinguished word-ids */ d->startwid = dict_wordid(d, S3_START_WORD); d->finishwid = dict_wordid(d, S3_FINISH_WORD); d->silwid = dict_wordid(d, S3_SILENCE_WORD); if ((d->filler_start > d->filler_end) || (!dict_filler_word(d, d->silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", S3_SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ return d; }
int s3_decode_record_hyps(s3_decode_t * _decode, int _end_utt) { int32 i = 0; glist_t hyp_list; gnode_t *node; srch_hyp_t *hyp; char *hyp_strptr = 0; char *hyp_str = 0; srch_t *srch; srch_hyp_t **hyp_segs = 0; int hyp_seglen = 0; int hyp_strlen = 0; int finish_wid = 0; kb_t *kb = 0; dict_t *dict; int rv; if (_decode == NULL) return S3_DECODE_ERROR_NULL_POINTER; s3_decode_free_hyps(_decode); kb = &_decode->kb; dict = kbcore_dict(_decode->kbcore); srch = (srch_t *) _decode->kb.srch; hyp_list = srch_get_hyp(srch); if (hyp_list == NULL) { E_WARN("Failed to retrieve viterbi history.\n"); return S3_DECODE_ERROR_INTERNAL; } /** record the segment length and the overall string length */ finish_wid = dict_finishwid(dict); for (node = hyp_list; node != NULL; node = gnode_next(node)) { hyp = (srch_hyp_t *) gnode_ptr(node); hyp_seglen++; if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) { hyp_strlen += strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) + 1; } } if (hyp_strlen == 0) { hyp_strlen = 1; } /** allocate array to hold the segments and/or decoded string */ hyp_str = (char *) ckd_calloc(hyp_strlen, sizeof(char)); hyp_segs = (srch_hyp_t **) ckd_calloc(hyp_seglen + 1, sizeof(srch_hyp_t *)); if (hyp_segs == NULL || hyp_str == NULL) { E_WARN("Failed to allocate storage for hypothesis.\n"); rv = S3_DECODE_ERROR_OUT_OF_MEMORY; goto s3_decode_record_hyps_cleanup; } /** iterate thru to fill in the array of segments and/or decoded string */ i = 0; hyp_strptr = hyp_str; for (node = hyp_list; node != NULL; node = gnode_next(node), i++) { hyp = (srch_hyp_t *) gnode_ptr(node); hyp_segs[i] = hyp; hyp->word = dict_wordstr(dict, dict_basewid(dict, hyp->id)); if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) { strcat(hyp_strptr, dict_wordstr(dict, dict_basewid(dict, hyp->id))); hyp_strptr += strlen(hyp_strptr); *hyp_strptr = ' '; hyp_strptr += 1; } } glist_free(hyp_list); hyp_str[hyp_strlen - 1] = '\0'; hyp_segs[hyp_seglen] = 0; _decode->hyp_frame_num = _decode->num_frames_decoded; _decode->hyp_segs = hyp_segs; _decode->hyp_str = hyp_str; return S3_DECODE_SUCCESS; s3_decode_record_hyps_cleanup: if (hyp_segs != NULL) { ckd_free(hyp_segs); } if (hyp_str != NULL) { ckd_free(hyp_str); } if (hyp_list != NULL) { for (node = hyp_list; node != NULL; node = gnode_next(node)) { if ((hyp = (srch_hyp_t *) gnode_ptr(node)) != NULL) { ckd_free(hyp); } } glist_free(hyp_list); } return rv; }
/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { float32 varfloor, mixwfloor, tpfloor; int32 i, s; s3cipid_t ci; s3wid_t w; char *arg; dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* Codebooks */ varfloor = *((float32 *) cmd_ln_access("-varfloor")); g = gauden_init ((char *) cmd_ln_access("-meanfn"), (char *) cmd_ln_access("-varfn"), varfloor); /* Verify codebook feature dimensions against libfeat */ n_feat = feat_featsize (&featlen); if (n_feat != g->n_feat) E_FATAL("#feature mismatch: s2= %d, mean/var= %d\n", n_feat, g->n_feat); for (i = 0; i < n_feat; i++) if (featlen[i] != g->featlen[i]) E_FATAL("featlen[%d] mismatch: s2= %d, mean/var= %d\n", i, featlen[i], g->featlen[i]); /* Senone mixture weights */ mixwfloor = *((float32 *) cmd_ln_access("-mwfloor")); sen = senone_init ((char *) cmd_ln_access("-mixwfn"), (char *) cmd_ln_access("-senmgaufn"), mixwfloor); /* Verify senone parameters against gauden parameters */ if (sen->n_feat != g->n_feat) E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat, sen->n_feat); if (sen->n_cw != g->n_density) E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n", g->n_density, sen->n_cw); if (sen->n_gauden > g->n_mgau) E_FATAL("Senones need more codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); if (sen->n_gauden < g->n_mgau) E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); /* Verify senone parameters against model definition parameters */ if (mdef->n_sen != sen->n_sen) E_FATAL("Model definition has %d senones; but #senone= %d\n", mdef->n_sen, sen->n_sen); /* CD/CI senone interpolation weights file, if present */ if ((arg = (char *) cmd_ln_access ("-lambdafn")) != NULL) { interp = interp_init (arg); /* Verify interpolation weights size with senones */ if (interp->n_sen != sen->n_sen) E_FATAL("Interpolation file has %d weights; but #senone= %d\n", interp->n_sen, sen->n_sen); } else interp = NULL; /* Transition matrices */ tpfloor = *((float32 *) cmd_ln_access("-tpfloor")); tmat = tmat_init ((char *) cmd_ln_access("-tmatfn"), tpfloor); /* Verify transition matrices parameters against model definition parameters */ if (mdef->n_tmat != tmat->n_tmat) E_FATAL("Model definition has %d tmat; but #tmat= %d\n", mdef->n_tmat, tmat->n_tmat); if (mdef->n_emit_state != tmat->n_state-1) E_FATAL("#Emitting states in model definition = %d, #states in tmat = %d\n", mdef->n_emit_state, tmat->n_state); arg = (char *) cmd_ln_access ("-agc"); if ((strcmp (arg, "max") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -agc argument: %s\n", arg); arg = (char *) cmd_ln_access ("-cmn"); if ((strcmp (arg, "current") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -cmn argument: %s\n", arg); }
int ld_utt_hyps(live_decoder_t *decoder, char **hyp_str, hyp_t ***hyp_segs) { int32 id; int32 i = 0; glist_t hyp_list; gnode_t *node; hyp_t *hyp; dict_t *dict; char *hyp_strptr; kb_t *kb = &decoder->kb; if (decoder->ld_state == LD_STATE_ENDED) { if (hyp_segs) { *hyp_segs = kb->hyp_segs; } if (hyp_str) { *hyp_str = kb->hyp_str; } return 0; } else { kb_freehyps(kb); } dict = kbcore_dict (decoder->kbcore); id = vithist_partialutt_end(kb->vithist, decoder->kbcore); if (id >= 0) { hyp_list = vithist_backtrace(kb->vithist, id); /* record the segment length and the overall string length */ for (node = hyp_list; node; node = gnode_next(node)) { hyp = (hyp_t *)gnode_ptr(node); if (hyp_segs) { kb->hyp_seglen++; } if (hyp_str) { if (!dict_filler_word(dict, hyp->id) && hyp->id != dict_finishwid(dict)) { kb->hyp_strlen += strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) + 1; } } } /* allocate array to hold the segments and/or decoded string */ if (hyp_segs) { kb->hyp_segs = (hyp_t **)ckd_calloc(kb->hyp_seglen, sizeof(hyp_t *)); } if (hyp_str) { kb->hyp_str = (char *)ckd_calloc(kb->hyp_strlen+1, sizeof(char)); } /* iterate thru to fill in the array of segments and/or decoded string */ i = 0; if (hyp_str) { hyp_strptr = kb->hyp_str; } for (node = hyp_list; node; node = gnode_next(node), i++) { hyp = (hyp_t *)gnode_ptr(node); if (hyp_segs) { kb->hyp_segs[i] = hyp; } if (hyp_str) { strcat(hyp_strptr, dict_wordstr(dict, dict_basewid(dict, hyp->id))); hyp_strptr += strlen(hyp_strptr); strcat(hyp_strptr, " "); hyp_strptr += 1; } } glist_free(hyp_list); if (hyp_str) { kb->hyp_str[kb->hyp_strlen - 1] = '\0'; } } if (hyp_segs) { *hyp_segs = kb->hyp_segs; } if (hyp_str) { *hyp_str = kb->hyp_str; } return 0; }
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. */ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } } glist_free(lc_pnodelist); } else { /* Filler word; no context modelled */ ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; fsg_pnode_add_all_ctxt(&(pnode->ctxt)); pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); } } else { /* Multi-phone word */ fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ ssid_pnode_map = (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); lc_pnodelist = NULL; rc_pnodelist = NULL; for (p = 0; p < pronlen; p++) { int ci = dict_pron(lextree->dict, dictwid, p); if (p == 0) { /* Root phone, handle required left contexts */ /* Find if we already have an lc_pnodelist for the first phone of this word */ fsg_glist_linklist_t *predglist=*curglist; fsg_glist_linklist_t *glist=*curglist; rc = dict_pron(lextree->dict, dictwid, 1); while (glist && glist->glist && glist->ci != ci && glist->rc != rc){ glist = glist->next; } if (glist && glist->ci == ci && glist->rc == rc && glist->glist) { /* We've found a valid glist. Hook to it and move to next phoneme */ lc_pnodelist = glist->glist; /* Set the predecessor node for the future tree first */ pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); continue; } else { /* Two cases that can bring us here * a. glist == NULL, i.e. end of current list. Create new entry. * b. glist->glist == NULL, i.e. first entry into list. */ if (!glist) { /* Case a; reduce it to case b by allocing glist */ glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); glist->next = predglist; *curglist = glist; } glist->ci = ci; glist->rc = rc; glist->lc = -1; lc_pnodelist = glist->glist = NULL; /* Gets created below */ } for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); /* Compression is not done by d2p, so we do it * here. This might be slow, but it might not * be... we'll see. */ pnode = ssid_pnode_map[0]; for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { pnode = ssid_pnode_map[j]; if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) break; } assert(j < n_ci); if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* This bit is tricky! For now we'll put the prob in the final link only */ /* pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; */ pnode->logs2prob = lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = FALSE; pnode->sibling = root; /* All root nodes linked together */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } fsg_pnode_add_ctxt(pnode, lc); } /* Put the lc_pnodelist back into glist */ glist->glist = lc_pnodelist; /* The predecessor node for the future tree is the root */ pred = root; } else if (p != pronlen - 1) { /* Word internal phone */ fsg_pnode_t *pnodeyoungest; ssid = dict2pid_internal(lextree->d2p, dictwid, p); /* First check if we already have this ssid in our tree */ pnode = pred->next.succ; pnodeyoungest = pnode; /* The youngest sibling */ while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { pnode = pnode->sibling; } if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { /* Found the ssid; go to next phoneme */ pred = pnode; continue; } /* pnode not found, allocate it */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->logs2prob = lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = FALSE; pnode->sibling = pnodeyoungest; /* May be NULL */ if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); pred->next.succ = pnode; } } else { /* Predecessor = word internal node */ pred->next.succ = pnode; } pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); pred = pnode; } else { /* Leaf phone, handle required right contexts */ /* Note, leaf phones are not part of the tree */ xwdssid_t *rssid; memset((void *) ssid_pnode_map, 0, n_ci * sizeof(fsg_pnode_t *)); lc = dict_pron(lextree->dict, dictwid, p-1); rssid = dict2pid_rssid(lextree->d2p, ci, lc); for (i = 0; rclist[i] >= 0; i++) { rc = rclist[i]; j = rssid->cimap[rc]; ssid = rssid->ssid[j]; pnode = ssid_pnode_map[j]; if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* We are plugging the word prob here. Ugly */ /* pnode->logs2prob = lextree->pip; */ pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = TRUE; pnode->sibling = rc_pnodelist ? (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; pnode->next.fsglink = fsglink; pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); rc_pnodelist = glist_add_ptr(rc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } else { assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); } fsg_pnode_add_ctxt(pnode, rc); } if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); /* Since all entries of lc_pnodelist point to the same array, sufficient to update it once */ break; } } } else { /* Predecessor = word internal node */ if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); } } } } ckd_free((void *) ssid_pnode_map); /* glist_free(lc_pnodelist); Nope; this gets freed outside */ glist_free(rc_pnodelist); } *alloc_head = head; return root; }
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ int32 tmatid; gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. */ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; ++n_lc_alloc; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } }
void kb_init (kb_t *kb) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; s3cipid_t sil, ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; char *str; /* Initialize the kb structure to zero, just in case */ memset(kb, 0, sizeof(*kb)); kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"), "1s_c_d_dd", /* Hack!! Hardwired constant for -feat argument */ cmd_ln_str("-cmn"), cmd_ln_str("-varnorm"), cmd_ln_str("-agc"), cmd_ln_str("-mdef"), cmd_ln_str("-dict"), cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ cmd_ln_str("-lm"), cmd_ln_str("-fillpen"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw"), cmd_ln_str("-mean"), cmd_ln_str("-var"), cmd_ln_float32("-varfloor"), cmd_ln_str("-mixw"), cmd_ln_float32("-mixwfloor"), cmd_ln_str("-subvq"), cmd_ln_str("-tmat"), cmd_ln_float32("-tmatfloor")); kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); /* Check that HMM topology restrictions are not violated */ if (tmat_chk_1skip (kbcore->tmat) < 0) E_FATAL("Tmat contains arcs skipping more than 1 state\n"); /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; sil = mdef_silphone (kbcore_mdef (kbcore)); if (NOT_S3CIPID(sil)) E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE); E_INFO("Building lextrees\n"); kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32)); kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32)); kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32)); /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; /* Create the desired no. of unigram lextrees */ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } bitvec_free (lc_active); ckd_free ((void *) lc); /* Create filler lextrees */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0]), lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->beam = beam_init (cmd_ln_float64("-subvqbeam"), cmd_ln_float64("-beam"), cmd_ln_float64("-pbeam"), cmd_ln_float64("-wbeam")); E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n", kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq); if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL) E_FATAL("feat_array_alloc() failed\n"); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); ptmr_init (&(kb->tm_sen)); ptmr_init (&(kb->tm_srch)); kb->tot_fr = 0; kb->tot_sen_eval = 0.0; kb->tot_gau_eval = 0.0; kb->tot_hmm_eval = 0.0; kb->tot_wd_exit = 0.0; kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ /* Open hypseg file if specified */ str = cmd_ln_str("-hypseg"); kb->matchsegfp = NULL; if (str) { #ifdef WIN32 if ((kb->matchsegfp = fopen(str, "wt")) == NULL) #else if ((kb->matchsegfp = fopen(str, "w")) == NULL) #endif E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str); } }
/* * Compute the left and right context CIphone sets for each state. * (Needed for building the phone HMM net using cross-word triphones. Invoke * after computing null transitions closure.) */ static void word_fsg_lc_rc(word_fsg_t * fsg) { int32 s, d, i, j; int32 n_ci; gnode_t *gn; word_fsglink_t *l; int32 silcipid; int32 endwid; int32 len; dict_t *dict; mdef_t *mdef; dict = fsg->dict; mdef = fsg->mdef; assert(fsg); assert(dict); assert(mdef); endwid = dict_basewid(dict, dict_finishwid(dict)); silcipid = mdef_silphone(mdef); assert(silcipid >= 0); E_INFO("Value of silcipid %d\n", silcipid); n_ci = fsg->n_ciphone; E_INFO("No of CI phones %d\n", n_ci); if (n_ci > 127) { E_FATAL ("#phones(%d) > 127; cannot use int8** for word_fsg_t.{lc,rc}\n", n_ci); } /* * fsg->lc[s] = set of left context CIphones for state s. Similarly, rc[s] * for right context CIphones. */ fsg->lc = (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8)); fsg->rc = (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8)); for (s = 0; s < fsg->n_state; s++) { for (d = 0; d < fsg->n_state; d++) { for (gn = fsg->trans[s][d]; gn; gn = gnode_next(gn)) { l = (word_fsglink_t *) gnode_ptr(gn); assert(l->wid >= 0); /* * Add the first CIphone of l->wid to the rclist of state s, and * the last CIphone to lclist of state d. * (Filler phones are a pain to deal with. There is no direct * marking of a filler phone; but only filler words are supposed to * use such phones, so we use that fact. HACK!! FRAGILE!!) */ if (dict_filler_word(dict, l->wid) || (l->wid == endwid)) { /* Filler phone; use silence phone as context */ fsg->rc[s][silcipid] = 1; fsg->lc[d][silcipid] = 1; } else { len = dict_pronlen(dict, l->wid); fsg->rc[s][dict_pron(dict, l->wid, 0)] = 1; fsg->lc[d][dict_pron(dict, l->wid, len - 1)] = 1; } } } /* * Add SIL phone to the lclist and rclist of each state. Strictly * speaking, only needed at start and final states, respectively, but * all states considered since the user may change the start and final * states. In any case, most applications would have a silence self * loop at each state, hence these would be needed anyway. */ fsg->lc[s][silcipid] = 1; fsg->rc[s][silcipid] = 1; } /* * Propagate lc and rc lists past null transitions. (Since FSG contains * null transitions closure, no need to worry about a chain of successive * null transitions. Right??) */ for (s = 0; s < fsg->n_state; s++) { for (d = 0; d < fsg->n_state; d++) { l = fsg->null_trans[s][d]; if (l) { /* * lclist(d) |= lclist(s), because all the words ending up at s, can * now also end at d, becoming the left context for words leaving d. */ for (i = 0; i < n_ci; i++) fsg->lc[d][i] |= fsg->lc[s][i]; /* * Similarly, rclist(s) |= rclist(d), because all the words leaving d * can equivalently leave s, becoming the right context for words * ending up at s. */ for (i = 0; i < n_ci; i++) fsg->rc[s][i] |= fsg->rc[d][i]; } } } /* Convert the bit-vector representation into a list */ for (s = 0; s < fsg->n_state; s++) { j = 0; for (i = 0; i < n_ci; i++) { if (fsg->lc[s][i]) { fsg->lc[s][j] = i; j++; } } fsg->lc[s][j] = -1; /* Terminate the list */ j = 0; for (i = 0; i < n_ci; i++) { if (fsg->rc[s][i]) { fsg->rc[s][j] = i; j++; } } fsg->rc[s][j] = -1; /* Terminate the list */ } }
/* Update kb w/ new dictionary and new LM. * assumes: single-LM kbcore (before & after) * requires: updating kbcore * Lucian Galescu, 08/11/2005 */ void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile) { kbcore_t *kbcore; mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; lm_t *lm; s3cipid_t ci; s3wid_t w; int32 i, n, n_lc; wordprob_t *wp; s3cipid_t *lc; bitvec_t lc_active; /*** clean up ***/ vithist_t *vithist = kb->vithist; if (kb->fillertree) ckd_free ((void *)kb->fillertree); if (kb->hmm_hist) ckd_free ((void *)kb->hmm_hist); /* vithist */ if (vithist) { ckd_free ((void *) vithist->entry); ckd_free ((void *) vithist->frame_start); ckd_free ((void *) vithist->bestscore); ckd_free ((void *) vithist->bestvh); ckd_free ((void *) vithist->lms2vh_root); ckd_free ((void *) kb->vithist); } /*** re-initialize ***/ kb->kbcore = kbcore_update_lm(kb->kbcore, dictfile, cmd_ln_str("-fdict"), "", /* Hack!! Hardwired constant for -compsep argument */ lmfile, cmd_ln_str("-fillpen"), cmd_ln_float32("-silprob"), cmd_ln_float32("-fillprob"), cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw")); if(kb->kbcore==NULL){ E_FATAL("Updating kbcore failed\n"); } kbcore = kb->kbcore; mdef = kbcore_mdef(kbcore); dict = kbcore_dict(kbcore); lm = kbcore_lm(kbcore); d2p = kbcore_dict2pid(kbcore); if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict))) E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD); if(lm){ if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm))) E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD); } /* * Unlink <s> and </s> between dictionary and LM, to prevent their * recognition. They are merely dummy words (anchors) at the beginning * and end of each utterance. */ if(lm){ lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID; lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID; for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w)) kbcore->dict2lmwid[w] = BAD_S3LMWID; } /* Build set of all possible left contexts */ lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); lc_active = bitvec_alloc (mdef_n_ciphone (mdef)); for (w = 0; w < dict_size (dict); w++) { ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1); if (! mdef_is_fillerphone (mdef, (int)ci)) bitvec_set (lc_active, ci); } ci = mdef_silphone(mdef); bitvec_set (lc_active, ci); for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) { if (bitvec_is_set (lc_active, ci)) lc[n_lc++] = ci; } lc[n_lc] = BAD_S3CIPID; E_INFO("Building lextrees\n"); /* Get the number of lexical tree*/ kb->n_lextree = cmd_ln_int32 ("-Nlextree"); if (kb->n_lextree < 1) { E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree); kb->n_lextree = 1; } /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */ /* Build active word list */ wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t)); if (lm) { E_INFO("Creating Unigram Table\n"); n=0; n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp); E_INFO("Size of word table after unigram + words in class: %d\n",n); if (n < 1) E_FATAL("%d active words\n", n); n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */ /* Retain or remove unigram probs from lextree, depending on option */ if (cmd_ln_int32("-treeugprob") == 0) { for (i = 0; i < n; i++) wp[i].prob = -1; /* Flatten all initial probabilities */ } /* Create the desired no. of unigram lextrees */ kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *)); for (i = 0; i < kb->n_lextree; i++) { kb->ugtree[i] = lextree_build (kbcore, wp, n, lc); lextree_type (kb->ugtree[i]) = 0; } E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0])); } /* Create filler lextrees */ /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */ n = 0; for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) { if (dict_filler_word(dict, i)) { wp[n].wid = i; wp[n].prob = fillpen (kbcore->fillpen, i); n++; } } kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*)); for (i = 0; i < kb->n_lextree; i++) { kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL); lextree_type (kb->fillertree[i]) = -1; } ckd_free ((void *) wp); ckd_free ((void *) lc); bitvec_free (lc_active); E_INFO("Lextrees(%d), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->fillertree[0])); if (cmd_ln_int32("-lextreedump")) { for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "UGTREE %d\n", i); lextree_dump (kb->ugtree[i], dict, stderr); } for (i = 0; i < kb->n_lextree; i++) { fprintf (stderr, "FILLERTREE %d\n", i); lextree_dump (kb->fillertree[i], dict, stderr); } fflush (stderr); } kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate); kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist")); kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize"); n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree; n /= kb->hmm_hist_binsize; kb->hmm_hist_bins = n+1; kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */ }
int ld_record_hyps(live_decoder_t * _decoder, int _end_utt) { int32 id; int32 i = 0; glist_t hyp_list; gnode_t *node; srch_hyp_t *hyp; char *hyp_strptr = 0; char *hyp_str = 0; srch_hyp_t **hyp_segs = 0; int hyp_seglen = 0; int hyp_strlen = 0; int finish_wid = 0; kb_t *kb = 0; dict_t *dict; int rv; assert(_decoder != NULL); ld_free_hyps(_decoder); kb = &_decoder->kb; dict = kbcore_dict(_decoder->kbcore); id = _end_utt ? vithist_utt_end(kb->vithist, _decoder->kbcore) : vithist_partialutt_end(kb->vithist, _decoder->kbcore); if (id < 0) { E_WARN("Failed to retrieve viterbi history.\n"); return LD_ERROR_INTERNAL; } /** record the segment length and the overall string length */ hyp_list = vithist_backtrace(kb->vithist, id, dict); finish_wid = dict_finishwid(dict); for (node = hyp_list; node != NULL; node = gnode_next(node)) { hyp = (srch_hyp_t *) gnode_ptr(node); hyp_seglen++; if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) { hyp_strlen += strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) + 1; } } if (hyp_strlen == 0) { hyp_strlen = 1; } /** allocate array to hold the segments and/or decoded string */ hyp_str = (char *) ckd_calloc(hyp_strlen, sizeof(char)); hyp_segs = (srch_hyp_t **) ckd_calloc(hyp_seglen + 1, sizeof(srch_hyp_t *)); if (hyp_segs == NULL || hyp_str == NULL) { E_WARN("Failed to allocate storage for hypothesis.\n"); rv = LD_ERROR_OUT_OF_MEMORY; goto ld_record_hyps_cleanup; } /** iterate thru to fill in the array of segments and/or decoded string */ i = 0; hyp_strptr = hyp_str; for (node = hyp_list; node != NULL; node = gnode_next(node), i++) { hyp = (srch_hyp_t *) gnode_ptr(node); hyp_segs[i] = hyp; hyp->word = dict_wordstr(dict, dict_basewid(dict, hyp->id)); if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) { strcat(hyp_strptr, dict_wordstr(dict, dict_basewid(dict, hyp->id))); hyp_strptr += strlen(hyp_strptr); *hyp_strptr = ' '; hyp_strptr += 1; } } glist_free(hyp_list); hyp_str[hyp_strlen - 1] = '\0'; hyp_segs[hyp_seglen] = 0; _decoder->hyp_frame_num = _decoder->num_frames_decoded; _decoder->hyp_segs = hyp_segs; _decoder->hyp_str = hyp_str; return LD_SUCCESS; ld_record_hyps_cleanup: if (hyp_segs != NULL) { ckd_free(hyp_segs); } if (hyp_str != NULL) { ckd_free(hyp_segs); } if (hyp_list != NULL) { for (node = hyp_list; node != NULL; node = gnode_next(node)) { if ((hyp = (srch_hyp_t *) gnode_ptr(node)) != NULL) { ckd_free(hyp); } } } return rv; }
int32 live_get_partialhyp(int32 endutt) { int32 id, nwds; glist_t hyp; gnode_t *gn; hyp_t *h; dict_t *dict; dict = kbcore_dict (kb->kbcore); if (endutt) id = vithist_utt_end(kb->vithist, kb->kbcore); else id = vithist_partialutt_end(kb->vithist, kb->kbcore); if (id > 0) { hyp = vithist_backtrace(kb->vithist,id); for (gn = hyp,nwds=0; gn; gn = gnode_next(gn),nwds++) { h = (hyp_t *) gnode_ptr (gn); if (parthyp[nwds].word != NULL) { ckd_free(parthyp[nwds].word); parthyp[nwds].word = NULL; } /* 20040905 L Galescu <*****@*****.**> * Report noise? If not, replace with silence word. */ if ((cmd_ln_int32("-reportfill") == 0) && dict_filler_word(dict, h->id)) parthyp[nwds].word = strdup(dict_wordstr(dict, dict->silwid)); else parthyp[nwds].word = strdup(dict_wordstr(dict, h->id)); /* 20040901 L Galescu <*****@*****.**> * Choice to report alternative pronunciations or not. */ if (cmd_ln_int32("-reportpron") == 0) dict_word2basestr(parthyp[nwds].word); parthyp[nwds].sf = h->sf; parthyp[nwds].ef = h->ef; parthyp[nwds].ascr = h->ascr; parthyp[nwds].lscr = h->lscr; } if (parthyp[nwds].word != NULL){ ckd_free(parthyp[nwds].word); parthyp[nwds].word = NULL; } parthyplen = nwds; /* Free hyplist */ for (gn = hyp; gn && (gnode_next(gn)); gn = gnode_next(gn)) { h = (hyp_t *) gnode_ptr (gn); ckd_free ((void *) h); } glist_free (hyp); } else if (id == 0) { nwds = 0; if (parthyp[nwds].word != NULL) { ckd_free(parthyp[nwds].word); parthyp[nwds].word = NULL; } parthyplen = nwds; } else { /* lgalescu 2004/10/13: nothing changed; we return the same hyp that was there before */ } return(parthyplen); }