/*
 * ARCHAN: to allow backward compatibility, -lm and -lmctlfn coexist.  This
 * makes the current implementation more complicated than necessary.
 */
/*
 * Initialize the decoder knowledge base from the command-line arguments.
 *
 * Steps, in order:
 *   - zero `kb` and build the core models through kbcore_init();
 *   - check that the start/finish words exist in the dictionary and in every
 *     LM (the LM set when -lmctlfn gave one, otherwise the single LM);
 *   - unlink the start/finish words between dictionary and LM(s);
 *   - allocate the active-senone / active-sseq work arrays;
 *   - collect the set of possible left-context CI phones;
 *   - build the unigram lextrees (per LM when an LM set is used) and the
 *     filler lextrees;
 *   - read beams and the optimization-related parameters;
 *   - allocate feature buffers, the Viterbi history and the HMM histogram;
 *   - open the -hypseg and -hyp output files when requested.
 *
 * Any unrecoverable inconsistency is reported through E_FATAL.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              cmd_ln_str("-feat"),
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",   /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-lmctlfn"),
                              cmd_ln_str("-lmdumpdir"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_str("-senmgau"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-gs"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
        E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset = kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
                E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
        }
    }
    else if (lm) {
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
            lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

            for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
            for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
        }
    }
    else if (lm) {
        /* Single-LM case: the mapping lives in kbcore rather than in the LM */
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        /* Last phone of each pronunciation is a candidate left context */
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;     /* terminator */

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n",
                kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lmset) {
        kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
        /* Just allocate pointers*/
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
            n = 0;
            for (j = 0; j < dict_size(dict); j++) {
                /* try to be very careful again */
                wp[j].wid = -1;
                wp[j].prob = -1;
            }
            n = lm_ug_wordprob (lmset[i].lm, dict, MAX_NEG_INT32, wp);
            E_INFO("Size of word table after unigram + words in class: %d.\n",n);
            if (n < 1)
                E_FATAL("%d active words in %s\n", n,lmset[i].name);
            n = wid_wordprob2alt(dict,wp,n);
            E_INFO("Size of word table after adding alternative prons: %d.\n",n);
            if (cmd_ln_int32("-treeugprob") == 0) {
                /*
                 * Flatten all initial probabilities.  This must not use `i`:
                 * `i` is the index of the LM currently being processed and is
                 * needed below to address ugtreeMulti and lmset.
                 */
                for (j = 0; j < n; j++)
                    wp[j].prob = -1;
            }
            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
                lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
                E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
                       kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
            }
        }
    }
    else if (lm) {
        E_INFO("Creating Unigram Table\n");
        n = 0;
        n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n",n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;    /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree,
           lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        if (lmset) {
            E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    /*Sections of optimization related parameters*/
    kb->ds_ratio = cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);

    kb->rec_bstcid = -1;
    kb->skip_count = 0;

    kb->cond_ds = cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);

    if (kb->cond_ds > 0 && kb->kbcore->gs == NULL)
        E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs = cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq = cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam = -1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if (kb->ci_pbeam > 10000000) {
        E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }

    kb->wend_beam = -1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window = cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

    kb->pl_window_start = 0;

    kb->pl_beam = logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    /*
     * Count the leading senones that map to themselves in cd2cisen.
     * NOTE(review): presumably these are the CI senones; the scan has no
     * explicit upper bound -- confirm cd2cisen always contains a
     * non-identity entry.
     */
    for (cisencnt = 0; cisencnt == mdef->cd2cisen[cisencnt]; cisencnt++)
        ;

    kb->cache_ci_senscr = (int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list = (int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list = (int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* Histogram of active HMMs: one bin per hmm_hist_binsize HMMs */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if (lmset)
        n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
        n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef WIN32
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    /* Open hyp file if specified */
    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef WIN32
        if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
dtree_t * mk_tree_comp(float32 ****mixw, /* ADDITION FOR CONTINUOUS_TREES, 18 May 98 */ float32 ****means, float32 ****vars, uint32 *veclen, /* END ADDITIONS FOR CONTINUOUS_TREES */ uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, uint32 *id, uint32 n_id, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 n_base_phone, uint32 **dfeat, uint32 n_dfeat, uint32 split_min, uint32 split_max, float32 split_thr, uint32 split_min_comp, uint32 split_max_comp, float32 split_thr_comp, float32 mwfloor) { dtree_t *comp_tree; dtree_node_t *root, *b_n; uint32 i; comp_tree = ckd_calloc(1, sizeof(dtree_t)); comp_tree->node = ckd_calloc(2*split_max_comp+1, sizeof(dtree_node_t)); comp_tree->n_node = 0; comp_tree->node[0].node_id = 0; comp_tree->n_node = 1; root = &comp_tree->node[0]; /* MODIFICATION FOR CONTINUOUS_TREES 18 May 98, pass means and var along with mixw */ mk_node(root, 0, id, n_id, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, mwfloor); /* END MODIFICATION FOR CONTINUOUS_TREES */ /* MODIFICATION FOR CONTINUOUS_TREES 18 May 98, pass means and var along with mixw */ root->q = (void *)mk_comp_quest(&root->wt_ent_dec, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, id, n_id, all_q, n_all_q, pset, n_base_phone, dfeat, n_dfeat, split_min, split_max, split_thr, mwfloor); /* END MODIFICATION FOR CONTINUOUS_TREES */ for (i = 0; i < split_max_comp; i++) { b_n = best_leaf_node(root); E_INFO("Comp split %u\n", i); if (b_n == NULL) { E_INFO("stop. leaf nodes are specific\n"); break; } if (b_n->wt_ent_dec <= 0) { E_INFO("stop. b_n->wt_ent_dec (%.3e) <= 0\n", b_n->wt_ent_dec); break; } if ((i > split_min_comp) && (b_n->wt_ent_dec < split_thr_comp * b_n->wt_ent)) { E_INFO("stop. b_n->wt_ent_dec <= split_thr_comp * b_n->wt_ent. 
%.3e <= %.3e\n", b_n->wt_ent_dec, split_thr_comp * b_n->wt_ent); break; } /* MODIFICATION FOR CONTINUOUS_TREES 18 May 98, pass means and var along with mixw */ split_node_comp(comp_tree, b_n->node_id, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, all_q, n_all_q, pset, n_base_phone, dfeat, n_dfeat, split_min, split_max, split_thr, mwfloor); /* END MODIFICATION FOR CONTINUOUS_TREES */ #if 0 printf("Comp Split %u:\n", i); print_tree_comp(stderr, "*", root, pset, 0); fprintf(stderr, "\n"); #endif } #if 0 E_INFO("Final Comp Tree %u:\n", i); print_tree_comp(stderr, "", root, pset, 0); fprintf(stderr, "\n"); #endif return comp_tree; }
/*
 * Create the filler-word penalty table for `dict`.
 *
 * Every filler word (ids dict->filler_start .. dict->filler_end) is first
 * given the penalty derived from `fillprob`; the silence word is then
 * overwritten with the penalty derived from `silprob`.  A penalty is
 * (logs3(prob) + logs3(wip)) * lw, truncated to int32.
 *
 * If `file` is non-NULL it is read as lines of "<word> <prob>".  Lines
 * starting with '#' and blank lines are skipped.  Each listed word must be a
 * filler word of `dict`, and its penalty is overwritten.  Afterwards the
 * penalties of alternative pronunciations are copied from their base words.
 * If `file` is NULL the table is returned right after the silence override.
 *
 * Malformed lines, unknown words and an unreadable file are reported with
 * E_FATAL.  Returns the newly allocated table.
 */
fillpen_t *fillpen_init (dict_t *dict, char *file,
                         float64 silprob, float64 fillprob,
                         float64 lw, float64 wip)
{
    s3wid_t w, bw;
    float64 prob;
    FILE *fp;
    char line[1024], wd[1024];
    int32 k;
    fillpen_t *_fillpen;

    _fillpen = (fillpen_t *) ckd_calloc (1, sizeof(fillpen_t));

    _fillpen->dict = dict;
    _fillpen->lw = lw;
    _fillpen->wip = wip;
    if (dict->filler_end >= dict->filler_start)
        _fillpen->prob = (int32 *) ckd_calloc (dict->filler_end - dict->filler_start + 1,
                                               sizeof(int32));
    else
        _fillpen->prob = NULL;

    /* Initialize all words with filler penalty (HACK!! backward compatibility) */
    prob = fillprob;
    for (w = dict->filler_start; w <= dict->filler_end; w++)
        _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite silence penalty (HACK!! backward compatibility) */
    w = dict_wordid (dict, S3_SILENCE_WORD);
    if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
        E_FATAL("%s not a filler word in the given dictionary\n", S3_SILENCE_WORD);
    prob = silprob;
    _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite with filler prob input file, if specified */
    if (! file)
        return _fillpen;

    E_INFO("Reading filler penalty file: %s\n", file);
    if ((fp = fopen (file, "r")) == NULL)
        E_FATAL("fopen(%s,r) failed\n", file);
    while (fgets (line, sizeof(line), fp) != NULL) {
        if (line[0] == '#')     /* Skip comment lines */
            continue;

        /* wd cannot overflow: it is as large as line, which bounds the input */
        k = sscanf (line, "%s %lf", wd, &prob);
        if (k <= 0)             /* Blank line: sscanf matched nothing (0 or EOF) */
            continue;
        if (k != 2)
            E_FATAL("Bad input line: %s\n", line);

        w = dict_wordid(dict, wd);
        if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
            E_FATAL("%s not a filler word in the given dictionary\n", wd);

        _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);
    }
    fclose (fp);

    /* Replicate fillpen values for alternative pronunciations */
    for (w = dict->filler_start; w <= dict->filler_end; w++) {
        bw = dict_basewid (dict, w);
        if (bw != w)
            _fillpen->prob[w-dict->filler_start] = _fillpen->prob[bw-dict->filler_start];
    }

    return _fillpen;
}
/*
 * Begin decoding a new utterance.
 *
 * Fails with -1 when no search module is selected.  Otherwise restarts the
 * performance timer, stores the utterance id (a copy of `uttid`, or a
 * zero-padded counter value when `uttid` is NULL, in which case the counter
 * is advanced), discards the previous lattice and hypothesis, starts the
 * acoustic model, opens the MFCC and raw audio log files when the
 * corresponding log directories are set, and finally starts the phone loop
 * search (if any) and the main search.
 *
 * Returns the result of starting the main search, a negative value from
 * acmod_start_utt(), or -1 when a log file cannot be opened.
 */
int
ps_start_utt(ps_decoder_t *ps, char const *uttid)
{
    FILE *mfc_log = NULL;
    FILE *raw_log = NULL;
    int status;

    if (ps->search == NULL) {
        E_ERROR("No search module is selected, did you forget to "
                "specify a language model or grammar?\n");
        return -1;
    }

    ptmr_reset(&ps->perf);
    ptmr_start(&ps->perf);

    /* Replace the previous utterance id. */
    if (uttid == NULL) {
        char generated[16];

        ckd_free(ps->uttid);
        sprintf(generated, "%09u", ps->uttno);
        ps->uttid = ckd_salloc(generated);
        ++ps->uttno;
    }
    else {
        ckd_free(ps->uttid);
        ps->uttid = ckd_salloc(uttid);
    }

    /* Remove any residual word lattice and hypothesis. */
    ps_lattice_free(ps->search->dag);
    ps->search->dag = NULL;
    ps->search->last_link = NULL;
    ps->search->post = 0;
    ckd_free(ps->search->hyp_str);
    ps->search->hyp_str = NULL;

    status = acmod_start_utt(ps->acmod);
    if (status < 0)
        return status;

    /* Start logging features and audio if requested. */
    if (ps->mfclogdir) {
        char *path = string_join(ps->mfclogdir, "/",
                                 ps->uttid, ".mfc", NULL);

        E_INFO("Writing MFCC log file: %s\n", path);
        mfc_log = fopen(path, "wb");
        if (mfc_log == NULL) {
            E_ERROR_SYSTEM("Failed to open MFCC log file %s", path);
            ckd_free(path);
            return -1;
        }
        ckd_free(path);
        acmod_set_mfcfh(ps->acmod, mfc_log);
    }
    if (ps->rawlogdir) {
        char *path = string_join(ps->rawlogdir, "/",
                                 ps->uttid, ".raw", NULL);

        E_INFO("Writing raw audio log file: %s\n", path);
        raw_log = fopen(path, "wb");
        if (raw_log == NULL) {
            E_ERROR_SYSTEM("Failed to open raw audio log file %s", path);
            ckd_free(path);
            return -1;
        }
        ckd_free(path);
        acmod_set_rawfh(ps->acmod, raw_log);
    }

    /* Start auxiliary phone loop search. */
    if (ps->phone_loop)
        ps_search_start(ps->phone_loop);

    return ps_search_start(ps->search);
}
/*
 * Create an acoustic model object.
 *
 * `config` is retained.  If it names a -featparams file, that file is parsed
 * into the configuration.  A supplied front end `fe` / feature block `fcb` is
 * checked for mismatch against the configuration and retained; when either
 * is NULL a new one is created instead.  The acoustic model parameters are
 * then loaded and the MFCC, feature, frame-position and senone buffers are
 * allocated.
 *
 * Returns the new object, or NULL (after acmod_free()) if any step fails.
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    acmod_t *acmod;
    char const *featparams;

    acmod = ckd_calloc(1, sizeof(*acmod));
    acmod->config = cmd_ln_retain(config);
    acmod->lmath = lmath;
    acmod->state = ACMOD_IDLE;

    /* Look for feat.params in acoustic model dir. */
    featparams = cmd_ln_str_r(acmod->config, "-featparams");
    if (featparams) {
        if (cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE) != NULL)
            E_INFO("Parsed model-specific feature parameters from %s\n", featparams);
    }

    /* Front end: create one when the caller did not supply it. */
    if (fe == NULL) {
        acmod->fe = fe_init_auto_r(config);
        if (acmod->fe == NULL)
            goto error_out;
        if (acmod_fe_mismatch(acmod, acmod->fe))
            goto error_out;
    }
    else {
        if (acmod_fe_mismatch(acmod, fe))
            goto error_out;
        fe_retain(fe);
        acmod->fe = fe;
    }

    /* Dynamic feature computation: same policy as for the front end. */
    if (fcb == NULL) {
        if (acmod_init_feat(acmod) < 0)
            goto error_out;
    }
    else {
        if (acmod_feat_mismatch(acmod, fcb))
            goto error_out;
        feat_retain(fcb);
        acmod->fcb = fcb;
    }

    /* Load acoustic model parameters. */
    if (acmod_init_am(acmod) < 0)
        goto error_out;

    /* The MFCC buffer needs to be at least as large as the dynamic
     * feature window.  */
    acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
    acmod->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
                      sizeof(**acmod->mfc_buf));

    /* Feature buffer has to be at least as large as MFCC buffer. */
    acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
    acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
    acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));

    acmod->utt_start_frame = 0;

    /* Senone computation stuff. */
    acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    return acmod;

error_out:
    /* Single cleanup point for every failure above. */
    acmod_free(acmod);
    return NULL;
}
/*
 * Load the core models named by the arguments and check that they agree.
 *
 * Each file argument may be NULL, in which case the corresponding model is
 * not loaded.  Loaded, in order: feature module, model definition,
 * dictionary, language model, filler penalties, Gaussian mixtures (with an
 * optional sub-vector quantizer) and transition matrices.  The dict-to-LM
 * word-id map is built when both a dictionary and an LM are present, and the
 * dict2pid structure when both a model definition and a dictionary are
 * present.
 *
 * `compsep` must be NULL, empty, or a single character.  The filler penalty
 * table is also built without a penalty file when an LM and a dictionary are
 * both given.
 *
 * Every failure is reported through E_FATAL.  Returns the new kbcore_t.
 */
kbcore_t *kbcore_init (float64 logbase,
                       char *feattype,
                       char *cmn,
                       char *varnorm,
                       char *agc,
                       char *mdeffile,
                       char *dictfile,
                       char *fdictfile,
                       char *compsep,
                       char *lmfile,
                       char *fillpenfile,
                       float64 silprob,
                       float64 fillprob,
                       float64 langwt,
                       float64 inspen,
                       float64 uw,
                       char *meanfile,
                       char *varfile,
                       float64 varfloor,
                       char *mixwfile,
                       float64 mixwfloor,
                       char *subvqfile,
                       char *tmatfile,
                       float64 tmatfloor)
{
    kbcore_t *kb;

    E_INFO("Initializing core models:\n");

    kb = (kbcore_t *) ckd_calloc (1, sizeof(kbcore_t));
    kb->fcb = NULL;
    kb->mdef = NULL;
    kb->dict = NULL;
    kb->dict2pid = NULL;
    kb->lm = NULL;
    kb->fillpen = NULL;
    kb->dict2lmwid = NULL;
    kb->mgau = NULL;
    kb->svq = NULL;
    kb->tmat = NULL;

    logs3_init (logbase);

    if (feattype) {
        if ((kb->fcb = feat_init (feattype, cmn, varnorm, agc)) == NULL)
            E_FATAL("feat_init(%s) failed\n", feattype);
        if (feat_n_stream(kb->fcb) != 1)
            E_FATAL("#Feature streams(%d) != 1\n", feat_n_stream(kb->fcb));
    }

    if (mdeffile) {
        if ((kb->mdef = mdef_init (mdeffile)) == NULL)
            E_FATAL("mdef_init(%s) failed\n", mdeffile);
    }

    if (dictfile) {
        if (! compsep)
            compsep = "";
        else if ((compsep[0] != '\0') && (compsep[1] != '\0')) {
            E_FATAL("Compound word separator(%s) must be empty or single character string\n",
                    compsep);
        }
        if ((kb->dict = dict_init (kb->mdef, dictfile, fdictfile, compsep[0])) == NULL)
            E_FATAL("dict_init(%s,%s,%s) failed\n", dictfile,
                    fdictfile ? fdictfile : "", compsep);
    }

    if (lmfile) {
        if ((kb->lm = lm_read (lmfile, langwt, inspen, uw)) == NULL)
            E_FATAL("lm_read(%s, %e, %e, %e) failed\n", lmfile, langwt, inspen, uw);
    }

    if (fillpenfile || (lmfile && kb->dict)) {
        if (! kb->dict)     /* Sic */
            E_FATAL("No dictionary for associating filler penalty file(%s)\n", fillpenfile);

        /*
         * fillpenfile may legitimately be NULL here (LM + dictionary without a
         * penalty file), so it must not be handed to %s unguarded.
         */
        if ((kb->fillpen = fillpen_init (kb->dict, fillpenfile, silprob, fillprob,
                                         langwt, inspen)) == NULL)
            E_FATAL("fillpen_init(%s) failed\n", fillpenfile ? fillpenfile : "");
    }

    if (meanfile) {
        if ((! varfile) || (! mixwfile))
            E_FATAL("Varfile or mixwfile not specified along with meanfile(%s)\n", meanfile);
        kb->mgau = mgau_init (meanfile, varfile, varfloor, mixwfile, mixwfloor, TRUE);
        if (kb->mgau == NULL)
            E_FATAL("gauden_init(%s, %s, %e) failed\n", meanfile, varfile, varfloor);

        if (subvqfile) {
            if ((kb->svq = subvq_init (subvqfile, varfloor, -1, kb->mgau)) == NULL)
                E_FATAL("subvq_init (%s, %e, -1) failed\n", subvqfile, varfloor);
        }
    }

    if (tmatfile) {
        if ((kb->tmat = tmat_init (tmatfile, tmatfloor)) == NULL)
            E_FATAL("tmat_init (%s, %e) failed\n", tmatfile, tmatfloor);
    }

    if (kb->dict && kb->lm) {   /* Initialize dict2lmwid */
        if ((kb->dict2lmwid = wid_dict_lm_map (kb->dict, kb->lm)) == NULL)
            E_FATAL("Dict/LM word-id mapping failed\n");
    }

    if (kb->mdef && kb->dict) { /* Initialize dict2pid */
        kb->dict2pid = dict2pid_build (kb->mdef, kb->dict);
    }

    /* ***************** Verifications ***************** */
    E_INFO("Verifying models consistency:\n");

    if (kb->fcb && kb->mgau) {
        /* Verify feature streams against gauden codebooks */
        if (feat_stream_len(kb->fcb, 0) != mgau_veclen(kb->mgau))
            E_FATAL("Feature streamlen(%d) != mgau streamlen(%d)\n",
                    feat_stream_len(kb->fcb, 0), mgau_veclen(kb->mgau));
    }

    if (kb->mdef && kb->mgau) {
        /* Verify senone parameters against model definition parameters */
        if (kb->mdef->n_sen != mgau_n_mgau(kb->mgau))
            E_FATAL("Mdef #senones(%d) != mgau #senones(%d)\n",
                    kb->mdef->n_sen, mgau_n_mgau(kb->mgau));
    }

    if (kb->mdef && kb->tmat) {
        /* Verify transition matrices parameters against model definition parameters */
        if (kb->mdef->n_tmat != kb->tmat->n_tmat)
            E_FATAL("Mdef #tmat(%d) != tmatfile(%d)\n", kb->mdef->n_tmat, kb->tmat->n_tmat);
        if (kb->mdef->n_emit_state != kb->tmat->n_state)
            E_FATAL("Mdef #states(%d) != tmat #states(%d)\n",
                    kb->mdef->n_emit_state, kb->tmat->n_state);
    }

    return kb;
}
/*
 * Run one frame of the flat-lexicon search.
 *
 * Computes the senone scores for the frame (activating senones first unless
 * all senones are computed), marks the backpointer table, renormalizes the
 * scores when the best score plus twice the beam is worse than WORST_SCORE,
 * evaluates and prunes the HMMs, performs word transitions and finally
 * fills in the active word list for the next frame.
 *
 * Returns the number of frames processed: 1, or 0 when the best score is
 * equal to or worse than WORST_SCORE.
 */
int
ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
{
    int16 const *senscr;
    int32 next_frame, wid, k, n_active;
    int32 *next_list;

    /* Activate our HMMs for the current frame if need be. */
    if (!ps_search_acmod(ngs)->compallsen)
        compute_fwdflat_sen_active(ngs, frame_idx);

    /* Compute GMM scores for the current frame. */
    senscr = acmod_score(ps_search_acmod(ngs), &frame_idx);
    ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;

    /* Mark backpointer table for current frame. */
    ngram_search_mark_bptable(ngs, frame_idx);

    /* If the best score is equal to or worse than WORST_SCORE,
     * recognition has failed, don't bother to keep trying. */
    if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
        return 0;

    /* Renormalize if necessary */
    if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
        E_INFO("Renormalizing Scores at frame %d, best score %d\n",
               frame_idx, ngs->best_score);
        fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score);
    }

    ngs->best_score = WORST_SCORE;
    hmm_context_set_senscore(ngs->hmmctx, senscr);

    /* Evaluate HMMs, prune them with phone transitions, then do word
     * transitions. */
    fwdflat_eval_chan(ngs, frame_idx);
    fwdflat_prune_chan(ngs, frame_idx);
    fwdflat_word_transition(ngs, frame_idx);

    /* Create next active word list; the two lists alternate by frame parity. */
    next_frame = frame_idx + 1;
    next_list = ngs->active_word_list[next_frame & 0x1];
    n_active = 0;

    /* Words from the fwdflat word list (terminated by a negative id). */
    for (k = 0; ngs->fwdflat_wordlist[k] >= 0; k++) {
        if (bitvec_is_set(ngs->word_active, ngs->fwdflat_wordlist[k]))
            next_list[n_active++] = ngs->fwdflat_wordlist[k];
    }

    /* Word ids from ps_search_start_wid() up to ps_search_n_words(). */
    for (wid = ps_search_start_wid(ngs); wid < ps_search_n_words(ngs); wid++) {
        if (bitvec_is_set(ngs->word_active, wid))
            next_list[n_active++] = wid;
    }

    /* Count the frame here only when the fwdtree pass is absent. */
    if (!ngs->fwdtree)
        ++ngs->n_frame;
    ngs->n_active_word[next_frame & 0x1] = n_active;

    /* Return the number of frames processed. */
    return 1;
}
/*
 * Read a Sphinx3 mean or variance file.
 *
 * The file must contain exactly one codebook.  The number of feature
 * streams, the number of densities and the per-stream vector lengths read
 * from the file are stored in `s` when the corresponding field is still 0,
 * and otherwise must match what `s` already holds.  On success *out_cb
 * receives one newly allocated array of n_density * veclen[i] floats per
 * feature stream.
 *
 * Returns the total number of floats declared in the file, or -1 when the
 * file holds a number of codebooks other than 1.  Every other inconsistency
 * or read failure is reported through E_FATAL.
 */
static int32
s3_read_mgau(s2_semi_mgau_t *s, const char *file_name, float32 ***out_cb)
{
    char tmp;
    FILE *fp;
    int32 i, blk, n;
    int32 n_mgau;
    int32 n_feat;
    int32 n_density;
    int32 *veclen;
    int32 byteswap, chksum_present;
    char **argname, **argval;
    uint32 chksum;

    E_INFO("Reading S3 mixture gaussian file '%s'\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MGAU_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MGAU_PARAM_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* #Codebooks */
    if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#codebooks) failed\n", file_name);
    if (n_mgau != 1) {
        E_ERROR("%s: #codebooks (%d) != 1\n", file_name, n_mgau);
        fclose(fp);
        return -1;
    }

    /* #Features/codebook */
    if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#features) failed\n", file_name);
    if (s->n_feat == 0)
        s->n_feat = n_feat;
    else if (n_feat != s->n_feat)
        E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);

    /* #Gaussian densities/feature in each codebook */
    if (bio_fread(&n_density, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#density/codebook) failed\n", file_name);
    if (s->n_density == 0)
        s->n_density = n_density;
    else if (n_density != s->n_density)
        /* Report the density count that was read, not the codebook count. */
        E_FATAL("%s: Number of densities per feature(%d) != %d\n",
                file_name, n_density, s->n_density);

    /* Vector length of feature stream */
    veclen = ckd_calloc(s->n_feat, sizeof(int32));
    if (bio_fread(veclen, sizeof(int32), s->n_feat, fp, byteswap, &chksum) != s->n_feat)
        E_FATAL("fread(%s) (feature vector-length) failed\n", file_name);
    for (i = 0, blk = 0; i < s->n_feat; ++i) {
        if (s->veclen[i] == 0)
            s->veclen[i] = veclen[i];
        else if (veclen[i] != s->veclen[i])
            E_FATAL("feature stream length %d is inconsistent (%d != %d)\n",
                    i, veclen[i], s->veclen[i]);
        blk += veclen[i];       /* total vector length over all streams */
    }

    /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
    if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (total #floats) failed\n", file_name);
    if (n != n_mgau * n_density * blk)
        E_FATAL
            ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
             file_name, n, n_mgau, n_density, blk);

    /* One block of n_density vectors per feature stream */
    *out_cb = ckd_calloc(s->n_feat, sizeof(float32 *));
    for (i = 0; i < s->n_feat; ++i) {
        (*out_cb)[i] =
            (float32 *) ckd_calloc(n_density * veclen[i],
                                   sizeof(float32));
        if (bio_fread
            ((*out_cb)[i], sizeof(float32),
             n_density * veclen[i], fp,
             byteswap, &chksum) != n_density * veclen[i])
            E_FATAL("fread(%s, %d) of feat %d failed\n", file_name,
                    n_density * veclen[i], i);
    }
    ckd_free(veclen);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    /* Anything left in the file means the header lied about the size. */
    if (fread(&tmp, 1, 1, fp) == 1)
        E_FATAL("%s: More data than expected\n", file_name);

    fclose(fp);

    E_INFO("%d mixture Gaussians, %d components, %d feature streams, veclen %d\n",
           n_mgau, n_density, n_feat, blk);

    return n;
}
/*
 * Finish decoding the current utterance.
 *
 * Fails with -1 when the acoustic model is idle or already ended.  Otherwise
 * ends the acoustic model utterance, searches the remaining frames, finishes
 * the phone loop search (if any), steps the main search over the frames left
 * in the lookahead window, finishes the main search and stops the
 * performance timer.  When -backtrace is set and a hypothesis exists, the
 * hypothesis and its word segmentation are logged.
 *
 * Returns the result of finishing the main search, or the first negative
 * value produced by an earlier step.
 */
int
ps_end_utt(ps_decoder_t *ps)
{
    int status, frame;
    const char *hyp;
    ps_seg_t *seg;
    int32 score;

    if (ps->acmod->state == ACMOD_IDLE || ps->acmod->state == ACMOD_ENDED) {
        E_ERROR("Utterance is not started\n");
        return -1;
    }
    acmod_end_utt(ps->acmod);

    /* Search any remaining frames. */
    status = ps_search_forward(ps);
    if (status < 0) {
        ptmr_stop(&ps->perf);
        return status;
    }

    /* Finish phone loop search. */
    if (ps->phone_loop) {
        status = ps_search_finish(ps->phone_loop);
        if (status < 0) {
            ptmr_stop(&ps->perf);
            return status;
        }
    }

    /* Search any frames remaining in the lookahead window. */
    if (ps->acmod->output_frame >= ps->pl_window) {
        frame = ps->acmod->output_frame - ps->pl_window;
        while (frame < ps->acmod->output_frame) {
            ps_search_step(ps->search, frame);
            ++frame;
        }
    }

    /* Finish main search. */
    status = ps_search_finish(ps->search);
    ptmr_stop(&ps->perf);
    if (status < 0)
        return status;

    /* Log a backtrace if requested. */
    if (!cmd_ln_boolean_r(ps->config, "-backtrace"))
        return status;

    hyp = ps_get_hyp(ps, &score);
    if (hyp == NULL)
        return status;

    E_INFO("%s (%d)\n", hyp, score);
    E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                "word", "start", "end", "pprob", "ascr", "lscr", "lback");

    /* One line per segment of the hypothesis. */
    seg = ps_seg_iter(ps, &score);
    while (seg) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                    word, sf, ef, logmath_exp(ps_get_logmath(ps), post),
                    ascr, lscr, lback);

        seg = ps_seg_next(seg);
    }

    return status;
}
/*
 * Load quantized mixture weights from a senone dump file.
 *
 * The file starts with a length-prefixed title and a length-prefixed header,
 * followed by further length-prefixed strings up to a zero length, then the
 * row and column counts and the 8-bit weights.  Byte order is guessed from
 * the title length: if it is out of range as read, it is byte-swapped and
 * every later integer is swapped too.
 *
 * A "cluster_count " header string sets the cluster count (256 when the
 * string is absent); only files with a cluster count of 0 are accepted.
 * With -mmap, and when the file can be mapped, s->mixw points into the
 * mapping; otherwise the weights are read into a newly allocated 3-D array.
 *
 * Every string length is checked against the local buffer before it is read,
 * and every length / count read is checked for completeness.
 *
 * Returns 0 on success, -1 if the file cannot be opened, uses clustered
 * weights, or the weights cannot be read completely.  Malformed headers are
 * reported through E_FATAL.
 */
static int32
read_sendump(s2_semi_mgau_t *s, mdef_t *mdef, char const *file)
{
    FILE *fp;
    char line[1000];
    int32 i, n;
    int32 do_swap, do_mmap;
    int32 max_len = (int32) sizeof(line) - 1;   /* leaves room for a NUL */
    size_t offset;
    int n_clust = 256;          /* Number of clusters (if zero, we are just using
                                 * 8-bit quantized weights) */
    int r = s->n_density;
    int c = mdef_n_sen(mdef);

    s->n_sen = c;
    do_mmap = cmd_ln_boolean_r(s->config, "-mmap");

    if ((fp = fopen(file, "rb")) == NULL)
        return -1;

    E_INFO("Loading senones from dump file %s\n", file);

    /* Read title size, title */
    if (fread(&n, sizeof(n), 1, fp) != 1)
        E_FATAL("Cannot read title\n");
    /* This is extremely bogus */
    do_swap = 0;
    if (n < 1 || n > max_len) {
        SWAP_INT32(&n);
        if (n < 1 || n > max_len) {
            E_FATAL("Title length %x in dump file %s out of range\n", n,
                    file);
        }
        do_swap = 1;
    }
    if (fread(line, sizeof(char), n, fp) != (size_t) n)
        E_FATAL("Cannot read title\n");
    if (line[n - 1] != '\0')
        E_FATAL("Bad title in dump file\n");
    E_INFO("%s\n", line);

    /* Read header size, header */
    if (fread(&n, sizeof(n), 1, fp) != 1)
        E_FATAL("Cannot read header\n");
    if (do_swap)
        SWAP_INT32(&n);
    if (n < 1 || n > max_len)
        E_FATAL("Header length %x in dump file %s out of range\n", n, file);
    if (fread(line, sizeof(char), n, fp) != (size_t) n)
        E_FATAL("Cannot read header\n");
    if (line[n - 1] != '\0')
        E_FATAL("Bad header in dump file\n");

    /* Read other header strings until string length = 0 */
    for (;;) {
        if (fread(&n, sizeof(n), 1, fp) != 1)
            E_FATAL("Cannot read header\n");
        if (do_swap)
            SWAP_INT32(&n);
        if (n == 0)
            break;
        if (n < 0 || n > max_len)
            E_FATAL("Header length %x in dump file %s out of range\n", n, file);
        if (fread(line, sizeof(char), n, fp) != (size_t) n)
            E_FATAL("Cannot read header\n");
        /* Guarantee termination before the string functions below. */
        line[n] = '\0';
        /* Look for a cluster count, if present */
        if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
            n_clust = atoi(line + strlen("cluster_count "));
        }
    }

    /* Read #codewords, #pdfs */
    if (fread(&r, sizeof(r), 1, fp) != 1)
        E_FATAL("Cannot read header\n");
    if (do_swap)
        SWAP_INT32(&r);
    if (fread(&c, sizeof(c), 1, fp) != 1)
        E_FATAL("Cannot read header\n");
    if (do_swap)
        SWAP_INT32(&c);
    E_INFO("Rows: %d, Columns: %d\n", r, c);

    if (n_clust) {
        E_ERROR ("Dump file is incompatible with PocketSphinx\n");
        fclose(fp);
        return -1;
    }
    if (do_mmap) {
        E_INFO("Using memory-mapped I/O for senones\n");
    }
    /* Start of the weights, used to index into the mapping. */
    offset = ftell(fp);

    /* Allocate memory for pdfs (or memory map them) */
    if (do_mmap)
        s->sendump_mmap = mmio_file_read(file);

    /* Otherwise, set up all pointers, etc. */
    if (s->sendump_mmap) {
        s->mixw = ckd_calloc(s->n_feat, sizeof(*s->mixw));
        for (i = 0; i < s->n_feat; i++) {
            /* Pointers into the mmap()ed 2d array */
            s->mixw[i] = ckd_calloc(r, sizeof(**s->mixw));
        }

        for (n = 0; n < s->n_feat; n++) {
            for (i = 0; i < r; i++) {
                s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
                offset += c;
            }
        }
    }
    else {
        s->mixw = ckd_calloc_3d(s->n_feat, r, c, sizeof(***s->mixw));
        /* Read pdf values and ids */
        for (n = 0; n < s->n_feat; n++) {
            for (i = 0; i < r; i++) {
                if (fread(s->mixw[n][i], sizeof(***s->mixw), c, fp) != (size_t) c) {
                    E_ERROR("Failed to read %d bytes from sendump\n", c);
                    fclose(fp);     /* do not leak the stream on failure */
                    return -1;
                }
            }
        }
    }

    fclose(fp);
    return 0;
}
/**
 * Read floating-point mixture weights and store them as quantized negated
 * logs in s->mixw.
 *
 * Each senone/feature vector of weights is normalized, floored at
 * SmoothMin, renormalized, converted with s->lmath_8b and clamped to
 * MAX_NEG_MIXW.  The result is stored transposed as
 * s->mixw[feature][codeword][senone].
 *
 * @param s          Model being initialized; s->n_sen and s->mixw are set.
 * @param file_name  Path of the mixture weights file.
 * @param SmoothMin  Floor applied to the normalized weights.
 * @return the number of senones read.  All failures are reported with
 *         E_FATAL.
 */
static int32
read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
{
    char **argname, **argval;
    char eofchk;
    FILE *fp;
    int32 byteswap, chksum_present;
    uint32 chksum;
    float32 *pdf;
    int32 i, f, c, n;
    int32 n_sen;
    int32 n_feat;
    int32 n_comp;
    int32 n_err;

    E_INFO("Reading mixture weights file '%s'\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MGAU_MIXW_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones, #features, #codewords, arraysize */
    if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (n_feat != s->n_feat)
        E_FATAL("#Features streams(%d) != %d\n", n_feat, s->n_feat);
    /* s->mixw is allocated with s->n_density codewords below, so a file
     * with more codewords than that would be written out of bounds. */
    if (n_comp > s->n_density)
        E_FATAL("#Codewords(%d) > %d\n", n_comp, s->n_density);
    if (n != n_sen * n_feat * n_comp) {
        /* Report the array size that was read (n), not the loop index. */
        E_FATAL
            ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
             file_name, n, n_sen, n_feat, n_comp);
    }

    /* n_sen = number of mixture weights per codeword, which is
     * fixed at the number of senones since we have only one codebook.
     */
    s->n_sen = n_sen;

    /* Quantized mixture weight arrays. */
    s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen, sizeof(***s->mixw));

    /* Temporary structure to read in floats before conversion to (int32) logs3 */
    pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));

    /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
    n_err = 0;
    for (i = 0; i < n_sen; i++) {
        for (f = 0; f < n_feat; f++) {
            if (bio_fread((void *) pdf, sizeof(float32), n_comp, fp,
                          byteswap, &chksum) != n_comp) {
                E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
            }

            /* Normalize and floor */
            if (vector_sum_norm(pdf, n_comp) <= 0.0)
                n_err++;
            vector_floor(pdf, n_comp, SmoothMin);
            vector_sum_norm(pdf, n_comp);

            /* Convert to LOG, quantize, and transpose */
            for (c = 0; c < n_comp; c++) {
                int32 qscr;

                qscr = -logmath_log(s->lmath_8b, pdf[c]);
                if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
                    qscr = MAX_NEG_MIXW;
                s->mixw[f][c][i] = qscr;
            }
        }
    }
    if (n_err > 0)
        E_WARN("Weight normalization failed for %d senones\n", n_err);

    ckd_free(pdf);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
    return n_sen;
}
/**
 * Create and load a semi-continuous acoustic model scorer.
 *
 * Reads the means ("-mean"), variances ("-var") and mixture weights
 * ("-sendump" if given, otherwise "-mixw") named in the configuration,
 * precomputes determinants, and allocates the top-N history buffers.
 *
 * @param config  Configuration; stored in the result, not retained here.
 * @param lmath   Log-math object; retained.
 * @param fcb     Feature descriptor supplying stream count and lengths.
 * @param mdef    Model definition, passed on to the sendump reader.
 * @return the new object, or NULL on failure (everything allocated so far
 *         is released with s2_semi_mgau_free()).
 */
s2_semi_mgau_t *
s2_semi_mgau_init(cmd_ln_t *config, logmath_t *lmath, feat_t *fcb, mdef_t *mdef)
{
    s2_semi_mgau_t *s;
    char const *sendump_path;
    float32 **fgau;
    int i;

    s = ckd_calloc(1, sizeof(*s));
    s->config = config;

    s->lmath = logmath_retain(lmath);
    /* Log-add table. */
    s->lmath_8b = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
    if (s->lmath_8b == NULL) {
        s2_semi_mgau_free(s);
        return NULL;
    }
    /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
    if (logmath_get_width(s->lmath_8b) != 1) {
        E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
                logmath_get_base(s->lmath_8b));
        s2_semi_mgau_free(s);
        return NULL;
    }

    /* Inherit stream dimensions from acmod, will be checked below. */
    s->n_feat = feat_dimension1(fcb);
    s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
    for (i = 0; i < s->n_feat; ++i)
        s->veclen[i] = feat_dimension2(fcb, i);

    /* Read means and variances. */
    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &fgau) < 0) {
        s2_semi_mgau_free(s);
        return NULL;
    }
    s->means = (mfcc_t **)fgau;
    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &fgau) < 0) {
        s2_semi_mgau_free(s);
        return NULL;
    }
    s->vars = (mfcc_t **)fgau;

    /* Precompute (and fixed-point-ize) means, variances, and determinants. */
    s->dets = (mfcc_t **)ckd_calloc_2d(s->n_feat, s->n_density, sizeof(**s->dets));
    s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));

    /* Read mixture weights.  A failed read leaves s->mixw unset, so it
     * must abort initialization instead of being ignored. */
    if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
        if (read_sendump(s, mdef, sendump_path) < 0) {
            s2_semi_mgau_free(s);
            return NULL;
        }
    }
    else {
        if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
                      cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
            s2_semi_mgau_free(s);
            return NULL;
        }
    }
    s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");

    /* Determine top-N for each feature */
    s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
    s->max_topn = cmd_ln_int32_r(s->config, "-topn");
    split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);
    E_INFO("Maximum top-N: %d ", s->max_topn);
    E_INFOCONT("Top-N beams:");
    for (i = 0; i < s->n_feat; ++i) {
        E_INFOCONT(" %d", s->topn_beam[i]);
    }
    E_INFOCONT("\n");

    /* Top-N scores from recent frames */
    s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
    s->topn_hist = (vqFeature_t ***)
        ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
                      sizeof(***s->topn_hist));
    s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
                                   sizeof(**s->topn_hist_n));
    for (i = 0; i < s->n_topn_hist; ++i) {
        int j;
        for (j = 0; j < s->n_feat; ++j) {
            int k;
            for (k = 0; k < s->max_topn; ++k) {
                s->topn_hist[i][j][k].score = WORST_DIST;
                s->topn_hist[i][j][k].codeword = k;
            }
        }
    }

    return s;
}
/*
 * Parse the command line of the tied-state-to-codebook tool.
 *
 * The accepted switches are listed in the static defn[] table and handed
 * to cmd_ln_parse().  If -help and/or -example is set, the matching text
 * is printed to stdout and the process exits with status 0; otherwise the
 * function returns 0.
 *
 * NOTE(review): the help/example strings were written with backslash-newline
 * continuations inside the literals; confirm the layout against the
 * original file before reformatting this function.
 */
int parse_cmd_ln(int argc, char *argv[]) { uint32 isHelp; uint32 isExample; const char helpstr[] = "Description: \n\ (copied from Eric's comments)\n\ * Create a tied-state-to-codebook mapping file for semi-continuous, \n\ * phone dependent or fully continuous Gaussian density tying."; const char examplestr[]= "Example: \n\ (By Arthur: Not sure, may be obsolete) \n\ mk_ts2cb -moddeffn semi -ts2cbfn ts2cb"; static arg_t defn[] = { { "-help", ARG_BOOLEAN, "no", "Shows the usage of the tool" }, { "-example", ARG_BOOLEAN, "no", "Shows example of how to use the tool" }, { "-ts2cbfn", ARG_STRING, NULL, "A SPHINX-III tied-state-to-cb file name" }, { "-moddeffn", ARG_STRING, NULL, "A SPHINX-III model definition file name" }, { "-tyingtype", /* either "semi", "pd", or "cont" */ ARG_STRING, "semi", "Output a state parameter def file for fully continuous models" }, { "-pclassfn", /* this switch is reference for -tyingtype pd */ ARG_STRING, NULL, "A SPHINX-II phone class file name" }, { NULL, 0, NULL, NULL } }; cmd_ln_parse(defn, argc, argv, TRUE); isHelp = cmd_ln_int32("-help"); isExample = cmd_ln_int32("-example"); if(isHelp) { printf("%s\n\n",helpstr); } if(isExample) { printf("%s\n\n",examplestr); } if(isHelp || isExample) { E_INFO("User asked for help or example.\n"); exit(0); } return 0; }
int32 live_utt_decode_block (int16 *samples, int32 nsamples, int32 live_endutt, partialhyp_t **ohyp) { static int32 live_begin_new_utt = 1; static int32 frmno; static float32 ***live_feat = NULL; int32 live_nfr, live_nfeatvec; int32 nwds =0; float32 **mfcbuf; /* int i,j;*/ /* 2004/08/27 L Galescu <*****@*****.**> -- added raw audio file saving */ static char uttfn[1024]; static FILE *rawfp = NULL; int16 block_peak_amplitude; if(live_feat==NULL) live_feat = feat_array_alloc (kbcore_fcb(kbcore), LIVEBUFBLOCKSIZE); if (live_begin_new_utt){ fe_start_utt(fe); utt_begin (kb); frmno = 0; kb->nfr = 0; kb->utt_hmm_eval = 0; kb->utt_sen_eval = 0; kb->utt_gau_eval = 0; live_begin_new_utt = 0; sprintf(uttfn, "%s/%s.raw", cmd_ln_str("-outrawdir"), kb->uttid); rawfp = fopen(uttfn, "wb"); } /* 10.jan.01 RAH, fe_process_utt now requires ***mfcbuf and it allocates the memory internally) */ mfcbuf = NULL; /* LG 20080613 */ block_peak_amplitude = get_peak_amplitude(samples, nsamples); if (block_peak_amplitude > peak_amplitude) peak_amplitude = block_peak_amplitude; E_INFO("segment peak %d\n",peak_amplitude); live_nfr = fe_process_utt(fe, samples, nsamples, &mfcbuf); /**/ if (rawfp != NULL) { fwrite(samples, sizeof(int16), nsamples, rawfp); if (live_endutt) fclose(rawfp); } if (live_endutt) { /* RAH, It seems that we shouldn't throw out this data */ fe_end_utt(fe,dummyframe); /* Flush out the fe */ } #if 0 E_INFO("Number frame after fe_process_utt %d\n",live_nfr); for(i=0;i<live_nfr;i++){ printf("%d ",i); for(j=0;j<13;j++){ printf("%f ",mfcbuf[i][j]); fflush(stdout); } printf("\n"); fflush(stdout); } #endif /* lgalescu 2004/08/22 -- i am under the impression that * feat_s2mfc2feat_block() needs to be called at the end of utt * even if no frames need processing */ /* lgalescu 2004/10/13 -- rescinded the above */ if(live_nfr>0){ /* Compute feature vectors */ live_nfeatvec = feat_s2mfc2feat_block(kbcore_fcb(kbcore), mfcbuf, live_nfr, live_begin_new_utt, live_endutt, live_feat); #if 0 
E_INFO ("live_nfeatvec: %ld\n",live_nfeatvec); #endif #if 0 E_INFO("Current frame number %d, Number of frames %d, Number frame after feat_s2mfcfeat_block %d\n",frmno,live_nfr,live_nfeatvec); for(i=0;i<live_nfeatvec;i++){ printf("%d\n",i); printf("Cep: "); fflush(stdout); for(j=0;j<13;j++){ printf("%f ",live_feat[i][0][j]); fflush(stdout); } printf("\n"); fflush(stdout); printf("Del: "); fflush(stdout); for(j=13;j<26;j++){ printf("%f ",live_feat[i][0][j]); fflush(stdout); } printf("\n"); fflush(stdout); printf("Acc: "); fflush(stdout); for(j=26;j<39;j++){ printf("%f ",live_feat[i][0][j]); fflush(stdout); } printf("\n"); fflush(stdout); } #endif /* decode the block */ utt_decode_block (live_feat, live_nfeatvec, &frmno, kb, maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp); /* lgalescu 2004/08/21 * moved the following block out of the previous if(){} because we need * the output even when no feature computation has to be done. */ /* lgalescu 2004/10/13 -- rescinded */ /* Pull out partial hypothesis */ nwds = live_get_partialhyp(live_endutt); *ohyp = parthyp; parthyplen = nwds; } /* Clean up */ if (live_endutt) { live_begin_new_utt = 1; kb->tot_fr += kb->nfr; utt_end(kb); } else { live_begin_new_utt = 0; } /* I'm starting to think that fe_process_utt should not be allocating its * memory, that or it should allocate some max and just keep on going, * this idea of constantly allocating freeing memory seems dangerous to me. */ /* 20040318 ARCHAN : It sounds extremely dangerous to me and I will * eliminate it sometime. */ /* lgalescu: i second that! the memory issue needs to be investigated: after a run on linux, i noticed some 1.6M of memory having "disappeared"! */ if(live_nfr>0){ ckd_free_2d((void **) mfcbuf); /* RAH, this must be freed since fe_process_utt allocates it */ } return(parthyplen); }
/* * Continuous recognition from a file */ static void recognize_from_file() { cont_ad_t *cont; ad_rec_t file_ad = {0}; int16 adbuf[4096]; const char* hyp; const char* uttid; int32 k, ts, start; char waveheader[44]; if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) { E_FATAL_SYSTEM("Failed to open file '%s' for reading", cmd_ln_str_r(config, "-infile")); } fread(waveheader, 1, 44, rawfd); file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate"); file_ad.bps = sizeof(int16); if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) { E_FATAL("Failed to initialize voice activity detection\n"); } if (cont_ad_calib(cont) < 0) E_INFO("Using default voice activity detection\n"); rewind (rawfd); for (;;) { while ((k = cont_ad_read(cont, adbuf, 4096)) == 0); if (k < 0) break; if (ps_start_utt(ps, NULL) < 0) E_FATAL("ps_start_utt() failed\n"); ps_process_raw(ps, adbuf, k, FALSE, FALSE); hyp = ps_get_hyp(ps, NULL, &uttid); printf("= partial_hypothese | %s: %s\n", uttid, hyp); fflush(stdout); ts = cont->read_ts; start = ((ts - k) * 100.0) / file_ad.sps; for (;;) { if ((k = cont_ad_read(cont, adbuf, 4096)) < 0) break; if (k == 0) { /* * No speech data available; check current timestamp with most recent * speech to see if more than 1 sec elapsed. If so, end of utterance. */ if ((cont->read_ts - ts) > 2000) { printf("= utterance_end | %i\n", (cont->read_ts - ts) ); break; } } else { /* New speech data received; note current timestamp */ ts = cont->read_ts; } ps_process_raw(ps, adbuf, k, FALSE, FALSE); hyp = ps_get_hyp(ps, NULL, &uttid); printf("= partial_hypothese | %s: %s\n", uttid, hyp); fflush(stdout); } ps_end_utt(ps); if (cmd_ln_boolean_r(config, "-time")) { print_word_times(start); printf("= continue\n"); fflush(stdout); fgetc( stdin ); } else { hyp = ps_get_hyp(ps, NULL, &uttid); printf("= final_hypothese | %s: %s\n\n\n", uttid, hyp); } fflush(stdout); } cont_ad_close(cont); fclose(rawfd); }
/**
 * (Re)initialize a decoder from a configuration.
 *
 * Releases the previous searches, acoustic model, dictionary and
 * dictionary-to-phone mapping, rebuilds them from the configuration, and
 * then creates whichever searches the configuration asks for
 * (-keyphrase, -kws, -fsg, -jsgf, -allphone, -lm, -lmctl).
 *
 * @param ps      Decoder to reinitialize.
 * @param config  New configuration, or NULL to keep ps->config.  A
 *                different configuration is retained and the old one
 *                freed.
 * @return 0 on success, -1 on failure.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyphrase;
    /* Language weight: kept as a float so that fractional values are not
     * truncated before being handed to the FSG reader. */
    float32 lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));

    /* Set up logging. We need to do this earlier because we want to dump
     * the information to the configured log, not to the stderr. */
    if (config && cmd_ln_str_r(ps->config, "-logfn")) {
        if (err_set_logfile(cmd_ln_str_r(ps->config, "-logfn")) < 0) {
            E_ERROR("Cannot redirect log output\n");
            return -1;
        }
    }

    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_expand_model_config(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search) */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search.
         * NOTE(review): ps->dict is still NULL at this point. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ps_search_name(ps->phone_loop),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    lw = cmd_ln_float32_r(ps->config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
        if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        if (!fsg)
            return -1;
        if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) {
            fsg_model_free(fsg);
            return -1;
        }
        fsg_model_free(fsg);
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Or load a JSGF grammar */
    if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
        if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
        if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    /* The N-Gram model is only used as the default search when no
     * allphone search was requested.  "-allphone" is read as a string
     * above, so test it as a string here as well. */
    if ((path = cmd_ln_str_r(ps->config, "-lm"))
        && cmd_ln_str_r(ps->config, "-allphone") == NULL) {
        if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        /* Register every model of the set as its own named search. */
        for (lmset_it = ngram_model_set_iter(lmset);
             lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
                ngram_model_set_iter_free(lmset_it);
                ngram_model_free(lmset);
                return -1;
            }
        }
        ngram_model_free(lmset);

        name = cmd_ln_str_r(ps->config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else {
            E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
            return -1;
        }
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/**
 * Feed raw audio from a file into the decoder's feature buffer.
 *
 * With an alignment, the audio is fed word by word and the function
 * sleeps so that each word is delivered no earlier than its start and
 * end times relative to get_time_delta().  Without one, the requested
 * frame range is fed in 512-sample chunks as fast as possible.
 *
 * @param bd    Batch decoder.
 * @param infh  Open audio file; the "-adchdr" header bytes are skipped.
 * @param sf    Start frame.
 * @param ef    End frame, or -1 to read to the end of the file.
 * @param al    Optional alignment used to pace the input, or NULL.
 * @return 0 always.
 */
static int
batch_decoder_decode_adc(batch_decoder_t *bd, FILE *infh, int sf, int ef,
                         alignment_t *al)
{
    featbuf_t *fb = search_factory_featbuf(bd->sf);
    float32 samprate = cmd_ln_float32_r(bd->config, "-samprate");
    int32 frate = cmd_ln_int32_r(bd->config, "-frate");
    int16 buf[512];

    /* From here on ef is the NUMBER of samples to read (the frame span
     * plus one analysis window) and sf is the absolute start sample. */
    if (ef != -1) {
        ef = (int32) (((ef - sf) * samprate / frate)
                      + (samprate * cmd_ln_float32_r(bd->config, "-wlen")));
    }
    sf = (int32) (sf * (samprate / frate));
    fseek(infh, cmd_ln_int32_r(bd->config, "-adchdr") + sf * sizeof(int16),
          SEEK_SET);

    if (al) {
        alignment_iter_t *itor;
        double starttime = 0.0;

        for (itor = alignment_words(al); itor;
             itor = alignment_iter_next(itor)) {
            alignment_entry_t *ent = alignment_iter_get(itor);
            double nsec = (double) ent->duration / frate;
            double endtime = starttime + nsec;
            size_t nsamp = (size_t) (nsec * samprate);
            double delta;

            /* size_t has no %d conversion; print it as unsigned long. */
            E_INFO("Processing %lu samples for %s (%f seconds ending %f)\n",
                   (unsigned long) nsamp,
                   dict_wordstr(search_factory_d2p(bd->sf)->dict,
                                ent->id.wid),
                   nsec, endtime);
            E_INFO("Woke up at delta %f\n", get_time_delta(bd));

            while (nsamp > 0) {
                size_t nread = 512;

                if (nread > nsamp)
                    nread = nsamp;
                nread = fread(buf, sizeof(int16), nread, infh);
                if (nread == 0)
                    break;
                featbuf_producer_process_raw(fb, buf, nread, FALSE);
                nsamp -= nread;
                starttime += (nread / samprate);

                /* Do not run ahead of the audio's own timeline. */
                delta = get_time_delta(bd);
                if (starttime > delta) {
                    E_INFO("Sleeping until next start time (%f seconds)\n",
                           starttime - delta);
                    usleep((int) ((starttime - delta) * 1000000));
                }
            }

            delta = get_time_delta(bd);
            if (endtime > delta) {
                E_INFO("Sleeping until end time (%f seconds)\n",
                       endtime - delta);
                usleep((int) ((endtime - delta) * 1000000));
            }
        }
    }
    else {
        /* Count down the samples still wanted.  Comparing the absolute
         * start sample against the sample count would cut the read short
         * by sf samples whenever decoding does not start at frame 0. */
        int32 remaining = ef;

        while (ef == -1 || remaining > 0) {
            size_t nread = 512;

            if (ef != -1 && nread > (size_t) remaining)
                nread = (size_t) remaining;
            nread = fread(buf, sizeof(int16), nread, infh);
            if (nread == 0)
                break;
            featbuf_producer_process_raw(fb, buf, nread, FALSE);
            if (ef != -1)
                remaining -= (int32) nread;
        }
    }

    return 0;
}
/**
 * Begin decoding a new utterance.
 *
 * Fails if the acoustic model is already in the started/processing state
 * or if no search has been selected.  Otherwise the performance timer is
 * restarted, a zero-padded utterance id is generated from ps->uttno,
 * leftovers of the previous utterance (lattice, hypothesis string) are
 * discarded, the acoustic model is started, any configured MFCC / raw
 * audio / senone score log files are opened, and the searches are started.
 *
 * @param ps  Decoder.
 * @return the result of starting the selected search, or a negative value
 *         on failure.
 */
int
ps_start_utt(ps_decoder_t *ps)
{
    char uttid[16];
    char *logpath;
    FILE *logfh;
    int status;

    if (ps->acmod->state == ACMOD_STARTED
        || ps->acmod->state == ACMOD_PROCESSING) {
        E_ERROR("Utterance already started\n");
        return -1;
    }
    if (ps->search == NULL) {
        E_ERROR("No search module is selected, did you forget to "
                "specify a language model or grammar?\n");
        return -1;
    }

    ptmr_reset(&ps->perf);
    ptmr_start(&ps->perf);

    sprintf(uttid, "%09u", ps->uttno);
    ++ps->uttno;

    /* Throw away whatever the previous utterance left behind. */
    ps_lattice_free(ps->search->dag);
    ps->search->dag = NULL;
    ps->search->last_link = NULL;
    ps->search->post = 0;
    ckd_free(ps->search->hyp_str);
    ps->search->hyp_str = NULL;

    status = acmod_start_utt(ps->acmod);
    if (status < 0)
        return status;

    /* Optional per-utterance logs; each handle is handed to the acmod. */
    if (ps->mfclogdir) {
        logpath = string_join(ps->mfclogdir, "/", uttid, ".mfc", NULL);
        E_INFO("Writing MFCC log file: %s\n", logpath);
        logfh = fopen(logpath, "wb");
        if (logfh == NULL) {
            E_ERROR_SYSTEM("Failed to open MFCC log file %s", logpath);
            ckd_free(logpath);
            return -1;
        }
        ckd_free(logpath);
        acmod_set_mfcfh(ps->acmod, logfh);
    }

    if (ps->rawlogdir) {
        logpath = string_join(ps->rawlogdir, "/", uttid, ".raw", NULL);
        E_INFO("Writing raw audio log file: %s\n", logpath);
        logfh = fopen(logpath, "wb");
        if (logfh == NULL) {
            E_ERROR_SYSTEM("Failed to open raw audio log file %s", logpath);
            ckd_free(logpath);
            return -1;
        }
        ckd_free(logpath);
        acmod_set_rawfh(ps->acmod, logfh);
    }

    if (ps->senlogdir) {
        logpath = string_join(ps->senlogdir, "/", uttid, ".sen", NULL);
        E_INFO("Writing senone score log file: %s\n", logpath);
        logfh = fopen(logpath, "wb");
        if (logfh == NULL) {
            E_ERROR_SYSTEM("Failed to open senone score log file %s", logpath);
            ckd_free(logpath);
            return -1;
        }
        ckd_free(logpath);
        acmod_set_senfh(ps->acmod, logfh);
    }

    /* The auxiliary phone loop search, when present, starts first. */
    if (ps->phone_loop)
        ps_search_start(ps->phone_loop);

    return ps_search_start(ps->search);
}
/**
 * Find all active words in the backpointer table and sort them by frame.
 *
 * Does nothing unless a tree search was run (ngs->fwdtree).  Otherwise:
 * every backpointer entry whose base word is known to the language model
 * is filed in ngs->frm_wordlist under its start frame, tracking first and
 * last end frames; entries with too narrow an end-frame range, or a
 * finish word not ending in the last frame, are removed; and the
 * surviving word ids are collected once each into ngs->fwdflat_wordlist,
 * terminated by -1.
 */
static void
build_fwdflat_wordlist(ngram_search_t *ngs)
{
    int32 bpi, frame, n_vocab;
    ps_latnode_t *cur, *prev, *following;

    /* No tree-search, use statically allocated wordlist. */
    if (!ngs->fwdtree)
        return;

    memset(ngs->frm_wordlist, 0,
           ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));

    /* Step 1: file each usable backpointer entry under its start frame. */
    for (bpi = 0; bpi < ngs->bpidx; ++bpi) {
        bptbl_t *entry = &ngs->bp_table[bpi];
        int32 start_fr, end_fr, word;

        if (entry->bp < 0)
            start_fr = 0;
        else
            start_fr = ngs->bp_table[entry->bp].frame + 1;
        end_fr = entry->frame;
        word = entry->wid;

        /* Only words the LM can transition to belong in the list. */
        if (!ngram_model_set_known_wid(ngs->lmset,
                                       dict_basewid(ps_search_dict(ngs), word)))
            continue;

        /* Is this word already filed under this start frame? */
        cur = ngs->frm_wordlist[start_fr];
        while (cur && (cur->wid != word))
            cur = cur->next;

        if (cur) {
            /* Seen before: just extend the last end frame. */
            cur->lef = end_fr;
            continue;
        }

        /* First sighting: push a new node on the head of the list. */
        cur = listelem_malloc(ngs->latnode_alloc);
        cur->wid = word;
        cur->fef = cur->lef = end_fr;
        cur->next = ngs->frm_wordlist[start_fr];
        ngs->frm_wordlist[start_fr] = cur;
    }

    /* Step 2: drop "unlikely" words, for which there are too few end points. */
    for (frame = 0; frame < ngs->n_frame; ++frame) {
        prev = NULL;
        cur = ngs->frm_wordlist[frame];
        while (cur) {
            following = cur->next;
            if ((cur->lef - cur->fef < ngs->min_ef_width)
                /* ... or it is </s> and does not end in the last frame */
                || ((cur->wid == ps_search_finish_wid(ngs))
                    && (cur->lef < ngs->n_frame - 1))) {
                if (prev)
                    prev->next = following;
                else
                    ngs->frm_wordlist[frame] = following;
                listelem_free(ngs->latnode_alloc, cur);
            }
            else {
                prev = cur;
            }
            cur = following;
        }
    }

    /* Step 3: form the overall word list for the second pass. */
    n_vocab = 0;
    bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
    for (frame = 0; frame < ngs->n_frame; ++frame) {
        for (cur = ngs->frm_wordlist[frame]; cur; cur = cur->next) {
            if (bitvec_is_set(ngs->word_active, cur->wid))
                continue;
            bitvec_set(ngs->word_active, cur->wid);
            ngs->fwdflat_wordlist[n_vocab++] = cur->wid;
        }
    }
    ngs->fwdflat_wordlist[n_vocab] = -1;
    E_INFO("Utterance vocabulary contains %d words\n", n_vocab);
}
static int cp_parm() { FILE *fp; uint32 i, o; uint32 max=0; /* Open the file first to see whether command-line parameters match */ if(cmd_ln_str("-cpopsfn")==NULL) { E_INFO("Please specify -cpopsfn\n"); return S3_ERROR; } fp = fopen(cmd_ln_str("-cpopsfn"), "r"); if (fp == NULL) { E_INFO("Unable to open cpops file\n"); return S3_ERROR; } while (fscanf(fp, "%u %u", &o, &i) == 2) { if(o+1>max) { max=o+1; } } if (omixw) { if(max != n_mixw_o) { E_INFO("Mismatch between cp operation file (max out %d) and -nmixout (%d)\n",max, n_mixw_o); return S3_ERROR; } } if (ogau) { if(max != n_cb_o) { E_INFO("Mismatch between cp operation file (max out %d) and -ncbout (%d)\n",max, n_cb_o); return S3_ERROR; } } if (ogau_full) { if(max != n_cb_o) { E_INFO("Mismatch between cp operation file (max out %d) and -ncbout (%d)\n",max, n_cb_o); return S3_ERROR; } } if (otmat) { if(max != n_tmat_o) { E_INFO("Mismatch between cp operation file (max out %d) and -ntmatout (%d)\n",max, n_tmat_o); return S3_ERROR; } } fclose(fp); fp = fopen(cmd_ln_str("-cpopsfn"), "r"); while (fscanf(fp, "%u %u", &o, &i) == 2) { if (omixw) { cp_mixw(o, i); } if (ogau) { cp_gau(o, i); } if (ogau_full) { cp_gau_full(o, i); } if (otmat) { cp_tmat(o, i); } } fclose(fp); return S3_SUCCESS; }
/**
 * Decode one complete utterance.
 *
 * With "-adcin" set the utterance is read as a waveform and converted to
 * cepstra and features; otherwise a cepstrum file is read and converted
 * to features.  Per-utterance resources (language model, MLLR) named in
 * ur are applied, and the whole utterance is decoded in a single block.
 * Every input failure is reported with E_FATAL.
 *
 * @param data   The kb_t for this decoder, passed as an opaque pointer.
 * @param ur     Utterance resources: file name and optional LM/MLLR names.
 * @param sf     Start frame, used only for cepstrum input.
 * @param ef     End frame, used only for cepstrum input.
 * @param uttid  Utterance id.
 */
void
utt_decode(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    kbcore_t *kbcore;
    cmd_ln_t *config;
    int32 num_decode_frame;
    int32 total_frame;
    stat_t *st;
    srch_t *s;

    num_decode_frame = 0;
    E_INFO("Processing: %s\n", uttid);

    kb = (kb_t *) data;
    kbcore = kb->kbcore;
    config = kbcore_config(kbcore);
    kb_set_uttid(uttid, ur->uttfile, kb);
    st = kb->stat;

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }
        /* Release the previous utterance's cepstra and forget the pointer
         * so it cannot be freed twice. */
        if (kb->mfcc) {
            ckd_free_2d((void **)kb->mfcc);
            kb->mfcc = NULL;
        }
        fe_start_utt(kb->fe);
        if (fe_process_utt(kb->fe, adcdata, nsamps, &kb->mfcc, &total_frame) < 0) {
            /* The file name argument needs a conversion to be printed. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);
        if (total_frame > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n", S3_MAX_FRAMES);
        }
        if ((total_frame = feat_s2mfc2feat_live(kbcore_fcb(kbcore),
                                                kb->mfcc,
                                                &total_frame,
                                                TRUE, TRUE,
                                                kb->feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }
    }
    else {
        /* Read mfc file and build feature vectors for entire utterance */
        if ((total_frame = feat_s2mfc2feat(kbcore_fcb(kbcore), ur->uttfile,
                                           cmd_ln_str_r(config, "-cepdir"),
                                           cmd_ln_str_r(config, "-cepext"),
                                           sf, ef, kb->feat,
                                           S3_MAX_FRAMES)) < 0) {
            E_FATAL("Cannot read file %s. Forced exit\n", ur->uttfile);
        }
    }

    /* Also need to make sure we don't set resource if it is the same. Well,
       this mechanism could be provided inside the following function. */
    s = kb->srch;
    if (ur->lmname != NULL)
        srch_set_lm(s, ur->lmname);
    if (ur->regmatname != NULL)
        kb_setmllr(ur->regmatname, ur->cb2mllrname, kb);

    /* These are necessary! */
    s->uttid = kb->uttid;
    s->uttfile = kb->uttfile;

    utt_begin(kb);
    utt_decode_block(kb->feat, total_frame, &num_decode_frame, kb);
    utt_end(kb);

    st->tot_fr += st->nfr;
}
/**
 * Read a senone decision tree from a text file.
 *
 * The file is scanned three times: once to count lines (the number of
 * questions is the line count minus the header line), once to find the
 * largest node key, and once to build the tree, the key-to-question map
 * and the per-question entropy table.
 *
 * @param out_dt     Receives the newly allocated tree.  It is assigned
 *                   before the file is opened, so it is set even when
 *                   S3_ERROR is returned.
 * @param file_name  Path of the tree file.
 * @return S3_SUCCESS on success, S3_ERROR if the file cannot be opened.
 */
int
read_seno_dtree_file(dtree_t **out_dt, const char *file_name)
{
    uint32 n_base;
    uint32 n_cd;
    char tree_id[64];
    float32 ent;
    uint32 key, l_key, r_key, n_key = 0, *k2q = NULL;
    char q_str[1024];
    FILE *fp;
    int i, n_quest;
    dtree_t *dt;
    bt_node_t *node;
    bt_t *tree;
    comp_quest_t *q;
    float32 *q2ent;

    *out_dt = dt = (dtree_t *)ckd_calloc(1, sizeof(dtree_t));

    dt->tree = tree = bt_new();

    fp = fopen(file_name, "r");
    if (fp == NULL) {
        E_WARN_SYSTEM("Unable to open %s for reading", file_name);
        return S3_ERROR;
    }

    /* Pass 1: count lines. */
    for (n_quest = 0; fgets(q_str, 1024, fp) != NULL; n_quest++);
    --n_quest;  /* account for header line */
    dt->n_quest = n_quest;

    /* Pass 2: find the largest key.  The node's own key is included
     * because it is used to index k2q[] below. */
    rewind(fp);
    read_header_line(&n_base, tree_id, &n_cd, fp);
    for (i = 0; read_node_line(&ent, &key, &l_key, &r_key, q_str, fp) > 0; i++) {
        if (n_key < key)
            n_key = key;
        if (n_key < l_key)
            n_key = l_key;
        if (n_key < r_key)
            n_key = r_key;
    }
    ++n_key;
    dt->n_key = n_key;

    /* Pass 3: build the tree. */
    rewind(fp);
    read_header_line(&n_base, tree_id, &n_cd, fp);

    E_INFO("Reading tree %s (%u base phones, %u CD phones, %u quest)\n",
           tree_id, n_base, n_cd, n_quest);

    dt->quest = q = (comp_quest_t *)ckd_calloc(n_quest, sizeof(comp_quest_t));
    dt->k2q = k2q = (uint32 *)ckd_calloc(n_key, sizeof(uint32));
    dt->q2ent = q2ent = (float32 *)ckd_calloc(n_quest, sizeof(float32));

    for (key = 0; key < n_key; key++) {
        k2q[key] = NO_MAP;
    }

    for (i = 0; read_node_line(&ent, &key, &l_key, &r_key, q_str, fp) > 0; i++) {
        if (tree->root) {
            E_INFO("%u\n", key);
            node = bt_find_node(tree, key);
            if (node) {
                /* grow left and right children */
                bt_add_left(node, l_key);
                bt_add_right(node, r_key);
            }
            else {
                E_FATAL("Find node w/ key %u failed\n", key);
            }
        }
        else {
            /* The first node line defines the root. */
            E_INFO("root %u\n", key);
            tree->root = bt_new_node(key);
            bt_add_left(tree->root, l_key);
            bt_add_right(tree->root, r_key);
        }

        k2q[key] = i;
        q2ent[i] = ent;
        parse_compound_q(&q[i], q_str);
    }

    /* The file is fully consumed; release the handle. */
    fclose(fp);

    assert(i == n_quest);

    return S3_SUCCESS;
}
/**
 * Fill in default model arguments in the decoder's configuration.
 *
 * When MODELDIR is defined at build time, missing -hmm/-lm/-dict values
 * are pointed at the bundled models if those exist, and relative values
 * that do not exist as given are re-rooted under MODELDIR.  Afterwards,
 * if -hmm is set, the individual acoustic model file arguments are added
 * through ps_add_file().
 *
 * @param ps  Decoder whose configuration is updated in place.
 */
static void
ps_init_defaults(ps_decoder_t *ps)
{
    char const *hmmdir, *lmfile, *dictfile;

    /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
#ifdef __ADSPBLACKFIN__
    /* Terminate the message so it does not run into the next log line. */
    E_INFO("Will not use mmap() on uClinux/Blackfin.\n");
    cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
#endif

#ifdef MODELDIR
    /* Set default acoustic and language models. */
    hmmdir = cmd_ln_str_r(ps->config, "-hmm");
    lmfile = cmd_ln_str_r(ps->config, "-lm");
    dictfile = cmd_ln_str_r(ps->config, "-dict");
    if (hmmdir == NULL && hmmdir_exists(MODELDIR "/hmm/en_US/hub4wsj_sc_8k")) {
        hmmdir = MODELDIR "/hmm/en_US/hub4wsj_sc_8k";
        cmd_ln_set_str_r(ps->config, "-hmm", hmmdir);
    }
    /* A default LM is only chosen when no grammar was requested either. */
    if (lmfile == NULL && !cmd_ln_str_r(ps->config, "-fsg")
        && !cmd_ln_str_r(ps->config, "-jsgf")
        && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) {
        lmfile = MODELDIR "/lm/en_US/hub4.5000.DMP";
        cmd_ln_set_str_r(ps->config, "-lm", lmfile);
    }
    if (dictfile == NULL && file_exists(MODELDIR "/lm/en_US/cmu07a.dic")) {
        dictfile = MODELDIR "/lm/en_US/cmu07a.dic";
        cmd_ln_set_str_r(ps->config, "-dict", dictfile);
    }

    /* Expand acoustic and language model filenames relative to
     * installation path. */
    if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
        char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL);
        cmd_ln_set_str_r(ps->config, "-hmm", tmphmm);
        ckd_free(tmphmm);
    }
    if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
        char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL);
        cmd_ln_set_str_r(ps->config, "-lm", tmplm);
        ckd_free(tmplm);
    }
    if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
        char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL);
        cmd_ln_set_str_r(ps->config, "-dict", tmpdict);
        ckd_free(tmpdict);
    }
#endif

    /* Get acoustic model filenames and add them to the command-line */
    if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
        ps_add_file(ps, "-mdef", hmmdir, "mdef");
        ps_add_file(ps, "-mean", hmmdir, "means");
        ps_add_file(ps, "-var", hmmdir, "variances");
        ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
        ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
        ps_add_file(ps, "-sendump", hmmdir, "sendump");
        ps_add_file(ps, "-fdict", hmmdir, "noisedict");
        ps_add_file(ps, "-lda", hmmdir, "feature_transform");
        ps_add_file(ps, "-featparams", hmmdir, "feat.params");
        ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
    }
}
/**
 * Build the phone HMM network for allphone search.
 *
 * Creates one PHMM node for every phone that phmm_lookup() does not
 * already resolve to an existing node, chains the nodes per base CI phone
 * in allphs->ci_phmm, gives every node left- and right-context bit
 * vectors carved from one shared allocation, fills in those vectors
 * (all fillers are treated as interchangeable contexts), and finally
 * links the nodes with phmm_link().
 *
 * @param allphs  Allphone search being initialized.
 * @return 0 always.
 */
static int
phmm_build(allphone_search_t * allphs)
{
    bin_mdef_t *mdef;
    phmm_t **by_pid;
    phmm_t *node;
    uint32 *lc_pool, *rc_pool;
    s3cipid_t *fillers;
    s3cipid_t ci;
    s3pid_t pid;
    int32 ctx_words;
    int n_nodes, n_links;
    int total_phones, k;

    mdef = ((ps_search_t *) allphs)->acmod->mdef;
    allphs->ci_phmm =
        (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *));
    by_pid =
        (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *));

    /* For each unique ciphone/triphone entry in mdef, create a PHMM node */
    n_nodes = 0;
    if (allphs->ci_only)
        total_phones = bin_mdef_n_ciphone(mdef);
    else
        total_phones = bin_mdef_n_phone(mdef);
    E_INFO("Building PHMM net of %d phones\n", total_phones);

    for (pid = 0; pid < total_phones; pid++) {
        node = phmm_lookup(allphs, pid);
        if (node == NULL) {
            /* No existing node for this phone: create one and push it on
             * the list of its base phone. */
            node = (phmm_t *) ckd_calloc(1, sizeof(*node));
            hmm_init(allphs->hmmctx, &(node->hmm), FALSE,
                     mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat);
            node->pid = pid;
            node->ci = bin_mdef_pid2ci(mdef, pid);
            node->succlist = NULL;
            node->next = allphs->ci_phmm[node->ci];
            allphs->ci_phmm[node->ci] = node;
            n_nodes++;
        }
        by_pid[pid] = node;
    }

    /* One contiguous chunk holds every context bit vector: all left
     * contexts first, then all right contexts. */
    ctx_words = bitvec_size(bin_mdef_n_ciphone(mdef));
    lc_pool = ckd_calloc(n_nodes * 2 * ctx_words, sizeof(bitvec_t));
    rc_pool = lc_pool + (n_nodes * ctx_words);
    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        node = allphs->ci_phmm[ci];
        while (node) {
            node->lc = lc_pool;
            lc_pool += ctx_words;
            node->rc = rc_pool;
            rc_pool += ctx_words;
            node = node->next;
        }
    }

    /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */
    fillers =
        (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
                                 sizeof(s3cipid_t));

    /* CI phones accept every context; collect the filler phones on the way. */
    k = 0;
    for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
        node = by_pid[ci];
        bitvec_set_all(node->lc, bin_mdef_n_ciphone(mdef));
        bitvec_set_all(node->rc, bin_mdef_n_ciphone(mdef));
        if (mdef->phone[ci].info.ci.filler) {
            fillers[k++] = ci;
        }
    }
    fillers[k] = BAD_S3CIPID;

    /* Loop over cdphones only if ci_only is not set */
    for (pid = bin_mdef_n_ciphone(mdef); pid < total_phones; pid++) {
        node = by_pid[pid];

        /* Left context: a filler context stands for every filler. */
        if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) {
            for (k = 0; IS_S3CIPID(fillers[k]); k++)
                bitvec_set(node->lc, fillers[k]);
        }
        else {
            bitvec_set(node->lc, mdef->phone[pid].info.cd.ctx[1]);
        }

        /* Right context, handled the same way. */
        if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) {
            for (k = 0; IS_S3CIPID(fillers[k]); k++)
                bitvec_set(node->rc, fillers[k]);
        }
        else {
            bitvec_set(node->rc, mdef->phone[pid].info.cd.ctx[2]);
        }
    }

    /* The lookup tables were only needed during construction. */
    ckd_free(by_pid);
    ckd_free(fillers);

    /* Create links between PHMM nodes */
    n_links = phmm_link(allphs);

    E_INFO("%d nodes, %d links\n", n_nodes, n_links);
    return 0;
}
/**
 * Load the acoustic model pieces named in acmod->config.
 *
 * Reads the model definition ("-mdef") and the transition matrices
 * ("-tmat"), checks that "-mean", "-var" and "-tmat" are set, selects a
 * Gaussian computation module, and applies the "-mllr" transform when one
 * is configured.
 *
 * @param acmod acoustic model whose mdef, tmat and mgau members are filled in.
 * @return 0 on success, -1 on any failure (an error is logged for missing
 *         files and read failures).
 */
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
            E_ERROR("Acoustic model definition is not specified either "
                    "with -mdef option or with -hmm\n");
        else
            E_ERROR("Folder '%s' does not contain acoustic model "
                    "definition 'mdef'\n", hmmdir);

        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);
    /*
     * Treat a NULL result as a load failure, like every other loader in
     * this function.  NOTE(review): tmat_init()'s failure convention is not
     * visible from here; if it never returns NULL this check is inert.
     */
    if (acmod->tmat == NULL) {
        E_ERROR("Failed to read transition matrices from %s\n", tmatfn);
        return -1;
    }

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)
        || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        /* Try the modules in order: SCHMM, then PTHMM, then multi-stream. */
        E_INFO("Attempting to use SCHMM computation module\n");
        if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
            E_INFO("Attempting to use PTHMM computation module\n");
            if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    /* If there is an MLLR transform, apply it. */
    if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
        ps_mllr_t *mllr = ps_mllr_read(mllrfn);
        if (mllr == NULL)
            return -1;
        acmod_update_mllr(acmod, mllr);
    }

    return 0;
}
/*
 * Create a dictionary from a main pronunciation file and an optional
 * filler file.
 *
 * mdef       - stored in the dictionary; when it is NULL the dictionary
 *              allocates its own phone hash table and phone-name array.
 * dictfile   - main dictionary file; must not be NULL (fatal otherwise).
 * fillerfile - filler dictionary file, or NULL for none.
 * comp_sep   - compound-word separator; 0 disables compound-word building.
 *
 * Returns the new dictionary.  A missing dictfile, a file that cannot be
 * opened, or too many words are reported through E_FATAL/E_FATAL_SYSTEM.
 */
dict_t *dict_init (mdef_t *mdef, char *dictfile, char *fillerfile, char comp_sep)
{
    FILE *main_fp;
    FILE *filler_fp = NULL;
    char line[1024];
    int32 n_entries = 0;
    int32 n_comp;
    dict_t *dict;

    if (dictfile == NULL) {
        E_FATAL("No dictionary file\n");
    }

    /*
     * First pass: count the entries in both files so that all tables can be
     * allocated in one go (the PC NT system doesn't like to grow memory
     * gradually).  Every fgets() chunk that does not start with '#' counts.
     */
    main_fp = fopen(dictfile, "r");
    if (main_fp == NULL) {
        E_FATAL_SYSTEM("fopen(%s,r) failed\n", dictfile);
    }
    while (fgets(line, sizeof(line), main_fp) != NULL) {
        if (line[0] == '#')
            continue;
        n_entries++;
    }
    rewind(main_fp);

    if (fillerfile != NULL) {
        filler_fp = fopen(fillerfile, "r");
        if (filler_fp == NULL) {
            E_FATAL_SYSTEM("fopen(%s,r) failed\n", fillerfile);
        }
        while (fgets(line, sizeof(line), filler_fp) != NULL) {
            if (line[0] == '#')
                continue;
            n_entries++;
        }
        rewind(filler_fp);
    }

    /*
     * Allocate dict entries.  HACK!! Leave room for 1024 extra words that are
     * not in the files, capped at the word-id limit.
     */
    dict = (dict_t *) ckd_calloc(1, sizeof(dict_t));
    if (n_entries + 1024 < MAX_WID)
        dict->max_words = n_entries + 1024;
    else
        dict->max_words = MAX_WID;
    if (n_entries >= MAX_WID) {
        E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n_entries, MAX_WID);
    }

    dict->word = (dictword_t *) ckd_calloc(dict->max_words, sizeof(dictword_t));
    dict->n_word = 0;

    /* Own phone tables are only created when no model definition is given */
    dict->mdef = mdef;
    if (mdef == NULL) {
        dict->pht = hash_new(DEFAULT_NUM_PHONE, 1 /* No case */);
        dict->ciphone_str = (char **) ckd_calloc(DEFAULT_NUM_PHONE, sizeof(char *));
    }
    else {
        dict->pht = NULL;
        dict->ciphone_str = NULL;
    }
    dict->n_ciphone = 0;

    /* Hash table for word strings; lookups are case-insensitive */
    dict->ht = hash_new(dict->max_words, 1 /* no-case */);

    /* No compound words yet */
    dict->comp_head = NULL;

    /* Second pass: digest the main dictionary file */
    E_INFO("Reading main dictionary: %s\n", dictfile);
    dict_read(main_fp, dict);
    fclose(main_fp);
    E_INFO("%d words read\n", dict->n_word);

    /* Filler words, if any, occupy ids filler_start..filler_end */
    dict->filler_start = dict->n_word;
    if (fillerfile != NULL) {
        E_INFO("Reading filler dictionary: %s\n", fillerfile);
        dict_read(filler_fp, dict);
        fclose(filler_fp);
        E_INFO("%d words read\n", dict->n_word - dict->filler_start);
    }
    dict->filler_end = dict->n_word - 1;

    /* Look up the distinguished word-ids; a missing one only draws a warning */
    dict->startwid = dict_wordid(dict, START_WORD);
    dict->finishwid = dict_wordid(dict, FINISH_WORD);
    dict->silwid = dict_wordid(dict, SILENCE_WORD);
    if (NOT_WID(dict->startwid)) {
        E_WARN("%s not in dictionary\n", START_WORD);
    }
    if (NOT_WID(dict->finishwid)) {
        E_WARN("%s not in dictionary\n", FINISH_WORD);
    }
    if (NOT_WID(dict->silwid)) {
        E_WARN("%s not in dictionary\n", SILENCE_WORD);
    }

    /* Identify compound words if a separator was supplied */
    if (comp_sep) {
        E_INFO("Building compound words (separator = '%c')\n", comp_sep);
        n_comp = dict_build_comp(dict, comp_sep);
        E_INFO("%d compound words\n", n_comp);
    }

    return dict;
}
dtree_t * mk_tree(float32 ****mixw, /* ADDITION FOR CONTINUOUS_TREES, 20 May 98 */ float32 ****means, float32 ****vars, uint32 *veclen, /* END ADDITION FOR CONTINUOUS_TREES */ uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, uint32 *id, uint32 n_id, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 **dfeat, uint32 n_dfeat, uint32 split_min, uint32 split_max, float32 split_thr, float32 mwfloor) { dtree_t *s_tree; uint32 i; dtree_node_t *b_n, *root; s_tree = ckd_calloc(1, sizeof(dtree_t)); s_tree->node = ckd_calloc(2*split_max + 1, sizeof(dtree_node_t)); s_tree->n_node = 0; s_tree->node[0].node_id = 0; s_tree->n_node = 1; root = &s_tree->node[0]; mk_node(root, 0, id, n_id, mixw, /* ADDITION FOR CONTINUOUS_TREES; 20 May 98; passing means, vars, veclen */ means, vars, veclen, /* END ADDITION FOR CONTINUOUS_TREES */ n_model, n_state, n_stream, n_density, stwt, mwfloor); set_best_quest(root, mixw, /* ADDITION FOR CONTINUOUS_TREES; 20 May 98; passing means, vars and veclen */ means, vars, veclen, /* END ADDITION FOR CONTINUOUS_TREES */ n_model, n_state, n_stream, n_density, stwt, all_q, n_all_q, pset, dfeat, n_dfeat, mwfloor); if (root->q == NULL) { /* No question found that is able to split node; can't go any further */ free_tree(s_tree); return NULL; } for (i = 0; i < split_max; i++) { b_n = best_leaf_node(root); if (b_n == NULL) { E_INFO("stop. leaf nodes are specific\n"); break; } /* DDDDDBUG The following criteria will fail if we use only likelihood and no likelihood increase */ if (b_n->wt_ent_dec <= 0) { E_INFO("stop. b_n->wt_ent_dec (%.3e) <= 0\n", b_n->wt_ent_dec); break; } if ((i > split_min) && (b_n->wt_ent_dec < split_thr * b_n->wt_ent)) { E_INFO("stop. 
b_n->wt_ent_dec (%.3e) < split_thr * b_n->wt_ent (%.3e)\n", b_n->wt_ent_dec, b_n->wt_ent * split_thr); break; } split_node(s_tree, b_n->node_id, mixw, /* ADDITION FOR CONTINUOUS_TREES; 20 May 98; passing means, vars and veclen */ means, vars, veclen, /* END ADDITION FOR CONTINUOUS_TREES */ n_model, n_state, n_stream, n_density, stwt, all_q, n_all_q, pset, dfeat, n_dfeat, mwfloor); } #if 1 E_INFO("Final simple tree\n"); print_tree(stderr, "|", root, pset, 0); fprintf(stderr, "\n"); #endif return s_tree; }
/**
 * Add a word and its pronunciation to the dictionary.
 *
 * The word table is grown by DICT_INC_SZ entries when it is full, and the
 * word is then added as usual.  (Previously the function returned
 * BAD_S3WID right after growing the table, so the word that triggered the
 * growth was silently dropped.)
 *
 * @param d    dictionary to extend.
 * @param word word string; it is copied.  For an alternative pronunciation
 *             the string is temporarily truncated in place to find its
 *             base word and restored before returning.
 * @param p    CI phone ids of the pronunciation, or NULL for none.
 * @param np   number of entries in p.
 * @return the new word id, or BAD_S3WID when hash_table_enter() does not
 *         hand back the id just offered (presumably because the string is
 *         already in the table).  A missing base word is fatal.
 */
s3wid_t
dict_add_word(dict_t * d, char *word, s3cipid_t * p, int32 np)
{
    int32 len;
    dictword_t *wordp;
    s3wid_t newwid;

    if (d->n_word >= d->max_words) {
        E_INFO
            ("Dictionary max size (%d) exceeded; reallocate another entries %d \n",
             d->max_words, DICT_INC_SZ);
        d->word =
            (dictword_t *) ckd_realloc(d->word,
                                       (d->max_words +
                                        DICT_INC_SZ) * sizeof(dictword_t));
        d->max_words = d->max_words + DICT_INC_SZ;
        /* Fall through: there is room for the new word now. */
    }

    wordp = d->word + d->n_word;
    wordp->word = (char *) ckd_salloc(word);    /* Freed in dict_free */

    /* Associate word string with d->n_word in hash table */
    if (hash_table_enter(d->ht, wordp->word, (void *)(long)d->n_word) != (void *)(long)d->n_word) {
        ckd_free(wordp->word);
        wordp->word = NULL;     /* Do not leave a dangling pointer in the unused slot */
        return (BAD_S3WID);
    }

    /* Fill in word entry, and set defaults */
    if (p && (np > 0)) {
        wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t));      /* Freed in dict_free */
        memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t));
        wordp->pronlen = np;
    }
    else {
        wordp->ciphone = NULL;
        wordp->pronlen = 0;
    }
    wordp->alt = BAD_S3WID;
    wordp->basewid = d->n_word;
    wordp->n_comp = 0;
    wordp->comp = NULL;

    /* Determine base/alt wids */
    if ((len = dict_word2basestr(word)) > 0) {
        void *val;
        s3wid_t w;

        /* Truncated to a baseword string; find its ID */
        if (hash_table_lookup(d->ht, word, &val) < 0) {
            word[len] = '(';    /* Get back the original word */
            E_FATAL("Missing base word for: %s\n", word);
        }
        else
            word[len] = '(';    /* Get back the original word */

        /* Link into alt list, right after the base word */
        w = (s3wid_t)(long)val;
        wordp->basewid = w;
        wordp->alt = d->word[w].alt;
        d->word[w].alt = d->n_word;
    }

    newwid = d->n_word++;

    return (newwid);
}
int main(int argc, char *argv[]) { acmod_t *acmod; logmath_t *lmath; cmd_ln_t *config; FILE *rawfh; int16 *buf; int16 const *bptr; mfcc_t **cepbuf, **cptr; size_t nread, nsamps; int nfr; int frame_counter; int bestsen1[270]; lmath = logmath_init(1.0001, 0, 0); config = cmd_ln_init(NULL, ps_args(), TRUE, "-mdef", MODELDIR "/en-us/en-us/mdef", "-mean", MODELDIR "/en-us/en-us/means", "-var", MODELDIR "/en-us/en-us/variances", "-tmat", MODELDIR "/en-us/en-us/transition_matrices", "-sendump", MODELDIR "/en-us/en-us/sendump", "-compallsen", "true", "-cmn", "prior", "-tmatfloor", "0.0001", "-mixwfloor", "0.001", "-varfloor", "0.0001", "-mmap", "no", "-topn", "4", "-ds", "1", "-input_endian", "little", "-samprate", "16000", NULL); TEST_ASSERT(config); cmd_ln_parse_file_r(config, ps_args(), MODELDIR "/en-us/en-us/feat.params", FALSE); TEST_ASSERT(acmod = acmod_init(config, lmath, NULL, NULL)); cmn_prior_set(acmod->fcb->cmn_struct, prior); nsamps = 2048; frame_counter = 0; buf = ckd_calloc(nsamps, sizeof(*buf)); TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb")); TEST_EQUAL(0, acmod_start_utt(acmod)); E_INFO("Incremental(2048):\n"); while (!feof(rawfh)) { nread = fread(buf, sizeof(*buf), nsamps, rawfh); bptr = buf; while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0 || nread > 0) { int16 best_score; int frame_idx = -1, best_senid; while (acmod->n_feat_frame > 0) { acmod_score(acmod, &frame_idx); acmod_advance(acmod); best_score = acmod_best_score(acmod, &best_senid); E_INFO("Frame %d best senone %d score %d\n", frame_idx, best_senid, best_score); TEST_EQUAL(frame_counter, frame_idx); if (frame_counter < 190) bestsen1[frame_counter] = best_score; ++frame_counter; frame_idx = -1; } } } TEST_EQUAL(0, acmod_end_utt(acmod)); nread = 0; { int16 best_score; int frame_idx = -1, best_senid; while (acmod->n_feat_frame > 0) { acmod_score(acmod, &frame_idx); acmod_advance(acmod); best_score = acmod_best_score(acmod, &best_senid); E_INFO("Frame %d best senone %d 
score %d\n", frame_idx, best_senid, best_score); if (frame_counter < 190) bestsen1[frame_counter] = best_score; TEST_EQUAL(frame_counter, frame_idx); ++frame_counter; frame_idx = -1; } } /* Now try to process the whole thing at once. */ E_INFO("Whole utterance:\n"); cmn_prior_set(acmod->fcb->cmn_struct, prior); nsamps = ftell(rawfh) / sizeof(*buf); clearerr(rawfh); fseek(rawfh, 0, SEEK_SET); buf = ckd_realloc(buf, nsamps * sizeof(*buf)); TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh)); bptr = buf; TEST_EQUAL(0, acmod_start_utt(acmod)); acmod_process_raw(acmod, &bptr, &nsamps, TRUE); TEST_EQUAL(0, acmod_end_utt(acmod)); { int16 best_score; int frame_idx = -1, best_senid; frame_counter = 0; while (acmod->n_feat_frame > 0) { acmod_score(acmod, &frame_idx); acmod_advance(acmod); best_score = acmod_best_score(acmod, &best_senid); E_INFO("Frame %d best senone %d score %d\n", frame_idx, best_senid, best_score); if (frame_counter < 190) TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]); TEST_EQUAL(frame_counter, frame_idx); ++frame_counter; frame_idx = -1; } } /* Now process MFCCs and make sure we get the same results. 
*/ cepbuf = ckd_calloc_2d(frame_counter, fe_get_output_size(acmod->fe), sizeof(**cepbuf)); fe_start_utt(acmod->fe); nsamps = ftell(rawfh) / sizeof(*buf); bptr = buf; nfr = frame_counter; fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL); fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr); E_INFO("Incremental(MFCC):\n"); cmn_prior_set(acmod->fcb->cmn_struct, prior); TEST_EQUAL(0, acmod_start_utt(acmod)); cptr = cepbuf; nfr = frame_counter; frame_counter = 0; while ((acmod_process_cep(acmod, &cptr, &nfr, FALSE)) > 0) { int16 best_score; int frame_idx = -1, best_senid; while (acmod->n_feat_frame > 0) { acmod_score(acmod, &frame_idx); acmod_advance(acmod); best_score = acmod_best_score(acmod, &best_senid); E_INFO("Frame %d best senone %d score %d\n", frame_idx, best_senid, best_score); TEST_EQUAL(frame_counter, frame_idx); if (frame_counter < 190) TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]); ++frame_counter; frame_idx = -1; } } TEST_EQUAL(0, acmod_end_utt(acmod)); nfr = 0; acmod_process_cep(acmod, &cptr, &nfr, FALSE); { int16 best_score; int frame_idx = -1, best_senid; while (acmod->n_feat_frame > 0) { acmod_score(acmod, &frame_idx); acmod_advance(acmod); best_score = acmod_best_score(acmod, &best_senid); E_INFO("Frame %d best senone %d score %d\n", frame_idx, best_senid, best_score); TEST_EQUAL(frame_counter, frame_idx); if (frame_counter < 190) TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]); ++frame_counter; frame_idx = -1; } } /* Note that we have to process the whole thing again because * !#@$@ s2mfc2feat modifies its argument (not for long) */ fe_start_utt(acmod->fe); nsamps = ftell(rawfh) / sizeof(*buf); bptr = buf; nfr = frame_counter; fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL); fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr); E_INFO("Whole utterance (MFCC):\n"); cmn_prior_set(acmod->fcb->cmn_struct, prior); TEST_EQUAL(0, acmod_start_utt(acmod)); cptr = cepbuf; nfr = frame_counter; acmod_process_cep(acmod, 
&cptr, &nfr, TRUE); TEST_EQUAL(0, acmod_end_utt(acmod)); { int16 best_score; int frame_idx = -1, best_senid; frame_counter = 0; while (acmod->n_feat_frame > 0) { acmod_score(acmod, &frame_idx); acmod_advance(acmod); best_score = acmod_best_score(acmod, &best_senid); E_INFO("Frame %d best senone %d score %d\n", frame_idx, best_senid, best_score); if (frame_counter < 190) TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]); TEST_EQUAL(frame_counter, frame_idx); ++frame_counter; frame_idx = -1; } } E_INFO("Rewound (MFCC):\n"); TEST_EQUAL(0, acmod_rewind(acmod)); { int16 best_score; int frame_idx = -1, best_senid; frame_counter = 0; while (acmod->n_feat_frame > 0) { acmod_score(acmod, &frame_idx); acmod_advance(acmod); best_score = acmod_best_score(acmod, &best_senid); E_INFO("Frame %d best senone %d score %d\n", frame_idx, best_senid, best_score); if (frame_counter < 190) TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]); TEST_EQUAL(frame_counter, frame_idx); ++frame_counter; frame_idx = -1; } } /* Clean up, go home. */ ckd_free_2d(cepbuf); fclose(rawfh); ckd_free(buf); acmod_free(acmod); logmath_free(lmath); cmd_ln_free_r(config); return 0; }
/*
 * Update kb with a new dictionary and a new LM.
 *   assumes:  single-LM kbcore (before & after)
 *   requires: updating kbcore
 * Lucian Galescu, 08/11/2005
 *
 * Steps, in order: release part of the old search state, rebuild kbcore
 * through kbcore_update_lm(), verify that the start/finish words exist,
 * unlink them from the LM, collect the left-context phone set, rebuild the
 * unigram and filler lextrees, and re-create the acoustic score buffers,
 * the Viterbi history and the HMM histogram.
 *
 * kb       - knowledge base to update in place.
 * dictfile - new dictionary file, passed to kbcore_update_lm().
 * lmfile   - new language model file, passed to kbcore_update_lm().
 */
void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;

    /*** clean up ***/
    /*
     * NOTE(review): only the fillertree pointer array is freed here; the
     * lextrees it points to are not released in this function, and neither
     * kb->ugtree nor kb->ascr is freed before being replaced below --
     * confirm whether that is intended.
     */
    vithist_t *vithist = kb->vithist;
    if (kb->fillertree) ckd_free ((void *)kb->fillertree);
    if (kb->hmm_hist) ckd_free ((void *)kb->hmm_hist);

    /* vithist: free its member arrays, then the structure itself */
    if (vithist) {
        ckd_free ((void *) vithist->entry);
        ckd_free ((void *) vithist->frame_start);
        ckd_free ((void *) vithist->bestscore);
        ckd_free ((void *) vithist->bestvh);
        ckd_free ((void *) vithist->lms2vh_root);
        ckd_free ((void *) kb->vithist);
    }

    /*** re-initialize ***/
    kb->kbcore = kbcore_update_lm(kb->kbcore,
                                  dictfile,
                                  cmd_ln_str("-fdict"),
                                  "", /* Hack!! Hardwired constant for -compsep argument */
                                  lmfile,
                                  cmd_ln_str("-fillpen"),
                                  cmd_ln_float32("-silprob"),
                                  cmd_ln_float32("-fillprob"),
                                  cmd_ln_float32("-lw"),
                                  cmd_ln_float32("-wip"),
                                  cmd_ln_float32("-uw"));
    if(kb->kbcore==NULL){
        E_FATAL("Updating kbcore failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore); /* assigned but not read again in this function */

    /* The utterance anchors must exist in both the dictionary and the LM */
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lm){
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if(lm){
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

        /* Also clear the reverse mapping for every alternative pronunciation */
        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    /*
     * Build set of all possible left contexts: the last phone of every
     * dictionary word unless it is a filler phone, plus the silence phone.
     */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    /* Flatten the bit vector into a BAD_S3CIPID-terminated list */
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical trees; values below 1 are forced to 1 */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list; wp is reused below for the filler words */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lm) {
        E_INFO("Creating Unigram Table\n");
        n=0; /* overwritten by the next statement */
        n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n",n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1; /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n", kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    /* Gather every filler word with its filler penalty as probability */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    /* Scratch tables are no longer needed once the lextrees exist */
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n", kb->n_lextree, lextree_n_node(kb->fillertree[0]));

    /*
     * NOTE(review): kb->ugtree is rebuilt above only when lm is non-NULL,
     * yet it is read unconditionally here and in the histogram sizing
     * below; this relies on the single-LM assumption stated in the header.
     */
    if (cmd_ln_int32("-lextreedump")) {
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    /*
     * Size the HMM histogram: total node count over all lextrees divided by
     * the bin size.  NOTE(review): a "-hmmhistbinsize" of 0 would divide by
     * zero here -- confirm the option is validated elsewhere.
     */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */
}