/*
 * Read per-senone interpolation weights from the binary file `file_name`
 * into `ip`.
 *
 * The file consists of a bio header (argument/value pairs plus byte-order
 * magic), a 32-bit senone count, and one float weight per senone.  Each
 * weight f must lie in [0, 1]; it is stored as a pair of log values:
 *   wt[i].cd = log(f)      (S3_LOGPROB_ZERO when f == 0)
 *   wt[i].ci = log(1 - f)  (S3_LOGPROB_ZERO when f == 1)
 * If the header carries a "chksum0" argument the running checksum is
 * verified after the data.  Any I/O or format problem is reported through
 * E_FATAL / E_FATAL_SYSTEM.
 *
 * Sets ip->n_sen and allocates ip->wt.  Always returns 1.
 */
static int32
interp_read(interp_t * ip, const char *file_name)
{
    FILE *infile;
    char **hdr_name, **hdr_val;
    int32 swap, have_chksum;
    uint32 running_chksum;
    int32 k;
    float weight;
    char extra;

    E_INFO("Reading interpolation weights: %s\n", file_name);

    infile = fopen(file_name, "rb");
    if (infile == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Header: argument-value pairs and the 32-bit byte-order magic */
    if (bio_readhdr(infile, &hdr_name, &hdr_val, &swap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Walk the NULL-terminated argument list */
    have_chksum = 0;
    k = 0;
    while (hdr_name[k] != NULL) {
        if (strcmp(hdr_name[k], "version") == 0) {
            /* A version mismatch is only worth a warning */
            if (strcmp(hdr_val[k], INTERP_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, hdr_val[k], INTERP_VERSION);
        }
        else if (strcmp(hdr_name[k], "chksum0") == 0) {
            /* Presence is all that matters; the value is ignored */
            have_chksum = 1;
        }
        k++;
    }
    bio_hdrarg_free(hdr_name, hdr_val);
    hdr_name = NULL;
    hdr_val = NULL;

    running_chksum = 0;

    /* Number of senones */
    if (bio_fread(&(ip->n_sen), sizeof(int32), 1, infile, swap,
                  &running_chksum) != 1)
        E_FATAL("fread(%s) (arraysize) failed\n", file_name);
    if (ip->n_sen <= 0)
        E_FATAL("%s: arraysize= %d in header\n", file_name, ip->n_sen);

    ip->wt = (struct interp_wt_s *) ckd_calloc(ip->n_sen,
                                               sizeof(struct interp_wt_s));

    /* One weight per senone, converted to the log domain as it is read */
    k = 0;
    while (k < ip->n_sen) {
        if (bio_fread(&weight, sizeof(float32), 1, infile, swap,
                      &running_chksum) != 1)
            E_FATAL("fread(%s) (arraydata) failed\n", file_name);
        if ((weight < 0.0) || (weight > 1.0))
            E_FATAL("%s: interpolation weight(%d)= %e\n", file_name, k,
                    weight);

        if (weight == 0.0)
            ip->wt[k].cd = S3_LOGPROB_ZERO;
        else
            ip->wt[k].cd = logs3(ip->logmath, weight);

        if (weight == 1.0)
            ip->wt[k].ci = S3_LOGPROB_ZERO;
        else
            ip->wt[k].ci = logs3(ip->logmath, 1.0 - weight);

        k++;
    }

    if (have_chksum)
        bio_verify_chksum(infile, swap, running_chksum);

    /* Nothing may follow the weight array */
    if (fread(&extra, 1, 1, infile) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(infile);

    E_INFO("Read %d interpolation weights\n", ip->n_sen);

    return 1;
}
/*
 * ARCHAN: to allow backward compatibility, -lm and -lmctlfn coexist.  This
 * makes the current implementation more complicated than necessary.
 *
 * kb_init: zero `kb` and populate it from the command-line configuration.
 *
 * Steps, in order:
 *   1. Build the kbcore (models, dictionary, LM or LM set) via kbcore_init.
 *   2. Verify that the start/finish words exist in the dictionary and in
 *      every LM, and that the transition matrices pass tmat_chk_1skip.
 *   3. Unlink the start/finish words between dictionary and LM(s).
 *   4. Allocate the senone / sseq activity arrays.
 *   5. Collect the set of left-context CI phones (last phone of every
 *      non-filler-ending word, plus silence).
 *   6. Build unigram lextrees (per LM when an LM set is in use) and the
 *      filler lextrees.
 *   7. Read beams and the assorted pruning / down-sampling parameters.
 *   8. Allocate feature buffers, Viterbi history, timers and statistics.
 *   9. Open the optional -hypseg and -hyp output files.
 *
 * Unrecoverable configuration problems are reported with E_FATAL.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              cmd_ln_str("-feat"),
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-lmctlfn"),
                              cmd_ln_str("-lmdumpdir"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_str("-senmgau"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-gs"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if (kb->kbcore == NULL) {
        E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset = kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
                E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD, lmset[i].name);
        }
    }
    else if (lm) {
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
            lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

            for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
            for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
        }
    }
    else if (lm) {
        /* Single-LM case: the dict->LM map lives in kbcore, not in the LM */
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        /* The final phone of each word can precede any word start */
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;     /* list terminator */

    E_INFO("Building lextrees\n");

    /* Get the number of lexical trees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lmset) {
        kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore) * kb->n_lextree, sizeof(lextree_t *));
        /* Just allocate pointers; they are not filled in by this function */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            E_INFO("Creating Unigram Table for lm %d name %s\n", i, lmset[i].name);

            /* Reset the scratch table before every LM */
            for (j = 0; j < dict_size(dict); j++) {
                wp[j].wid = -1;
                wp[j].prob = -1;
            }
            n = lm_ug_wordprob (lmset[i].lm, dict, MAX_NEG_INT32, wp);
            E_INFO("Size of word table after unigram + words in class: %d.\n", n);
            if (n < 1)
                E_FATAL("%d active words in %s\n", n, lmset[i].name);
            n = wid_wordprob2alt(dict, wp, n);
            E_INFO("Size of word table after adding alternative prons: %d.\n", n);

            if (cmd_ln_int32("-treeugprob") == 0) {
                /*
                 * Flatten all initial probabilities.  This loop must not use
                 * `i`: that is the LM index of the enclosing loop and is
                 * needed below to address ugtreeMulti and lmset.
                 */
                for (j = 0; j < n; j++)
                    wp[j].prob = -1;
            }

            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtreeMulti[i * kb->n_lextree + j] = lextree_build (kbcore, wp, n, lc);
                lextree_type (kb->ugtreeMulti[i * kb->n_lextree + j]) = 0;
                E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
                       kb->n_lextree, i, lmset[i].name,
                       lextree_n_node(kb->ugtreeMulti[i * kb->n_lextree + j]));
            }
        }
    }
    else if (lm) {
        E_INFO("Creating Unigram Table\n");
        n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n", n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;        /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be built even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree, lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        if (lmset) {
            E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    /* Section of optimization-related parameters */
    kb->ds_ratio = cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n", kb->ds_ratio);

    kb->rec_bstcid = -1;
    kb->skip_count = 0;

    kb->cond_ds = cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n", kb->cond_ds);

    if (kb->cond_ds > 0 && kb->kbcore->gs == NULL)
        E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs = cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n", kb->gs4gs);

    kb->svq4svq = cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n", kb->svq4svq);

    kb->ci_pbeam = -1 * logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n", kb->ci_pbeam);
    /* NOTE(review): the threshold tested (10000000) and the one printed in
     * the message (1000000) differ; confirm which was intended. */
    if (kb->ci_pbeam > 10000000) {
        E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }

    kb->wend_beam = -1 * logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n", kb->wend_beam);

    kb->pl_window = cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n", kb->pl_window);

    kb->pl_window_start = 0;

    kb->pl_beam = logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n", kb->pl_beam);

    /*
     * Count the leading senones that map to themselves in cd2cisen.  The
     * scan is bounded by the senone count so that it cannot run past the
     * table if every entry maps to itself (assumes cd2cisen holds
     * mdef_n_sen(mdef) entries -- TODO confirm against mdef).
     */
    for (cisencnt = 0;
         cisencnt < mdef_n_sen(mdef) && cisencnt == mdef->cd2cisen[cisencnt];
         cisencnt++)
        ;

    kb->cache_ci_senscr = (int32 **) ckd_calloc_2d (kb->pl_window, cisencnt, sizeof(int32));
    kb->cache_best_list = (int32 *) ckd_calloc (kb->pl_window, sizeof(int32));
    kb->phn_heur_list = (int32 *) ckd_calloc (mdef_n_ciphone (mdef), sizeof(int32));

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* HMM histogram: one bin per hmm_hist_binsize nodes over all lextrees */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if (lmset)
        n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
        n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n + 1;
    kb->hmm_hist = (int32 *) ckd_calloc (n + 1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified; failure is reported but not fatal */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    /* Open hyp file if specified; failure is reported but not fatal */
    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/*
 * Load HMM transition probability matrices from the binary file `file_name`.
 *
 * The file holds a bio header, four 32-bit integers (#matrices, #source
 * states, #destination states, total float count) and then the float32
 * matrices themselves.  #destination states must equal #source states + 1.
 * Every row is normalized, floored at `tpfloor` (non-zero entries only),
 * re-normalized and converted with logs3(); entries that are exactly 0.0
 * become S3_LOGPROB_ZERO.
 *
 * Returns a newly allocated tmat_t.  Malformed input, including a matrix set
 * that fails tmat_chk_uppertri, is reported through E_FATAL.
 */
tmat_t *tmat_init (char *file_name, float64 tpfloor)
{
    tmat_t *tm;
    FILE *infile;
    char **hdr_name, **hdr_val;
    int32 swap, have_chksum;
    uint32 running_chksum;
    int32 n_from, n_to, n_float, row_block;
    int32 m, row, col;
    float32 **scratch;
    char extra;

    E_INFO("Reading HMM transition probability matrices: %s\n", file_name);

    tm = (tmat_t *) ckd_calloc (1, sizeof(tmat_t));

    infile = fopen(file_name, "rb");
    if (infile == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Header: argument-value pairs and the 32-bit byte-order magic */
    if (bio_readhdr (infile, &hdr_name, &hdr_val, &swap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Walk the NULL-terminated argument list */
    have_chksum = 0;
    m = 0;
    while (hdr_name[m] != NULL) {
        if (strcmp (hdr_name[m], "version") == 0) {
            if (strcmp(hdr_val[m], TMAT_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, hdr_val[m], TMAT_PARAM_VERSION);
        }
        else if (strcmp (hdr_name[m], "chksum0") == 0) {
            have_chksum = 1;    /* the associated value is ignored */
        }
        m++;
    }
    bio_hdrarg_free (hdr_name, hdr_val);
    hdr_name = NULL;
    hdr_val = NULL;

    running_chksum = 0;

    /* Dimensions: #tmat, #from-states, #to-states, array size */
    if (bio_fread (&(tm->n_tmat), sizeof(int32), 1, infile, swap, &running_chksum) != 1) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (bio_fread (&n_from, sizeof(int32), 1, infile, swap, &running_chksum) != 1) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (bio_fread (&n_to, sizeof(int32), 1, infile, swap, &running_chksum) != 1) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (bio_fread (&n_float, sizeof(int32), 1, infile, swap, &running_chksum) != 1) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }

    if (tm->n_tmat >= MAX_S3TMATID)
        E_FATAL("%s: #tmat (%d) exceeds limit (%d)\n", file_name, tm->n_tmat, MAX_S3TMATID);
    if (n_to != n_from + 1)
        E_FATAL("%s: #from-states(%d) != #to-states(%d)-1\n", file_name, n_from, n_to);
    tm->n_state = n_from;

    if (n_float != tm->n_tmat * n_from * n_to) {
        E_FATAL("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
                file_name, n_float, tm->n_tmat, n_from, n_to);
    }

    /* Final log-domain storage */
    tm->tp = (int32 ***) ckd_calloc_3d (tm->n_tmat, n_from, n_to, sizeof(int32));

    /* Scratch matrix that receives one raw float matrix at a time */
    scratch = (float32 **) ckd_calloc_2d (n_from, n_to, sizeof(float32));

    row_block = n_from * n_to;
    for (m = 0; m < tm->n_tmat; m++) {
        /* scratch[0] is the start of the matrix's contiguous storage */
        if (bio_fread (scratch[0], sizeof(float32), row_block, infile,
                       swap, &running_chksum) != row_block) {
            E_FATAL("fread(%s) (arraydata) failed\n", file_name);
        }

        for (row = 0; row < n_from; row++) {
            float32 *probs = scratch[row];

            /* Normalize, floor the non-zero entries, normalize again */
            if (vector_sum_norm (probs, n_to) == 0.0)
                E_ERROR("Normalization failed for tmat %d from state %d\n", m, row);
            vector_nz_floor (probs, n_to, tpfloor);
            vector_sum_norm (probs, n_to);

            /* Convert to logs3; a zero probability has no finite log */
            for (col = 0; col < n_to; col++) {
                if (probs[col] == 0.0)
                    tm->tp[m][row][col] = S3_LOGPROB_ZERO;
                else
                    tm->tp[m][row][col] = logs3(probs[col]);
            }
        }
    }

    ckd_free_2d ((void **) scratch);

    if (have_chksum)
        bio_verify_chksum (infile, swap, running_chksum);

    /* Trailing bytes are reported but tolerated */
    if (fread (&extra, 1, 1, infile) == 1)
        E_ERROR("Non-empty file beyond end of data\n");

    fclose(infile);

    E_INFO("Read %d transition matrices of size %dx%d\n",
           tm->n_tmat, tm->n_state, tm->n_state+1);

    if (tmat_chk_uppertri (tm) < 0)
        E_FATAL("Tmat not upper triangular\n");

    return tm;
}
/*
 * Build a word-level FSG from the parsed grammar description `fsg`.
 *
 *   fsg          source grammar (must be non-NULL; asserted)
 *   use_altpron  non-zero: map each word to its base word id and add a
 *                transition for every alternative pronunciation
 *   use_filler   non-zero: add silence/filler transitions, using silprob and
 *                fillerprob taken from kbc->fillpen
 *   kbc          knowledge-base core supplying dictionary, mdef, tmat,
 *                filler penalties and the log-math object
 *
 * Each transition probability is converted with logs3() and scaled by the
 * language weight from kbc->fillpen.  After all transitions are added, the
 * null-transition closure and the per-state left/right context lists are
 * computed.
 *
 * Returns the new word_fsg_t, or NULL (after E_ERROR) when the filler
 * probabilities are negative, the state numbering is inconsistent, or a
 * transition has out-of-range states or a probability outside (0, 1].
 */
word_fsg_t *
word_fsg_load(s2_fsg_t * fsg, int use_altpron, int use_filler, kbcore_t *kbc)
{
    float32 silprob = kbc->fillpen->silprob;
    float32 fillprob = kbc->fillpen->fillerprob;
    float32 lw = kbc->fillpen->lw;
    word_fsg_t *wfsg;
    s2_fsg_trans_t *tr;
    int32 total, n_eps, n_alt, n_fill, n_unknown;
    int32 wid, alt;
    int32 score;
    glist_t eps_links;
    int32 src, dst;

    assert(fsg);

    /* ---- Sanity checks on the inputs ---- */
    if (lw <= 0.0)
        E_WARN("Unusual language-weight value: %.3e\n", lw);

    if (use_filler && ((silprob < 0.0) || (fillprob < 0.0))) {
        E_ERROR("silprob/fillprob must be >= 0\n");
        return NULL;
    }

    if ((fsg->n_state <= 0)
        || ((fsg->start_state < 0) || (fsg->start_state >= fsg->n_state))
        || ((fsg->final_state < 0) || (fsg->final_state >= fsg->n_state))) {
        E_ERROR("Bad #states/start_state/final_state values: %d/%d/%d\n",
                fsg->n_state, fsg->start_state, fsg->final_state);
        return NULL;
    }

    /* Reject the whole grammar if any single transition is malformed */
    tr = fsg->trans_list;
    while (tr != NULL) {
        if ((tr->from_state < 0) || (tr->from_state >= fsg->n_state)
            || (tr->to_state < 0) || (tr->to_state >= fsg->n_state)
            || (tr->prob <= 0) || (tr->prob > 1.0)) {
            E_ERROR("Bad transition: P(%d -> %d) = %e\n",
                    tr->from_state, tr->to_state, tr->prob);
            return NULL;
        }
        tr = tr->next;
    }

    /* ---- Create and fill in the word_fsg header ---- */
    wfsg = (word_fsg_t *) ckd_calloc(1, sizeof(word_fsg_t));
    wfsg->name = ckd_salloc(fsg->name ? fsg->name : "");
    wfsg->n_state = fsg->n_state;
    wfsg->start_state = fsg->start_state;
    wfsg->final_state = fsg->final_state;
    wfsg->use_altpron = use_altpron;
    wfsg->use_filler = use_filler;
    wfsg->lw = lw;
    wfsg->lc = NULL;
    wfsg->rc = NULL;
    wfsg->dict = kbc->dict;
    wfsg->mdef = kbc->mdef;
    wfsg->tmat = kbc->tmat;
    wfsg->n_ciphone = mdef_n_ciphone(kbc->mdef);

    /* n_state x n_state table of word (non-epsilon) transition lists */
    wfsg->trans = (glist_t **) ckd_calloc_2d(wfsg->n_state,
                                             wfsg->n_state,
                                             sizeof(glist_t));
    /* n_state x n_state table of epsilon transition links */
    wfsg->null_trans = (word_fsglink_t ***) ckd_calloc_2d(wfsg->n_state,
                                                          wfsg->n_state,
                                                          sizeof(word_fsglink_t *));

    /* ---- Add every transition of the source grammar ---- */
    total = 0;
    n_eps = 0;
    n_alt = 0;
    n_fill = 0;
    n_unknown = 0;
    eps_links = NULL;

    tr = fsg->trans_list;
    while (tr != NULL) {
        /* Log-domain probability with the language weight applied */
        score = (int32) (logs3(kbcore_logmath(kbc), tr->prob) * lw);

        if (!tr->word) {
            wid = -1;           /* Null transition */
        }
        else {
            /* Look the word up in the dictionary */
            wid = dict_wordid(kbc->dict, tr->word);
            if (wid < 0) {
                /* NOTE(review): wid stays negative here, so the transition
                 * below is added as a null transition -- confirm intended. */
                E_ERROR("Unknown word '%s'; ignored\n", tr->word);
                n_unknown++;
            }
            else if (use_altpron) {
                wid = dict_basewid(kbc->dict, wid);
                assert(wid >= 0);
            }
        }

        src = tr->from_state;
        dst = tr->to_state;

        if (wid >= 0) {
            word_fsg_trans_add(wfsg, src, dst, score, wid);

            /* One extra transition per alternative pronunciation */
            if (use_altpron) {
                alt = dict_nextalt(kbc->dict, wid);
                while (alt >= 0) {
                    word_fsg_trans_add(wfsg, src, dst, score, alt);
                    n_alt++;
                    total++;
                    alt = dict_nextalt(kbc->dict, alt);
                }
            }
        }
        else if (word_fsg_null_trans_add(wfsg, src, dst, score) == 1) {
            /* Remember each newly created null link for the closure step */
            n_eps++;
            eps_links = glist_add_ptr(eps_links,
                                      (void *) wfsg->null_trans[src][dst]);
        }

        total++;
        tr = tr->next;
    }

    /* Silence and noise filler transitions, if requested */
    if (use_filler) {
        n_fill = word_fsg_add_filler(wfsg, silprob, fillprob,
                                     kbcore_logmath(kbc));
        total += n_fill;
    }

    E_INFO
        ("FSG: %d states, %d transitions (%d null, %d alt, %d filler, %d unknown)\n",
         wfsg->n_state, total, n_eps, n_alt, n_fill, n_unknown);

#if __FSG_DBG__
    E_INFO("FSG before NULL closure:\n");
    word_fsg_write(wfsg, stdout);
#endif

    /* Transitive closure over the null transitions */
    eps_links = word_fsg_null_trans_closure(wfsg, eps_links);
    glist_free(eps_links);

#if __FSG_DBG__
    E_INFO("FSG after NULL closure:\n");
    word_fsg_write(wfsg, stdout);
#endif

    /* Left and right context CI-phone lists for each state */
    word_fsg_lc_rc(wfsg);

#if __FSG_DBG__
    E_INFO("FSG after lc/rc:\n");
    word_fsg_write(wfsg, stdout);
#endif

    return wfsg;
}
/*
 * Create the filler-word penalty table for `dict`.
 *
 *   dict      dictionary; its filler words occupy ids filler_start..filler_end
 *   file      optional text file of "<word> <prob>" lines ('#' starts a
 *             comment line); NULL means use only silprob/fillprob
 *   silprob   probability assigned to S3_SILENCE_WORD
 *   fillprob  probability assigned to every other filler word
 *   lw, wip   language weight and word insertion penalty
 *
 * Every stored penalty is (int32)((logs3(prob) + logs3(wip)) * lw).
 *
 * Returns the new fillpen_t.  When `file` is NULL the function returns right
 * after applying silprob/fillprob, i.e. without the alternative-pronunciation
 * replication step that follows the file pass.  A missing silence word, an
 * unreadable file, a malformed line, or a word that is not a filler word is
 * reported through E_FATAL.
 */
fillpen_t *fillpen_init (dict_t *dict, char *file, float64 silprob, float64 fillprob,
                         float64 lw, float64 wip)
{
    s3wid_t w, bw;
    float64 prob;
    FILE *fp;
    char line[1024], wd[1024];
    int32 k;
    fillpen_t *_fillpen;

    _fillpen = (fillpen_t *) ckd_calloc (1, sizeof(fillpen_t));

    _fillpen->dict = dict;
    _fillpen->lw = lw;
    _fillpen->wip = wip;
    if (dict->filler_end >= dict->filler_start)
        _fillpen->prob = (int32 *) ckd_calloc (dict->filler_end - dict->filler_start + 1,
                                               sizeof(int32));
    else
        _fillpen->prob = NULL;  /* no filler words at all */

    /* Initialize all words with filler penalty (HACK!! backward compatibility) */
    prob = fillprob;
    for (w = dict->filler_start; w <= dict->filler_end; w++)
        _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite silence penalty (HACK!! backward compatibility) */
    w = dict_wordid (dict, S3_SILENCE_WORD);
    if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
        E_FATAL("%s not a filler word in the given dictionary\n", S3_SILENCE_WORD);
    prob = silprob;
    _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite with filler prob input file, if specified */
    if (! file)
        return _fillpen;

    E_INFO("Reading filler penalty file: %s\n", file);
    if ((fp = fopen (file, "r")) == NULL)
        E_FATAL("fopen(%s,r) failed\n", file);
    while (fgets (line, sizeof(line), fp) != NULL) {
        if (line[0] == '#')     /* Skip comment lines */
            continue;

        k = sscanf (line, "%s %lf", wd, &prob);
        /*
         * sscanf yields EOF for a blank line, in which case wd and prob are
         * not written; such lines are skipped so that stale or unset values
         * never reach the dictionary lookup below.
         */
        if (k <= 0)
            continue;
        if (k != 2)
            E_FATAL("Bad input line: %s\n", line);

        w = dict_wordid(dict, wd);
        if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
            E_FATAL("%s not a filler word in the given dictionary\n", wd);

        _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);
    }
    fclose (fp);

    /* Replicate fillpen values for alternative pronunciations */
    for (w = dict->filler_start; w <= dict->filler_end; w++) {
        bw = dict_basewid (dict, w);
        if (bw != w)
            _fillpen->prob[w - dict->filler_start] = _fillpen->prob[bw - dict->filler_start];
    }

    return _fillpen;
}
acoustic_t *acoustic_init (feat_t *f, gauden_t *g, senone_t *s, float64 beam, int32 maxfr) { acoustic_t *am; int32 i; if (senone_n_mgau(s) != gauden_n_mgau(g)) { E_ERROR("#Parent mixture Gaussians mismatch: senone(%d), gauden(%d)\n", senone_n_mgau(s), gauden_n_mgau(g)); } if (feat_n_stream(f) != senone_n_stream(s)) { E_ERROR("#Feature-streams mismatch: feat(%d), senone(%d)\n", feat_n_stream(f), senone_n_stream(s)); } if (feat_n_stream(f) != gauden_n_stream(g)) { E_ERROR("#Feature-streams mismatch: feat(%d), gauden(%d)\n", feat_n_stream(f), gauden_n_stream(g)); return NULL; } for (i = 0; i < feat_n_stream(f); i++) { if (feat_stream_len(f, i) != gauden_stream_len(g, i)) { E_ERROR("Feature stream(%d) length mismatch: feat(%d), gauden(%d)\n", feat_stream_len(f, i), gauden_stream_len(g, i)); return NULL; } } if (beam > 1.0) { E_ERROR("mgaubeam > 1.0 (%e)\n", beam); return NULL; } am = (acoustic_t *) ckd_calloc (1, sizeof(acoustic_t)); am->fcb = f; am->gau = g; am->sen = s; am->mgaubeam = (beam == 0.0) ? LOGPROB_ZERO : logs3(beam); if (am->mgaubeam > 0) am->mgaubeam = 0; am->tot_mgau_eval = 0; am->tot_dist_valid = 0.0; am->dist_valid = (am->mgaubeam <= LOGPROB_ZERO) ? NULL : (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32)); if (f->compute_feat) { /* Input is MFC cepstra; feature vectors computed from that */ am->mfc = (float32 **) ckd_calloc_2d (maxfr, feat_cepsize(am->fcb), sizeof(float32)); am->feat = feat_array_alloc (f, 1); } else { /* Input is directly feature vectors */ am->mfc = NULL; am->feat = feat_array_alloc (f, maxfr); } am->dist = (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32)); am->gauden_active = bitvec_alloc (g->n_mgau); am->senscr = (int32 *) ckd_calloc (s->n_sen, sizeof(int32)); am->senscale = (int32 *) ckd_calloc (maxfr, sizeof(int32)); am->sen_active = bitvec_alloc (s->n_sen); return am; }
/*
 * Program entry point: parse the command line, build the knowledge base
 * (models, dictionary, LM, acoustic state, lextree, Viterbi history) and
 * hand every utterance listed in the -ctl file to decode_utt via
 * ctl_process.  Terminates with exit(0) once the control file has been
 * processed; initialization failures are reported through E_FATAL.
 */
int
main (int32 argc, char *argv[])
{
    kb_t kb;
    kbcore_t *kbcore;
    bitvec_t active;
    int32 w;

    cmd_ln_parse (arglist, argc, argv);
    unlimit();

    kbcore = kbcore_init (cmd_ln_float32("-logbase"),
                          cmd_ln_str("-feat"),
                          cmd_ln_str("-mdef"),
                          cmd_ln_str("-dict"),
                          cmd_ln_str("-fdict"),
                          cmd_ln_str("-compsep"),
                          cmd_ln_str("-lm"),
                          cmd_ln_str("-fillpen"),
                          cmd_ln_float32("-silprob"),
                          cmd_ln_float32("-fillprob"),
                          cmd_ln_float32("-lw"),
                          cmd_ln_float32("-wip"),
                          cmd_ln_str("-mean"),
                          cmd_ln_str("-var"),
                          cmd_ln_float32("-varfloor"),
                          cmd_ln_str("-senmgau"),
                          cmd_ln_str("-mixw"),
                          cmd_ln_float32("-mixwfloor"),
                          cmd_ln_str("-tmat"),
                          cmd_ln_float32("-tmatfloor"));
    /* Every field below is read through kbcore, so it must be valid */
    if (kbcore == NULL) {
        E_FATAL("Initialization of kb failed\n");
    }

    /* Here's the perfect candidate for inheritance */
    kb.mdef = kbcore->mdef;
    kb.dict = kbcore->dict;
    kb.lm = kbcore->lm;
    kb.fillpen = kbcore->fillpen;
    kb.tmat = kbcore->tmat;
    kb.dict2lmwid = kbcore->dict2lmwid;

    if ((kb.am = acoustic_init (kbcore->fcb, kbcore->gau, kbcore->sen,
                                cmd_ln_float32("-mgaubeam"),
                                S3_MAX_FRAMES)) == NULL) {
        E_FATAL("Acoustic models initialization failed\n");
    }

    kb.beam = logs3 (cmd_ln_float64("-beam"));
    kb.wordbeam = logs3 (cmd_ln_float64("-wordbeam"));
    kb.wordmax = cmd_ln_int32("-wordmax");

    /* Mark the active words (in the LM, or filler words) and build lextree */
    active = bitvec_alloc (dict_size (kb.dict));
    bitvec_clear_all (active, dict_size(kb.dict));
    for (w = 0; w < dict_size(kb.dict); w++) {
        if (IS_LMWID(kb.dict2lmwid[w]) || dict_filler_word (kb.dict, w))
            bitvec_set (active, w);
    }
    kb.lextree_root = lextree_build (kb.dict, kb.mdef, active,
                                     cmd_ln_int32("-flatdepth"));

    kb.vithist = (glist_t *) ckd_calloc (S3_MAX_FRAMES+2, sizeof(glist_t));
    kb.vithist++;       /* Allow for dummy frame -1 for start word */

    kb.lextree_active = NULL;
    kb.wd_last_sf = (int32 *) ckd_calloc (dict_size(kb.dict), sizeof(int32));

    kb.tm = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));
    kb.tm_search = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));

    ctl_process (cmd_ln_str("-ctl"),
                 cmd_ln_int32("-ctloffset"),
                 cmd_ln_int32("-ctlcount"),
                 decode_utt, &kb);

    exit(0);
}
/* RAH 4.16.01 This code has several leaks that must be fixed */
/*
 * Build the dictionary-to-phone-ID tables for `dict` under model
 * definition `mdef`.
 *
 * For every word the `internal` table receives one senone-sequence id per
 * phone position:
 *   - first phone of a multi-phone word: composite sseq over left contexts
 *     (from ldiph_comsseq), shared between words with the same first two
 *     phones;
 *   - interior phones: the ssid of the nearest triphone;
 *   - last phone of a multi-phone word: composite sseq from rdiph_comsseq;
 *   - single-phone words: composite sseq from single_comsseq.
 * ldiph_lc and single_lc additionally hold per-left-context entries.
 *
 * Composite states and composite sseqs are accumulated in two hash tables by
 * ssidlist2comsseq, which also advances n_comstate / n_comsseq; the call
 * order therefore matters and is kept word by word.  The hash tables are
 * finally unpacked into the comstate, comsseq and comwt arrays.
 *
 * Returns the newly allocated dict2pid_t.
 */
dict2pid_t *dict2pid_build (mdef_t *mdef, dict_t *dict)
{
    dict2pid_t *d2p;
    s3ssid_t *cursor, **ldiph_tab, **rdiph_tab, *single_tab;
    hash_table_t *state_hash, *sseq_hash;
    hash_entry_t *entry;
    glist_t lst;
    gnode_t *node;
    s3senid_t *senp;
    int32 *state_len;
    int32 plen, total, count;
    int32 wid, base, left, right, pid;
    int32 pos, idx, k;
    int32 n_ci;

    E_INFO("Building PID tables for dictionary\n");

    n_ci = mdef->n_ciphone;

    d2p = (dict2pid_t *) ckd_calloc (1, sizeof(dict2pid_t));
    d2p->internal = (s3ssid_t **) ckd_calloc (dict_size(dict), sizeof(s3ssid_t *));
    d2p->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d (n_ci, n_ci, n_ci, sizeof(s3ssid_t));
    d2p->single_lc = (s3ssid_t **) ckd_calloc_2d (n_ci, n_ci, sizeof(s3ssid_t));
    d2p->n_comstate = 0;
    d2p->n_comsseq = 0;

    state_hash = hash_new (n_ci * n_ci * mdef->n_emit_state, HASH_CASE_YES);
    sseq_hash = hash_new (n_ci * n_ci, HASH_CASE_YES);

    /* Total number of phone positions over the whole dictionary */
    total = 0;
    for (wid = 0; wid < dict_size(dict); wid++) {
        plen = dict_pronlen(dict, wid);
        if (plen < 0)
            E_FATAL("Pronunciation-length(%s)= %d\n", dict_wordstr(dict, wid), plen);
        total += plen;
    }

    /* One flat array; each word's `internal` entry points into it */
    cursor = (s3ssid_t *) ckd_calloc (total, sizeof(s3ssid_t));

    /* Temporary memo tables so each composite sseq is built only once */
    ldiph_tab = (s3ssid_t **) ckd_calloc_2d (n_ci, n_ci, sizeof(s3ssid_t));
    rdiph_tab = (s3ssid_t **) ckd_calloc_2d (n_ci, n_ci, sizeof(s3ssid_t));
    single_tab = (s3ssid_t *) ckd_calloc (n_ci, sizeof(s3ssid_t));

    /* Mark every table entry as "not yet computed" */
    for (base = 0; base < n_ci; base++) {
        for (left = 0; left < n_ci; left++) {
            for (right = 0; right < n_ci; right++)
                d2p->ldiph_lc[base][right][left] = BAD_S3SSID;

            d2p->single_lc[base][left] = BAD_S3SSID;
            ldiph_tab[base][left] = BAD_S3SSID;
            rdiph_tab[base][left] = BAD_S3SSID;
        }
        single_tab[base] = BAD_S3SSID;
    }

    for (wid = 0; wid < dict_size(dict); wid++) {
        d2p->internal[wid] = cursor;
        plen = dict_pronlen(dict, wid);

        if (plen == 1) {
            /* Single-phone word */
            base = dict_pron(dict, wid, 0);
            if (NOT_S3SSID(single_tab[base])) {
                lst = single_comsseq(mdef, base);
                single_tab[base] = ssidlist2comsseq (lst, mdef, d2p, state_hash, sseq_hash);
                glist_free (lst);

                for (left = 0; left < mdef_n_ciphone(mdef); left++) {
                    lst = single_lc_comsseq(mdef, base, left);
                    d2p->single_lc[base][left] =
                        ssidlist2comsseq (lst, mdef, d2p, state_hash, sseq_hash);
                    glist_free (lst);
                }
            }
            cursor[0] = single_tab[base];
        }
        else if (plen >= 2) {
            /* Word-initial phone: composite over all left contexts */
            base = dict_pron(dict, wid, 0);
            right = dict_pron(dict, wid, 1);
            if (NOT_S3SSID(ldiph_tab[base][right])) {
                lst = ldiph_comsseq(mdef, base, right);
                ldiph_tab[base][right] =
                    ssidlist2comsseq (lst, mdef, d2p, state_hash, sseq_hash);
                glist_free (lst);

                for (left = 0; left < mdef_n_ciphone(mdef); left++) {
                    pid = mdef_phone_id_nearest (mdef, (s3cipid_t)base, (s3cipid_t)left,
                                                 (s3cipid_t)right, WORD_POSN_BEGIN);
                    d2p->ldiph_lc[base][right][left] = mdef_pid2ssid(mdef, pid);
                }
            }
            cursor[0] = ldiph_tab[base][right];

            /* Word-internal phones: full triphone context is known */
            for (pos = 1; pos < plen-1; pos++) {
                left = dict_pron(dict, wid, pos-1);
                base = dict_pron(dict, wid, pos);
                right = dict_pron(dict, wid, pos+1);

                pid = mdef_phone_id_nearest(mdef, (s3cipid_t)base, (s3cipid_t)left,
                                            (s3cipid_t)right, WORD_POSN_INTERNAL);
                cursor[pos] = mdef_pid2ssid(mdef, pid);
            }

            /* Word-final phone: composite over all right contexts */
            left = dict_pron(dict, wid, plen-2);
            base = dict_pron(dict, wid, plen-1);
            if (NOT_S3SSID(rdiph_tab[base][left])) {
                lst = rdiph_comsseq(mdef, base, left);
                rdiph_tab[base][left] =
                    ssidlist2comsseq (lst, mdef, d2p, state_hash, sseq_hash);
                glist_free (lst);
            }
            cursor[plen-1] = rdiph_tab[base][left];
        }

        cursor += plen;
    }

    ckd_free_2d ((void **) ldiph_tab);
    ckd_free_2d ((void **) rdiph_tab);
    ckd_free ((void *) single_tab);

    /* Length (including the terminating sentinel) of each composite state */
    state_len = (int32 *) ckd_calloc (d2p->n_comstate, sizeof(int32));
    lst = hash_tolist(state_hash, &count);
    assert (count == d2p->n_comstate);

    total = 0;
    node = lst;
    while (node) {
        entry = (hash_entry_t *) gnode_ptr (node);
        senp = (s3senid_t *) hash_entry_key(entry);

        k = 0;
        while (IS_S3SENID(senp[k]))
            k++;

        state_len[hash_entry_val(entry)] = k+1;   /* +1 for terminating sentinel */
        total += (k+1);

        node = gnode_next(node);
    }

    /* comstate[i] points into one contiguous senone-id array */
    d2p->comstate = (s3senid_t **) ckd_calloc (d2p->n_comstate, sizeof(s3senid_t *));
    senp = (s3senid_t *) ckd_calloc (total, sizeof(s3senid_t));
    for (idx = 0; idx < d2p->n_comstate; idx++) {
        d2p->comstate[idx] = senp;
        senp += state_len[idx];
    }

    /* Copy each hash key into its slot, then release the key */
    node = lst;
    while (node) {
        entry = (hash_entry_t *) gnode_ptr (node);
        senp = (s3senid_t *) hash_entry_key(entry);
        idx = hash_entry_val(entry);

        for (k = 0; k < state_len[idx]; k++)
            d2p->comstate[idx][k] = senp[k];
        assert (senp[state_len[idx]-1] == BAD_S3SENID);

        ckd_free ((void *)senp);

        node = gnode_next(node);
    }
    ckd_free (state_len);
    glist_free (lst);
    hash_free (state_hash);

    /* Composite sseq table: the hash keys themselves become the entries */
    d2p->comsseq = (s3senid_t **) ckd_calloc (d2p->n_comsseq, sizeof(s3senid_t *));
    lst = hash_tolist (sseq_hash, &count);
    assert (count == d2p->n_comsseq);

    for (node = lst; node; node = gnode_next(node)) {
        entry = (hash_entry_t *) gnode_ptr (node);
        idx = hash_entry_val(entry);
        d2p->comsseq[idx] = (s3senid_t *) hash_entry_key(entry);
    }
    glist_free (lst);
    hash_free (sseq_hash);

    /*
     * Weight for each composite state: if comstate i has N states, its
     * weight is 1/N, stored as -logs3(N).  (A disabled alternative used
     * 1/N^2, i.e. twice this log value.)
     */
    d2p->comwt = (int32 *) ckd_calloc (d2p->n_comstate, sizeof(int32));
    for (idx = 0; idx < d2p->n_comstate; idx++) {
        senp = d2p->comstate[idx];

        k = 0;
        while (IS_S3SENID(senp[k]))
            k++;

        d2p->comwt[idx] = - logs3 ((float64)k);
    }

    E_INFO("%d composite states; %d composite sseq\n",
           d2p->n_comstate, d2p->n_comsseq);

    return d2p;
}