/**
 * Initialize a live decoder from command-line style arguments.
 *
 * When argc is exactly 2, argv[1] is handed to parse_args_file(); otherwise
 * argc/argv are parsed against arg_def with cmd_ln_parse().  The decoder
 * structure is then zeroed, its knowledge base is built with kb_init(), the
 * per-frame limits are read from the parsed options, a feature buffer is
 * requested from feat_array_alloc() with LIVEBUFBLOCKSIZE, and the front end
 * is created with fe_init().
 *
 * @param decoder  Decoder to initialize; it is memset to zero first.
 * @param argc     Number of entries in argv.
 * @param argv     Argument vector.
 * @return 0 on success, -1 if the feature buffer or the front end could not
 *         be created.
 */
int
ld_init(live_decoder_t *decoder, int argc, char **argv)
{
    param_t fe_param;

    if (argc == 2) {
        /* lgalescu: check if args need to be processed from file */
        parse_args_file(argv[1]);
    }
    else {
        cmd_ln_parse(arg_def, argc, argv);
    }

    unlimit();

    /* some decoder parameter capturing
     * !!! NOTE - HARDCODED FOR NOW.  REPLACE WITH PARSE_ARG() ASAP !!!!
     */
    memset(decoder, 0, sizeof(live_decoder_t));
    kb_init(&decoder->kb);
    decoder->max_wpf = cmd_ln_int32 ("-maxwpf");
    decoder->max_histpf = cmd_ln_int32 ("-maxhistpf");
    decoder->max_hmmpf = cmd_ln_int32 ("-maxhmmpf");
    decoder->phones_skip = cmd_ln_int32 ("-ptranskip");
    /* HMM dump goes to stderr when -hmmdump is non-zero, nowhere otherwise. */
    decoder->hmm_log = cmd_ln_int32("-hmmdump") ? stderr : NULL;

    decoder->kbcore = decoder->kb.kbcore;
    decoder->kb.uttid = decoder->uttid;
    decoder->hypsegs = 0;
    decoder->num_hypsegs = 0;
    decoder->hypstr_len = 0;
    decoder->hypstr[0] = '\0';

    decoder->features =
        feat_array_alloc(kbcore_fcb(decoder->kbcore), LIVEBUFBLOCKSIZE);
    if (!decoder->features) {
        /* Without a feature buffer the decoder cannot process any frame;
         * report it here instead of failing later on a NULL pointer. */
        E_WARN("Failed to allocate internal feature buffer.\n");
        return -1;
    }
    decoder->ld_state = LD_STATE_IDLE;

    /* some front-end parameter capturing
     * !!! NOTE - HARDCODED FOR NOW.  REPLACE WITH PARSE_ARG() ASAP !!!!
     */
    memset(&fe_param, 0, sizeof(param_t));
    fe_param.SAMPLING_RATE = (float32)cmd_ln_int32 ("-samprate");
    fe_param.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
    fe_param.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
    fe_param.NUM_FILTERS = cmd_ln_int32("-nfilt");
    fe_param.FRAME_RATE = cmd_ln_int32("-frate");
    fe_param.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
    fe_param.FFT_SIZE = cmd_ln_int32("-nfft");
    fe_param.WINDOW_LENGTH = cmd_ln_float32("-wlen");

    decoder->fe = fe_init(&fe_param);
    if (!decoder->fe) {
        E_WARN("Front end initialization fe_init() failed\n");
        return -1;
    }

    return 0;
}
/* This routine initializes decoder variables for live mode decoding */ void live_initialize_decoder(char *live_args) { static kb_t live_kb; int32 maxcepvecs, maxhyplen, samprate, ceplen; param_t *fe_param; /*char const *uttIdNotDefined = "null";*/ parse_args_file(live_args); unlimit(); kb_init(&live_kb); kb = &live_kb; kbcore = kb->kbcore; kb->uttid = ckd_calloc(1000,sizeof(char)); hmmdumpfp = cmd_ln_int32("-hmmdump") ? stderr : NULL; maxwpf = cmd_ln_int32 ("-maxwpf"); maxhistpf = cmd_ln_int32 ("-maxhistpf"); maxhmmpf = cmd_ln_int32 ("-maxhmmpf"); ptranskip = cmd_ln_int32 ("-ptranskip"); maxhyplen = cmd_ln_int32 ("-maxhyplen"); if (!parthyp) parthyp = (partialhyp_t *) ckd_calloc(maxhyplen, sizeof(partialhyp_t)); parthyplen = 0; fe_param = (param_t *) ckd_calloc(1, sizeof(param_t)); samprate = cmd_ln_int32 ("-samprate"); if (samprate != 8000 && samprate != 16000) E_FATAL("Sampling rate %d not supported. Must be 8000 or 16000\n",samprate); fe_param->SAMPLING_RATE = (float32) samprate; fe_param->LOWER_FILT_FREQ = cmd_ln_float32("-lowerf"); fe_param->UPPER_FILT_FREQ = cmd_ln_float32("-upperf"); fe_param->NUM_FILTERS = cmd_ln_int32("-nfilt"); /* 20040413, by ARCHAN. Clear the hardwiring. Hmm. Many people say no to do this, I just can't take it. */ fe_param->FRAME_RATE = cmd_ln_int32("-frate"); /* fe_param->FRAME_RATE = 100; */ fe_param->PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha"); fe_param->FFT_SIZE = cmd_ln_int32("-nfft"); fe_param->WINDOW_LENGTH = cmd_ln_float32("-wlen"); fe_param->doublebw=OFF; fe = fe_init(fe_param); if (!fe) E_FATAL("Front end initialization fe_init() failed\n"); maxcepvecs = cmd_ln_int32 ("-maxcepvecs"); ceplen = kbcore->fcb->cepsize; dummyframe = (float32*) ckd_calloc(1 * ceplen,sizeof(float32)); /* */ }
float64 cluster(int32 ts, uint32 n_stream, uint32 n_in_frame, uint32 *veclen, uint32 blksize, vector_t **mean, uint32 n_density, codew_t **out_label) { float64 sum_sqerr, sqerr=0; uint32 s, n_frame; const char *meth; *out_label = NULL; k_means_set_get_obs(&get_obs); for (s = 0, sum_sqerr = 0; s < n_stream; s++, sum_sqerr += sqerr) { meth = cmd_ln_str("-method"); n_frame = setup_obs(ts, s, n_in_frame, n_stream, veclen, blksize); if (strcmp(meth, "rkm") == 0) { sqerr = random_kmeans(cmd_ln_int32("-ntrial"), n_frame, veclen[s], mean[s], n_density, cmd_ln_float32("-minratio"), cmd_ln_int32("-maxiter"), out_label); if (sqerr < 0) { E_ERROR("Too few observations for kmeans\n"); return -1.0; } } else if (strcmp(meth, "fnkm") == 0) { sqerr = furthest_neighbor_kmeans(n_frame, veclen[s], mean[s], n_density, cmd_ln_float32("-minratio"), cmd_ln_int32("-maxiter")); } else { E_ERROR("I don't know how to do method '%s'. Sorry.\n", meth); } } return sum_sqerr; }
/* This routine initializes decoder variables for live mode decoding */ void live_initialize_decoder(char *live_args) { static kb_t live_kb; int32 maxcepvecs, maxhyplen, samprate, ceplen; param_t *fe_param; char const *uttIdNotDefined = "null"; parse_args_file(live_args); unlimit(); kb_init(&live_kb); kb = &live_kb; kbcore = kb->kbcore; kb->uttid = ckd_salloc(uttIdNotDefined); hmmdumpfp = cmd_ln_int32("-hmmdump") ? stderr : NULL; maxwpf = cmd_ln_int32 ("-maxwpf"); maxhistpf = cmd_ln_int32 ("-maxhistpf"); maxhmmpf = cmd_ln_int32 ("-maxhmmpf"); ptranskip = cmd_ln_int32 ("-ptranskip"); maxhyplen = cmd_ln_int32 ("-maxhyplen"); if (!parthyp) parthyp = (partialhyp_t *) ckd_calloc(maxhyplen, sizeof(partialhyp_t)); fe_param = (param_t *) ckd_calloc(1, sizeof(param_t)); samprate = cmd_ln_int32 ("-samprate"); if (samprate != 8000 && samprate != 16000) E_FATAL("Sampling rate %s not supported. Must be 8000 or 16000\n",samprate); fe_param->SAMPLING_RATE = (float32) samprate; fe_param->LOWER_FILT_FREQ = cmd_ln_float32("-lowerf"); fe_param->UPPER_FILT_FREQ = cmd_ln_float32("-upperf"); fe_param->NUM_FILTERS = cmd_ln_int32("-nfilt"); fe_param->FRAME_RATE = 100; /* HARD CODED TO 100 FRAMES PER SECOND */ fe_param->PRE_EMPHASIS_ALPHA = (float32) 0.97; fe = fe_init(fe_param); if (!fe) E_FATAL("Front end initialization fe_init() failed\n"); maxcepvecs = cmd_ln_int32 ("-maxcepvecs"); ceplen = kbcore->fcb->cepsize; dummyframe = (float32*) ckd_calloc(1 * ceplen,sizeof(float32)); /* */ #if defined(THRD) score_barrier = thread_barrier_init(NUM_THREADS); if (!score_barrier) E_FATAL("Cannot initialize score_barrier\n"); #endif }
THREAD_START process_thread(void *aParam) { ad_rec_t *in_ad = 0; int16 samples[BUFSIZE]; int32 num_samples; cond_wait(startEvent); if ((in_ad = ad_open_sps((int) cmd_ln_float32("-samprate"))) == NULL) { printf("Failed to open audio input device\n"); exit(1); } ad_start_rec(in_ad); while (cond_wait_timed(&finishEvent, TIMEOUT) == COND_TIMEDOUT) { num_samples = ad_read(in_ad, samples, BUFSIZE); if (num_samples > 0) { /** dump the recorded audio to disk */ if (fwrite(samples, sizeof(int16), num_samples, dump) < num_samples) { printf("Error writing audio to dump file.\n"); } ld_process_raw(&decoder, samples, num_samples); } } ad_stop_rec(in_ad); ad_close(in_ad); ld_end_utt(&decoder); return 0; }
/*
 * Read a language model from lmpath and register it with the decoder's
 * search under the name lmname.
 *
 * The model is loaded with lm_read_advance() using the "-lw", "-wip" and
 * "-uw" options and the decoder's dictionary size, then passed to the
 * search's srch_add_lm callback.
 */
void
ld_read_lm(live_decoder_t * _decoder, const char *lmpath, const char *lmname)
{
    int32 n_dict_words = dict_size(_decoder->kb.kbcore->dict);
    srch_t *search = (srch_t *) _decoder->kb.srch;
    lm_t *model;

    model = lm_read_advance(lmpath,
                            lmname,
                            cmd_ln_float32("-lw"),
                            cmd_ln_float32("-wip"),
                            cmd_ln_float32("-uw"),
                            n_dict_words,
                            NULL,
                            1 /* Weight apply */ );

    search->srch_add_lm(search, model, lmname);
}
/*
 * Build the decoder knowledge base in *kb from the parsed command line.
 *
 * Steps, in order:
 *   1. zero *kb and create the kbcore with kbcore_init();
 *   2. check that the start/finish words exist in both dictionary and LM and
 *      that tmat_chk_1skip() accepts the transition matrices (E_FATAL
 *      otherwise);
 *   3. unlink the start/finish words between dictionary and LM;
 *   4. allocate the senone / ssid / composite-ssid activity arrays;
 *   5. build the unigram lextrees and the filler lextrees (-Nlextree of each);
 *   6. create the acoustic score, beam, feature and Viterbi history structures;
 *   7. reset the timers and counters and size the HMM histogram;
 *   8. open the -hypseg output file when that option is set.
 *
 * kb: structure to fill in; its previous contents are overwritten.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              "1s_c_d_dd", /* Hack!! Hardwired constant for -feat argument */
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "", /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));

    /* Shorthand handles into the freshly built core. */
    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
        E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    /* The same unlinking, from the dictionary side, for every alternative
     * pronunciation reachable through dict_nextalt(). */
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
        kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
        kbcore->dict2lmwid[w] = BAD_S3LMWID;

    /* sil is only used for this sanity check. */
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    E_INFO("Building lextrees\n");

    /* One int32 slot per senone, per senone sequence and per composite
     * senone sequence. */
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
    n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp);
    if (n < 1)
        E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */

    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
        for (i = 0; i < n; i++)
            wp[i].prob = -1; /* Flatten all initial probabilities */
    }

    /* Build set of all possible left contexts */
    /* lc gets one extra slot for the BAD_S3CIPID terminator written below. */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        /* Last phone of the word's pronunciation; filler phones are skipped. */
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    /* The silence phone is always included. */
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    /* Create the desired no. of unigram lextrees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
        lextree_type (kb->ugtree[i]) = 0;
    }
    bitvec_free (lc_active);
    ckd_free ((void *) lc);

    /* Create filler lextrees */
    /* wp is reused here: it is refilled with the filler words only. */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        /* Filler trees are built with no left-context list and type -1. */
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);

    E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n",
           kb->n_lextree,
           lextree_n_node(kb->ugtree[0]),
           lextree_n_node(kb->fillertree[0]));

    /* Optional debugging dump of every tree to stderr. */
    if (cmd_ln_int32("-lextreedump")) {
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    /* Timers and running totals start from zero. */
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* Histogram size: total node count over all trees divided by the bin
     * size.  NOTE(review): a -hmmhistbinsize of 0 would divide by zero
     * here - confirm the option is validated elsewhere. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */

    /* Open hypseg file if specified */
    /* Failure to open is only logged; matchsegfp then stays NULL. */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef WIN32
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
static int extract_pitch(const char *in, const char *out) { FILE *infh = NULL, *outfh = NULL; size_t flen, fshift, nsamps; int16 *buf = NULL; yin_t *yin = NULL; uint16 period, bestdiff; int32 sps; if (out) { if ((outfh = fopen(out, "w")) == NULL) { E_ERROR_SYSTEM("Failed to open %s for writing", out); goto error_out; } } else { outfh = stdout; } if ((infh = fopen(in, "rb")) == NULL) { E_ERROR_SYSTEM("Failed to open %s for reading", in); goto error_out; } /* If we weren't told what the file type is, weakly try to * determine it (actually it's pretty obvious) */ if (!(cmd_ln_boolean("-raw") || cmd_ln_boolean("-mswav") || cmd_ln_boolean("-nist"))) { if (guess_file_type(in, infh) < 0) goto error_out; } /* Grab the sampling rate and byte order from the header and also * make sure this is 16-bit linear PCM. */ if (cmd_ln_boolean("-mswav")) { if (read_riff_header(infh) < 0) goto error_out; } else if (cmd_ln_boolean("-nist")) { if (read_nist_header(infh) < 0) goto error_out; } else if (cmd_ln_boolean("-raw")) { /* Just use some defaults for sampling rate and endian. */ if (cmd_ln_str("-input_endian") == NULL) { if (WORDS_BIGENDIAN) cmd_ln_set_str("-input_endian", "big"); else cmd_ln_set_str("-input_endian", "little"); } if (cmd_ln_int32("-samprate") == 0) cmd_ln_set_int32("-samprate", 16000); } /* Now read frames and write pitch estimates. */ sps = cmd_ln_int32("-samprate"); flen = (size_t)(0.5 + sps * cmd_ln_float32("-flen")); fshift = (size_t)(0.5 + sps * cmd_ln_float32("-fshift")); yin = yin_init(flen, cmd_ln_float32("-voice_thresh"), cmd_ln_float32("-search_range"), cmd_ln_int32("-smooth_window")); if (yin == NULL) { E_ERROR("Failed to initialize YIN\n"); goto error_out; } buf = ckd_calloc(flen, sizeof(*buf)); /* Read the first full frame of data. */ if (fread(buf, sizeof(*buf), flen, infh) != flen) { /* Fail silently, which is probably okay. */ } yin_start(yin); nsamps = 0; while (!feof(infh)) { /* Process a frame of data. 
*/ yin_write(yin, buf); if (yin_read(yin, &period, &bestdiff)) { fprintf(outfh, "%.3f %.2f %.2f\n", /* Time point. */ (double)nsamps/sps, /* "Probability" of voicing. */ bestdiff > 32768 ? 0.0 : 1.0 - (double)bestdiff / 32768, /* Pitch (possibly bogus) */ period == 0 ? sps : (double)sps / period); nsamps += fshift; } /* Shift it back and get the next frame's overlap. */ memmove(buf, buf + fshift, (flen - fshift) * sizeof(*buf)); if (fread(buf + flen - fshift, sizeof(*buf), fshift, infh) != fshift) { /* Fail silently (FIXME: really?) */ } } yin_end(yin); /* Process trailing frames of data. */ while (yin_read(yin, &period, &bestdiff)) { fprintf(outfh, "%.3f %.2f %.2f\n", /* Time point. */ (double)nsamps/sps, /* "Probability" of voicing. */ bestdiff > 32768 ? 0.0 : 1.0 - (double)bestdiff / 32768, /* Pitch (possibly bogus) */ period == 0 ? sps : (double)sps / period); } if (yin) yin_free(yin); ckd_free(buf); fclose(infh); if (outfh != stdout) fclose(outfh); return 0; error_out: yin_free(yin); ckd_free(buf); if (infh) fclose(infh); if (outfh && outfh != stdout) fclose(outfh); return -1; }
/*
 * GObject property setter for the PocketSphinx element.
 *
 * Each property is stored in the decoder configuration through the
 * gst_pocketsphinx_set_string() / gst_pocketsphinx_set_boolean() helpers.
 * Setting an LM clears the FSG option and vice versa.  When
 * sink->ad.initialized is set, a new LM or FSG file is also loaded and
 * selected immediately.  Unknown property ids are reported with
 * G_OBJECT_WARN_INVALID_PROPERTY_ID.
 */
static void
gst_pocketsphinx_set_property(GObject * object, guint prop_id,
                              const GValue * value, GParamSpec * pspec)
{
    GstPocketSphinx *sink = GST_POCKETSPHINX(object);

    if (prop_id == PROP_HMM_DIR) {
        gst_pocketsphinx_set_string(sink, "-hmm", value);
    }
    else if (prop_id == PROP_LM_FILE) {
        /* FSG and LM are mutually exclusive. */
        gst_pocketsphinx_set_string(sink, "-fsg", NULL);
        gst_pocketsphinx_set_string(sink, "-lm", value);

        /* Switch to this new LM if the decoder is active. */
        if (sink->ad.initialized) {
            /* The file path doubles as the LM name. */
            const char *lm_path = g_value_get_string(value);

            lm_read(lm_path,
                    lm_path,
                    cmd_ln_float32("-lw"),
                    cmd_ln_float32("-uw"),
                    cmd_ln_float32("-wip"));
            uttproc_set_lm(lm_path);
        }
    }
    else if (prop_id == PROP_DICT_FILE) {
        gst_pocketsphinx_set_string(sink, "-dict", value);
    }
    else if (prop_id == PROP_FSG_FILE) {
        /* FSG and LM are mutually exclusive */
        gst_pocketsphinx_set_string(sink, "-lm", NULL);
        gst_pocketsphinx_set_string(sink, "-fsg", value);

        /* Switch to this new FSG if the decoder is active. */
        if (sink->ad.initialized) {
            char *loaded_name =
                uttproc_load_fsgfile((char *) g_value_get_string(value));

            if (loaded_name) {
                uttproc_set_fsg(loaded_name);
            }
        }
    }
    else if (prop_id == PROP_S2_FSG) {
        s2_fsg_t *fsg = g_value_get_pointer(value);

        /* Drop any FSG registered under this name, then load and select
         * the one that was passed in. */
        uttproc_del_fsg(fsg->name);
        uttproc_load_fsg(fsg,
                         cmd_ln_boolean("-fsgusealtpron"),
                         cmd_ln_boolean("-fsgusefiller"),
                         cmd_ln_float32("-silpen"),
                         cmd_ln_float32("-fillpen"),
                         cmd_ln_float32("-lw"));
        uttproc_set_fsg(fsg->name);
    }
    else if (prop_id == PROP_FWDFLAT) {
        gst_pocketsphinx_set_boolean(sink, "-fwdflat", value);
    }
    else if (prop_id == PROP_BESTPATH) {
        gst_pocketsphinx_set_boolean(sink, "-bestpath", value);
    }
    else {
        G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
    }
}
int main(int argc, char *argv[]) { char *lm_file; char *args_file; char *ngrams_file; char *lmLoadTimer = "LM Load"; char *lmLookupTimer = "LM Lookup"; char *ngrams[MAX_NGRAMS]; float64 lw, wip, uw, logbase; int i, n, score; int32 *nwdptr; int32 nwords[MAX_NGRAMS]; int scores[MAX_NGRAMS]; lm_t *lm; s3lmwid_t wid[MAX_NGRAMS][MAX_WORDS_PER_NGRAM]; FILE* fp; if (argc < 3) { E_FATAL("USAGE: %s <lm_file> <args_file> <ngrams_file>\n", argv[0]); } args_file = argv[1]; lm_file = argv[2]; ngrams_file = argv[3]; parse_args_file(args_file); lw = cmd_ln_float32("-lw"); wip = cmd_ln_float32("-wip"); uw = cmd_ln_float32("-uw"); logbase = cmd_ln_float32("-logbase"); logs3_init(logbase); metricsStart(lmLoadTimer); /* initialize the language model */ lm = lm_read(lm_file, lw, wip, uw); metricsStop(lmLoadTimer); if ((fp = fopen(ngrams_file, "r")) == NULL) { E_FATAL("Unable to open N-gram file %s\n", ngrams_file); } while (has_more_utterances(fp)) { /* read in all the N-grams */ n = read_ngrams(fp, ngrams, wid, nwords, MAX_NGRAMS, lm); metricsStart(lmLookupTimer); /* scores the N-grams */ for (i = 0; i < n; i++) { scores[i] = score_ngram(wid[i], nwords[i], lm); printf("%-10d %s\n", scores[i], ngrams[i]); /* printf("%-10d %s %d %d %d\n", scores[i], ngrams[i], wid[i][0], wid[i][1], wid[i][2]); */ } /* reset cache if <END_UTT> was reached */ if (n != MAX_NGRAMS) { lm_cache_reset(lm); } metricsStop(lmLookupTimer); } printf("Bigram misses: %d \n", lm->n_bg_bo); printf("Trigram misses: %d \n", lm->n_tg_bo); fflush(stdout); metricsPrint(); }
/* Update kb w/ new dictionary and new LM.
 * assumes: single-LM kbcore (before & after)
 * requires: updating kbcore
 * Lucian Galescu, 08/11/2005
 *
 * kb:       knowledge base to update in place.
 * dictfile: dictionary file handed to kbcore_update_lm().
 * lmfile:   LM file handed to kbcore_update_lm().
 *
 * The routine releases part of the old search state, rebuilds the kbcore,
 * then rebuilds the lextrees, the acoustic score structure, the Viterbi
 * history and the HMM histogram.  A failed kbcore update, or missing
 * start/finish words, is reported through E_FATAL.
 */
void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;

    /*** clean up ***/
    vithist_t *vithist = kb->vithist;

    /* NOTE(review): only the fillertree pointer array is released here; the
     * trees it points to are not freed in this function.  kb->ugtree and
     * kb->ascr are overwritten further down without being released here
     * either - confirm whether they are freed elsewhere. */
    if (kb->fillertree) ckd_free ((void *)kb->fillertree);
    if (kb->hmm_hist) ckd_free ((void *)kb->hmm_hist);

    /* vithist */
    /* The Viterbi history is taken apart member by member, then the
     * structure itself is released. */
    if (vithist) {
        ckd_free ((void *) vithist->entry);
        ckd_free ((void *) vithist->frame_start);
        ckd_free ((void *) vithist->bestscore);
        ckd_free ((void *) vithist->bestvh);
        ckd_free ((void *) vithist->lms2vh_root);
        ckd_free ((void *) kb->vithist);
    }

    /*** re-initialize ***/
    kb->kbcore = kbcore_update_lm(kb->kbcore,
                                  dictfile,
                                  cmd_ln_str("-fdict"),
                                  "", /* Hack!! Hardwired constant for -compsep argument */
                                  lmfile,
                                  cmd_ln_str("-fillpen"),
                                  cmd_ln_float32("-silprob"),
                                  cmd_ln_float32("-fillprob"),
                                  cmd_ln_float32("-lw"),
                                  cmd_ln_float32("-wip"),
                                  cmd_ln_float32("-uw"));
    if(kb->kbcore==NULL){
        E_FATAL("Updating kbcore failed\n");
    }

    /* Shorthand handles into the updated core.  d2p is fetched but not used
     * again in this function. */
    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    /* Every LM-dependent step below is skipped when the core has no LM. */
    if(lm){
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if(lm){
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    /* Build set of all possible left contexts */
    /* lc gets one extra slot for the BAD_S3CIPID terminator written below. */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        /* Last phone of the word's pronunciation; filler phones are skipped. */
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    /* The silence phone is always included. */
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");

    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lm) {
        E_INFO("Creating Unigram Table\n");
        /* The zero stored here is overwritten by the next statement. */
        n=0;
        n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n",n);

        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n); /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1; /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    /* wp is reused here: it is refilled with the filler words only. */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
        /* Filler trees are built with no left-context list and type -1. */
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree,
           lextree_n_node(kb->fillertree[0]));

    /* NOTE(review): from here on kb->ugtree is read unconditionally, but it
     * is only rebuilt above when lm is non-NULL; with no LM it still holds
     * whatever was there before the update - confirm this is intended. */
    if (cmd_ln_int32("-lextreedump")) {
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), kbcore->dict2pid->n_comstate);

    /* The beam structure is reused as-is from the existing kb. */
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    /* Histogram size: total node count over all trees divided by the bin
     * size.  NOTE(review): a -hmmhistbinsize of 0 would divide by zero
     * here - confirm the option is validated elsewhere. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32)); /* Really no need for +1 */
}
/********************************************************************* FUNCTION: fe_init_auto PARAMETERS: fe_t * RETURNS: nothing DESCRIPTION: automatically grab front-end parameters from command line arguments and initializes the front-end structure **********************************************************************/ fe_t * fe_init_auto() { param_t p; fe_init_params(&p); p.SAMPLING_RATE = cmd_ln_float32("-samprate"); p.FRAME_RATE = cmd_ln_int32("-frate"); p.WINDOW_LENGTH = cmd_ln_float32("-wlen"); if (strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0) p.FB_TYPE = MEL_SCALE; else if (strcmp("log_linear", cmd_ln_str("-fbtype")) == 0) p.FB_TYPE = LOG_LINEAR; else { E_WARN("Invalid fbtype\n"); return NULL; } p.NUM_CEPSTRA = cmd_ln_int32("-ncep"); p.NUM_FILTERS = cmd_ln_int32("-nfilt"); p.FFT_SIZE = cmd_ln_int32("-nfft"); p.UPPER_FILT_FREQ = cmd_ln_float32("-upperf"); p.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf"); p.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha"); if (cmd_ln_boolean("-dither")) { p.dither = 1; p.seed = cmd_ln_int32("-seed"); } else p.dither = 0; #ifdef WORDS_BIGENDIAN p.swap = strcmp("big", cmd_ln_str("-input_endian")) == 0 ? 0 : 1; #else p.swap = strcmp("little", cmd_ln_str("-input_endian")) == 0 ? 
0 : 1; #endif if (cmd_ln_boolean("-logspec")) p.logspec = RAW_LOG_SPEC; if (cmd_ln_boolean("-smoothspec")) p.logspec = SMOOTH_LOG_SPEC; p.doublebw = cmd_ln_boolean("-doublebw"); p.unit_area = cmd_ln_boolean("-unit_area"); p.round_filters = cmd_ln_boolean("-round_filters"); p.remove_dc = cmd_ln_boolean("-remove_dc"); p.verbose = cmd_ln_boolean("-verbose"); if (0 == strcmp(cmd_ln_str("-transform"), "dct")) p.transform = DCT_II; else if (0 == strcmp(cmd_ln_str("-transform"), "legacy")) p.transform = LEGACY_DCT; else if (0 == strcmp(cmd_ln_str("-transform"), "htk")) p.transform = DCT_HTK; else { E_WARN("Invalid transform type (values are 'dct', 'legacy', 'htk')\n"); return NULL; } p.warp_type = cmd_ln_str("-warp_type"); p.warp_params = cmd_ln_str("-warp_params"); p.lifter_val = cmd_ln_int32("-lifter"); return fe_init(&p); }
/*
 * Common initialization of a live decoder: builds the front-end from the
 * parsed command line, initializes the knowledge base and resets the
 * per-decoder state.
 *
 * _decoder         decoder to initialize; must not be NULL
 * _internal_cmdln  stored in _decoder->internal_cmdln; when TRUE the
 *                  command-line table is released with cmd_ln_free() if
 *                  initialization fails
 *
 * Returns LD_SUCCESS, or LD_ERROR_OUT_OF_MEMORY when fe_init() or
 * feat_array_alloc() fails.  On failure the front-end and feature buffer are
 * released and the decoder is left in LD_STATE_FINISHED.
 */
static int
ld_init_impl(live_decoder_t * _decoder, int32 _internal_cmdln)
{
    param_t fe_param;
    int rv = LD_SUCCESS;

    assert(_decoder != NULL);

    /* The cleanup path inspects these two pointers; give them a defined
     * value first so an early failure never frees uninitialized memory. */
    _decoder->fe = NULL;
    _decoder->features = NULL;

    unlimit();

    /* ARCHAN 20050708: This part should be factored with fe_parse_option */
    /* allocate and initialize front-end */
    fe_init_params(&fe_param);
    fe_param.SAMPLING_RATE = cmd_ln_float32("-samprate");
    fe_param.FRAME_RATE = cmd_ln_int32("-frate");
    fe_param.WINDOW_LENGTH = cmd_ln_float32("-wlen");
    /* Anything other than "mel_scale" selects the log-linear filter bank. */
    fe_param.FB_TYPE =
        strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0 ? MEL_SCALE : LOG_LINEAR;
    fe_param.NUM_CEPSTRA = cmd_ln_int32("-ncep");
    fe_param.NUM_FILTERS = cmd_ln_int32("-nfilt");
    fe_param.FFT_SIZE = cmd_ln_int32("-nfft");
    fe_param.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
    fe_param.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
    fe_param.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
    /* strcmp() is non-zero for every value except "no", so any other
     * setting of -dither switches dithering on. */
    fe_param.dither = strcmp("no", cmd_ln_str("-dither"));
    fe_param.warp_type = cmd_ln_str("-warp_type");
    fe_param.warp_params = cmd_ln_str("-warp_params");

    if ((_decoder->fe = fe_init(&fe_param)) == NULL) {
        E_WARN("Failed to initialize front-end.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    /* capture decoder parameters */
    kb_init(&_decoder->kb);

    /* initialize decoder variables */
    _decoder->kbcore = _decoder->kb.kbcore;
    _decoder->hyp_frame_num = -1;
    _decoder->uttid = NULL;
    _decoder->ld_state = LD_STATE_IDLE;
    _decoder->hyp_str = NULL;
    _decoder->hyp_segs = NULL;

    /* Byte swapping is needed when the machine and input byte orders are
     * given as different strings. */
    _decoder->swap =
        (strcmp(cmd_ln_str("-machine_endian"), cmd_ln_str("-input_endian")) != 0);
    _decoder->phypdump = (cmd_ln_int32("-phypdump"));
    _decoder->rawext = (cmd_ln_str("-rawext"));

    if (_decoder->phypdump)
        E_INFO("Partial hypothesis WILL be dumped\n");
    else
        E_INFO("Partial hypothesis will NOT be dumped\n");

    if (_decoder->swap)
        E_INFO("Input data WILL be byte swapped\n");
    else
        E_INFO("Input data will NOT be byte swapped\n");

    _decoder->internal_cmdln = _internal_cmdln;

    _decoder->features =
        feat_array_alloc(kbcore_fcb(_decoder->kbcore), LIVEBUFBLOCKSIZE);
    if (_decoder->features == NULL) {
        E_WARN("Failed to allocate internal feature buffer.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    return LD_SUCCESS;

  ld_init_impl_cleanup:
    if (_decoder->fe != NULL) {
        fe_close(_decoder->fe);
        _decoder->fe = NULL;    /* do not leave a dangling pointer behind */
    }
    if (_decoder->features != NULL) {
        /* consult the implementation of feat_array_alloc() for how to free our
         * internal feature vector buffer */
        ckd_free((void *) **_decoder->features);
        ckd_free_2d((void **) _decoder->features);
        _decoder->features = NULL;
    }
    if (_internal_cmdln == TRUE) {
        cmd_ln_free();
    }
    _decoder->ld_state = LD_STATE_FINISHED;

    return rv;
}
/*
 * Program entry point.  Parses the command line, loads the models through
 * kbcore_init(), builds one lexical tree over the active vocabulary and then
 * hands every utterance of the control file to decode_utt via ctl_process().
 * Terminates the process with exit status 0.
 */
int
main (int32 argc, char *argv[])
{
    kb_t kb;
    kbcore_t *kbcore;
    bitvec_t active;
    int32 w;

    cmd_ln_parse (arglist, argc, argv);
    unlimit();

    kbcore = kbcore_init (cmd_ln_float32("-logbase"),
                          cmd_ln_str("-feat"),
                          cmd_ln_str("-mdef"),
                          cmd_ln_str("-dict"),
                          cmd_ln_str("-fdict"),
                          cmd_ln_str("-compsep"),
                          cmd_ln_str("-lm"),
                          cmd_ln_str("-fillpen"),
                          cmd_ln_float32("-silprob"),
                          cmd_ln_float32("-fillprob"),
                          cmd_ln_float32("-lw"),
                          cmd_ln_float32("-wip"),
                          cmd_ln_str("-mean"),
                          cmd_ln_str("-var"),
                          cmd_ln_float32("-varfloor"),
                          cmd_ln_str("-senmgau"),
                          cmd_ln_str("-mixw"),
                          cmd_ln_float32("-mixwfloor"),
                          cmd_ln_str("-tmat"),
                          cmd_ln_float32("-tmatfloor"));
    /* Everything below dereferences kbcore, so stop here if it is missing. */
    if (kbcore == NULL) {
        E_FATAL("kbcore_init failed\n");
    }

    /* Here's the perfect candidate for inheritance */
    kb.mdef = kbcore->mdef;
    kb.dict = kbcore->dict;
    kb.lm = kbcore->lm;
    kb.fillpen = kbcore->fillpen;
    kb.tmat = kbcore->tmat;
    kb.dict2lmwid = kbcore->dict2lmwid;

    if ((kb.am = acoustic_init (kbcore->fcb, kbcore->gau, kbcore->sen,
                                cmd_ln_float32("-mgaubeam"),
                                S3_MAX_FRAMES)) == NULL) {
        E_FATAL("Acoustic models initialization failed\n");
    }

    kb.beam = logs3 (cmd_ln_float64("-beam"));
    kb.wordbeam = logs3 (cmd_ln_float64("-wordbeam"));
    kb.wordmax = cmd_ln_int32("-wordmax");

    /* Mark the active words and build lextree: a word is active when it has
     * an LM word id or is a filler word. */
    active = bitvec_alloc (dict_size (kb.dict));
    bitvec_clear_all (active, dict_size(kb.dict));
    for (w = 0; w < dict_size(kb.dict); w++) {
        if (IS_LMWID(kb.dict2lmwid[w]) || dict_filler_word (kb.dict, w))
            bitvec_set (active, w);
    }
    kb.lextree_root = lextree_build (kb.dict, kb.mdef, active,
                                     cmd_ln_int32("-flatdepth"));

    kb.vithist = (glist_t *) ckd_calloc (S3_MAX_FRAMES+2, sizeof(glist_t));
    kb.vithist++;       /* Allow for dummy frame -1 for start word */

    kb.lextree_active = NULL;
    kb.wd_last_sf = (int32 *) ckd_calloc (dict_size(kb.dict), sizeof(int32));

    kb.tm = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));
    kb.tm_search = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));

    ctl_process (cmd_ln_str("-ctl"),
                 cmd_ln_int32("-ctloffset"),
                 cmd_ln_int32("-ctlcount"),
                 decode_utt, &kb);

    exit(0);
}
/*
 * ARCHAN: to allow backward compatibility, -lm and -lmctlfn coexist.  This
 * makes the current implementation more complicated than necessary.
 *
 * kb_init fills *kb from the parsed command line: it loads the core models
 * with kbcore_init(), validates dictionary / LM / transition-matrix
 * consistency, builds the unigram and filler lextrees (one set per LM when an
 * LM set is in use), and allocates the search, pruning, look-ahead and
 * statistics structures.  Any inconsistency is reported with E_FATAL.
 *
 * kb   structure to initialize; it is zeroed first, so previous contents are
 *      discarded without being freed.
 */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;

    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
                              cmd_ln_str("-feat"),
                              cmd_ln_str("-cmn"),
                              cmd_ln_str("-varnorm"),
                              cmd_ln_str("-agc"),
                              cmd_ln_str("-mdef"),
                              cmd_ln_str("-dict"),
                              cmd_ln_str("-fdict"),
                              "",       /* Hack!! Hardwired constant for -compsep argument */
                              cmd_ln_str("-lm"),
                              cmd_ln_str("-lmctlfn"),
                              cmd_ln_str("-lmdumpdir"),
                              cmd_ln_str("-fillpen"),
                              cmd_ln_str("-senmgau"),
                              cmd_ln_float32("-silprob"),
                              cmd_ln_float32("-fillprob"),
                              cmd_ln_float32("-lw"),
                              cmd_ln_float32("-wip"),
                              cmd_ln_float32("-uw"),
                              cmd_ln_str("-mean"),
                              cmd_ln_str("-var"),
                              cmd_ln_float32("-varfloor"),
                              cmd_ln_str("-mixw"),
                              cmd_ln_float32("-mixwfloor"),
                              cmd_ln_str("-subvq"),
                              cmd_ln_str("-gs"),
                              cmd_ln_str("-tmat"),
                              cmd_ln_float32("-tmatfloor"));
    if (kb->kbcore == NULL) {
        E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;

    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset = kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    /* The start and finish words must exist in the dictionary and in every LM. */
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
        E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) ||
                NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
                E_FATAL("%s or %s not in LM %s\n",
                        S3_START_WORD, S3_FINISH_WORD, lmset[i].name);
        }
    }
    else if (lm) {
        if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
            E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
        E_FATAL("Tmat contains arcs skipping more than 1 state\n");

    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their
     * recognition.  They are merely dummy words (anchors) at the beginning
     * and end of each utterance.
     */
    if (lmset) {
        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
            lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

            for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
            for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
                lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
        }
    }
    else if (lm) {
        /* Single-LM case: the mapping lives in kbcore rather than in the LM. */
        lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
        lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;

        for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
        for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
            kbcore->dict2lmwid[w] = BAD_S3LMWID;
    }

    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
        E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);

    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));

    /* Build set of all possible left contexts: the last phone of every word
     * that is not a filler phone, plus the silence phone.  The list is
     * terminated with BAD_S3CIPID, hence the +1 in the allocation. */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
        ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
        if (! mdef_is_fillerphone (mdef, (int)ci))
            bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
        if (bitvec_is_set (lc_active, ci))
            lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
        E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n",
                kb->n_lextree);
        kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));

    if (lmset) {
        kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore) * kb->n_lextree,
                                                     sizeof(lextree_t *));
        /* Just allocate pointers */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

        for (i = 0; i < kbcore_nlm(kbcore); i++) {
            E_INFO("Creating Unigram Table for lm %d name %s\n", i, lmset[i].name);
            n = 0;
            for (j = 0; j < dict_size(dict); j++) {
                /* try to be very careful again */
                wp[j].wid = -1;
                wp[j].prob = -1;
            }
            n = lm_ug_wordprob (lmset[i].lm, dict, MAX_NEG_INT32, wp);
            E_INFO("Size of word table after unigram + words in class: %d.\n", n);
            if (n < 1)
                E_FATAL("%d active words in %s\n", n, lmset[i].name);
            n = wid_wordprob2alt(dict, wp, n);
            E_INFO("Size of word table after adding alternative prons: %d.\n", n);

            if (cmd_ln_int32("-treeugprob") == 0) {
                /* Flatten all initial probabilities.  This must not use i:
                 * i is the index of the LM currently being processed. */
                for (j = 0; j < n; j++)
                    wp[j].prob = -1;
            }

            for (j = 0; j < kb->n_lextree; j++) {
                kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
                lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
                E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
                       kb->n_lextree, i, lmset[i].name,
                       lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
            }
        }
    }
    else if (lm) {
        E_INFO("Creating Unigram Table\n");
        n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
        E_INFO("Size of word table after unigram + words in class: %d\n", n);
        if (n < 1)
            E_FATAL("%d active words\n", n);
        n = wid_wordprob2alt (dict, wp, n);     /* Add alternative pronunciations */

        /* Retain or remove unigram probs from lextree, depending on option */
        if (cmd_ln_int32("-treeugprob") == 0) {
            for (i = 0; i < n; i++)
                wp[i].prob = -1;        /* Flatten all initial probabilities */
        }

        /* Create the desired no. of unigram lextrees */
        kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
        for (i = 0; i < kb->n_lextree; i++) {
            kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
            lextree_type (kb->ugtree[i]) = 0;
        }
        E_INFO("Lextrees(%d), %d nodes(ug)\n",
               kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }

    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
        if (dict_filler_word(dict, i)) {
            wp[n].wid = i;
            wp[n].prob = fillpen (kbcore->fillpen, i);
            n++;
        }
    }

    kb->fillertree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
        kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
        lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);

    E_INFO("Lextrees(%d), %d nodes(filler)\n",
           kb->n_lextree, lextree_n_node(kb->fillertree[0]));

    if (cmd_ln_int32("-lextreedump")) {
        if (lmset) {
            E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "UGTREE %d\n", i);
            lextree_dump (kb->ugtree[i], dict, stderr);
        }
        for (i = 0; i < kb->n_lextree; i++) {
            fprintf (stderr, "FILLERTREE %d\n", i);
            lextree_dump (kb->fillertree[i], dict, stderr);
        }
        fflush (stderr);
    }

    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)),
                          kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
                          cmd_ln_float64("-beam"),
                          cmd_ln_float64("-pbeam"),
                          cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
           kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);

    /* Sections of optimization related parameters */
    kb->ds_ratio = cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n", kb->ds_ratio);

    kb->rec_bstcid = -1;
    kb->skip_count = 0;

    kb->cond_ds = cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n", kb->cond_ds);

    if (kb->cond_ds > 0 && kb->kbcore->gs == NULL)
        E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs = cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n", kb->gs4gs);

    kb->svq4svq = cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n", kb->svq4svq);

    kb->ci_pbeam = -1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",
           kb->ci_pbeam);
    /* NOTE(review): the message quotes 1000000 while the test uses 10000000. */
    if (kb->ci_pbeam > 10000000) {
        E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }

    kb->wend_beam = -1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n", kb->wend_beam);

    kb->pl_window = cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n", kb->pl_window);

    kb->pl_window_start = 0;

    kb->pl_beam = logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n", kb->pl_beam);

    /* Count the leading senones that map to themselves in cd2cisen.  The
     * scan is bounded by the senone count so that it cannot run off the end
     * of the table when every senone maps to itself. */
    for (cisencnt = 0;
         cisencnt < mdef_n_sen(mdef) && cisencnt == mdef->cd2cisen[cisencnt];
         cisencnt++)
        ;

    kb->cache_ci_senscr = (int32 **) ckd_calloc_2d(kb->pl_window, cisencnt, sizeof(int32));
    kb->cache_best_list = (int32 *) ckd_calloc(kb->pl_window, sizeof(int32));
    kb->phn_heur_list = (int32 *) ckd_calloc(mdef_n_ciphone (mdef), sizeof(int32));

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;

    /* HMM-count histogram: one bin per hmm_hist_binsize nodes.  The bin size
     * is used as a divisor, so it has to be positive. */
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    if (kb->hmm_hist_binsize < 1)
        E_FATAL("-hmmhistbinsize must be at least 1 (got %d)\n", kb->hmm_hist_binsize);

    if (lmset)
        n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
        n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));   /* Really no need for +1 */

    /* Open hypseg file if specified; a failed open is reported, not fatal. */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    /* Same for the plain hypothesis file. */
    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
        if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
        if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
            E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/* the following function is used for MMIE training lqin 2010-03 */ static int mmi_normalize() { uint32 i; uint32 n_mgau; uint32 n_stream; uint32 n_density; vector_t ***in_mean = NULL; vector_t ***in_var = NULL; vector_t ***wt_mean = NULL; vector_t ***wt_var = NULL; const uint32 *veclen = NULL; const char **accum_dir; const char *in_mean_fn; const char *out_mean_fn; const char *in_var_fn; const char *out_var_fn; vector_t ***wt_num_mean = NULL; vector_t ***wt_den_mean = NULL; vector_t ***wt_num_var = NULL; vector_t ***wt_den_var = NULL; float32 ***num_dnom = NULL; float32 ***den_dnom = NULL; uint32 n_num_mgau; uint32 n_den_mgau; uint32 n_num_stream; uint32 n_den_stream; uint32 n_num_density; uint32 n_den_density; float32 constE; uint32 n_temp_mgau; uint32 n_temp_stream; uint32 n_temp_density; const uint32 *temp_veclen = NULL; accum_dir = cmd_ln_str_list("-accumdir"); /* the following variables are used for mmie training */ out_mean_fn = cmd_ln_str("-meanfn"); out_var_fn = cmd_ln_str("-varfn"); in_mean_fn = cmd_ln_str("-inmeanfn"); in_var_fn = cmd_ln_str("-invarfn"); constE = cmd_ln_float32("-constE"); /* get rid of some unnecessary parameters */ if (cmd_ln_int32("-fullvar")) { E_FATAL("Current MMIE training can not be done for full variance, set -fulllvar as no\n"); } if (cmd_ln_int32("-tiedvar")) { E_FATAL("Current MMIE training can not be done for tied variance, set -tiedvar as no\n"); } if (cmd_ln_str("-mixwfn")) { E_FATAL("Current MMIE training does not support mixture weight update, remove -mixwfn \n"); } if (cmd_ln_str("-inmixwfn")) { E_FATAL("Current MMIE training does not support mixture weight update, remove -inmixwfn \n"); } if (cmd_ln_str("-tmatfn")) { E_FATAL("Current MMIE training does not support transition matrix update, remove -tmatfn \n"); } if (cmd_ln_str("-regmatfn")) { E_FATAL("Using norm for computing regression matrix is obsolete, please use mllr_transform \n"); } /* must be at least one accum dir */ if (accum_dir[0] == NULL) { E_FATAL("No 
accumulated reestimation path is specified, use -accumdir \n"); } /* at least update mean or variance parameters */ if (out_mean_fn == NULL && out_var_fn == NULL) { E_FATAL("Neither -meanfn nor -varfn is specified, at least do mean or variance update \n"); } else if (out_mean_fn == NULL) { E_INFO("No -meanfn specified, will skip if any\n"); } else if (out_var_fn == NULL) { E_INFO("No -varfn specified, will skip if any\n"); } /* read input mean */ if (in_mean_fn != NULL) { E_INFO("read original density mean parameters from %s\n", in_mean_fn); if (s3gau_read(in_mean_fn, &in_mean, &n_mgau, &n_stream, &n_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)veclen); veclen = NULL; } /* read input variance */ if (in_var_fn != NULL) { E_INFO("read original density variance parameters from %s\n", in_var_fn); if (s3gau_read(in_var_fn, &in_var, &n_mgau, &n_stream, &n_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } ckd_free((void *)veclen); veclen = NULL; } /* read accumulated numerator and denominator counts */ for (i = 0; accum_dir[i]; i++) { E_INFO("Reading and accumulating counts from %s\n", accum_dir[i]); rdacc_mmie_den(accum_dir[i], "numlat", &wt_num_mean, &wt_num_var, &num_dnom, &n_num_mgau, &n_num_stream, &n_num_density, &veclen); rdacc_mmie_den(accum_dir[i], "denlat", &wt_den_mean, &wt_den_var, &den_dnom, &n_den_mgau, &n_den_stream, &n_den_density, &veclen); if (n_num_mgau != n_den_mgau) E_FATAL("number of gaussians inconsistent between num and den lattice\n"); else if (n_num_mgau != n_mgau) E_FATAL("number of gaussians inconsistent between imput model and accumulator (%u != %u)\n", n_mgau, n_num_mgau); if (n_num_stream != n_den_stream) E_FATAL("number of gaussian streams inconsistent between num and den lattice\n"); else if (n_num_stream != n_stream) E_FATAL("number of gaussian streams inconsistent between imput model and accumulator (%u != %u)\n", n_stream, n_num_stream); if 
(n_num_density != n_den_density) E_FATAL("number of gaussian densities inconsistent between num and den lattice\n"); else if (n_num_density != n_density) E_FATAL("number of gaussian densities inconsistent between imput model and accumulator (%u != %u)\n", n_density, n_num_density); } /* initialize update parameters as the input parameters */ if (out_mean_fn) { if (s3gau_read(in_mean_fn, &wt_mean, &n_temp_mgau, &n_temp_stream, &n_temp_density, &temp_veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)temp_veclen); temp_veclen = NULL; } if (out_var_fn) { if (s3gau_read(in_var_fn, &wt_var, &n_temp_mgau, &n_temp_stream, &n_temp_density, &temp_veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } ckd_free((void *)temp_veclen); temp_veclen = NULL; } /* update mean parameters */ if (wt_mean) { if (out_mean_fn) { E_INFO("Normalizing mean for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mmie_mean(in_mean, wt_mean, wt_num_mean, wt_den_mean, in_var, wt_num_var, wt_den_var, num_dnom, den_dnom, n_mgau, n_stream, n_density, veclen, constE); } else { E_INFO("Ignoring means since -meanfn not specified\n"); } } else { E_INFO("No means to normalize\n"); } /* update variance parameters */ if (wt_var) { if (out_var_fn) { E_INFO("Normalizing variance for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mmie_var(in_var, wt_var, wt_num_var, wt_den_var, num_dnom, den_dnom, in_mean, wt_mean, wt_num_mean, wt_den_mean, n_mgau, n_stream, n_density, veclen, constE); } else { E_INFO("Ignoring variances since -varfn not specified\n"); } } else { E_INFO("No variances to normalize\n"); } /* write the updated mean parameters to files */ if (out_mean_fn) { if (wt_mean) { if (s3gau_write(out_mean_fn, (const vector_t ***)wt_mean, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO reestimated means seen, but -meanfn 
specified\n"); } } else { if (wt_mean) { E_INFO("Reestimated means seen, but -meanfn NOT specified\n"); } } /* write the updated variance parameters to files */ if (out_var_fn) { if (wt_var) { if (s3gau_write(out_var_fn, (const vector_t ***)wt_var, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO reestimated variances seen, but -varfn specified\n"); } } else { if (wt_var) { E_INFO("Reestimated variances seen, but -varfn NOT specified\n"); } } if (veclen) ckd_free((void *)veclen); if (temp_veclen) ckd_free((void *)temp_veclen); return S3_SUCCESS; }
static int initialize(int argc, char *argv[]) { const char *fdictfn; const char *dictfn; const char *ts2cbfn; uint32 n_ts; uint32 n_cb; /* define, parse and (partially) validate the command line */ parse_cmd_ln(argc, argv); feat = feat_init(cmd_ln_str("-feat"), cmn_type_from_str(cmd_ln_str("-cmn")), cmd_ln_boolean("-varnorm"), agc_type_from_str(cmd_ln_str("-agc")), 1, cmd_ln_int32("-ceplen")); if (cmd_ln_str("-lda")) { E_INFO("Reading linear feature transformation from %s\n", cmd_ln_str("-lda")); if (feat_read_lda(feat, cmd_ln_str("-lda"), cmd_ln_int32("-ldadim")) < 0) return -1; } if (cmd_ln_str("-svspec")) { int32 **subvecs; E_INFO("Using subvector specification %s\n", cmd_ln_str("-svspec")); if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL) return -1; if ((feat_set_subvecs(feat, subvecs)) < 0) return -1; } if (cmd_ln_exists("-agcthresh") && 0 != strcmp(cmd_ln_str("-agc"), "none")) { agc_set_threshold(feat->agc_struct, cmd_ln_float32("-agcthresh")); } if (feat->cmn_struct && cmd_ln_exists("-cmninit")) { char *c, *cc, *vallist; int32 nvals; vallist = ckd_salloc(cmd_ln_str("-cmninit")); c = vallist; nvals = 0; while (nvals < feat->cmn_struct->veclen && (cc = strchr(c, ',')) != NULL) { *cc = '\0'; feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c)); c = cc + 1; ++nvals; } if (nvals < feat->cmn_struct->veclen && *c != '\0') { feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c)); } ckd_free(vallist); } if (cmd_ln_str("-segdir")) corpus_set_seg_dir(cmd_ln_str("-segdir")); if (cmd_ln_str("-segext")) corpus_set_seg_ext(cmd_ln_str("-segext")); corpus_set_mfcc_dir(cmd_ln_str("-cepdir")); corpus_set_mfcc_ext(cmd_ln_str("-cepext")); if (cmd_ln_str("-lsnfn")) corpus_set_lsn_filename(cmd_ln_str("-lsnfn")); corpus_set_ctl_filename(cmd_ln_str("-ctlfn")); if (cmd_ln_int32("-nskip") && cmd_ln_int32("-runlen")) { corpus_set_interval(cmd_ln_int32("-nskip"), cmd_ln_int32("-runlen")); } else if (cmd_ln_int32("-part") && cmd_ln_int32("-npart")) { 
corpus_set_partition(cmd_ln_int32("-part"), cmd_ln_int32("-npart")); } if (corpus_init() != S3_SUCCESS) { return S3_ERROR; } if (cmd_ln_str("-moddeffn")) { E_INFO("Reading %s\n", cmd_ln_str("-moddeffn")); /* Read in the model definitions. Defines the set of CI phones and context dependent phones. Defines the transition matrix tying and state level tying. */ if (model_def_read(&mdef, cmd_ln_str("-moddeffn")) != S3_SUCCESS) { return S3_ERROR; } ts2cbfn = cmd_ln_str("-ts2cbfn"); if (strcmp(SEMI_LABEL, ts2cbfn) == 0) { mdef->cb = semi_ts2cb(mdef->n_tied_state); n_ts = mdef->n_tied_state; n_cb = 1; } else if (strcmp(CONT_LABEL, ts2cbfn) == 0) { mdef->cb = cont_ts2cb(mdef->n_tied_state); n_ts = mdef->n_tied_state; n_cb = mdef->n_tied_state; } else if (strcmp(PTM_LABEL, ts2cbfn) == 0) { mdef->cb = ptm_ts2cb(mdef); n_ts = mdef->n_tied_state; n_cb = mdef->acmod_set->n_ci; } else if (s3ts2cb_read(ts2cbfn, &mdef->cb, &n_ts, &n_cb) != S3_SUCCESS) { return S3_ERROR; } dictfn = cmd_ln_str("-dictfn"); if (dictfn == NULL) { E_FATAL("You must specify a content dictionary using -dictfn\n"); } E_INFO("Reading %s\n", dictfn); lex = lexicon_read(NULL, /* no lexicon to start */ dictfn, mdef->acmod_set); if (lex == NULL) return S3_ERROR; fdictfn = cmd_ln_str("-fdictfn"); if (fdictfn) { E_INFO("Reading %s\n", fdictfn); (void)lexicon_read(lex, /* add filler words content lexicon */ fdictfn, mdef->acmod_set); } } return S3_SUCCESS; }
/*
 * Parse the command line and load everything the caller needs.
 *
 * argc, argv   command line, passed to parse_cmd_ln()
 * out_lex      receives the lexicon, or NULL when no dictionary was given
 * out_omdef    receives the output model definition, or NULL when
 *              -omoddeffn was not given ("1-class init")
 * out_dmdef    receives the dump model definition, or NULL when -dmoddeffn
 *              was not given
 * out_feat     receives the feature module
 *
 * Also stores -stride in the file-level variable stride.
 *
 * Returns S3_SUCCESS on success.  Returns -1 when the LDA transform or the
 * subvector specification cannot be set up, and S3_ERROR when a model
 * definition, the ts2cb map or the main lexicon cannot be read.  Output
 * arguments that had not been assigned yet are left untouched on failure.
 */
int
main_initialize(int argc,
                char *argv[],
                lexicon_t **out_lex,
                model_def_t **out_omdef,
                model_def_t **out_dmdef,
                feat_t** out_feat)
{
    model_def_t *dmdef = NULL;
    model_def_t *omdef = NULL;
    lexicon_t *lex = NULL;
    feat_t *feat;
    const char *fn;
    uint32 n_ts;
    uint32 n_cb;
    const char *ts2cbfn;

    parse_cmd_ln(argc, argv);

    feat = feat_init(cmd_ln_str("-feat"),
                     cmn_type_from_str(cmd_ln_str("-cmn")),
                     cmd_ln_boolean("-varnorm"),
                     agc_type_from_str(cmd_ln_str("-agc")),
                     1, cmd_ln_int32("-ceplen"));

    if (cmd_ln_str("-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str("-lda"));
        if (feat_read_lda(feat,
                          cmd_ln_str("-lda"),
                          cmd_ln_int32("-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str("-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n",
               cmd_ln_str("-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(feat, subvecs)) < 0)
            return -1;
    }

    /* The AGC threshold is only applied when AGC is not "none". */
    if (cmd_ln_exists("-agcthresh")
        && 0 != strcmp(cmd_ln_str("-agc"), "none")) {
        agc_set_threshold(feat->agc_struct,
                          cmd_ln_float32("-agcthresh"));
    }

    /* -cmninit is a comma-separated list of initial CMN mean values; at most
     * veclen of them are consumed, extra values are ignored. */
    if (feat->cmn_struct
        && cmd_ln_exists("-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        vallist = ckd_salloc(cmd_ln_str("-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < feat->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
            c = cc + 1;
            ++nvals;
        }
        /* Last value: the one that is not followed by a comma. */
        if (nvals < feat->cmn_struct->veclen && *c != '\0') {
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
        }
        ckd_free(vallist);
    }
    *out_feat = feat;

    if (cmd_ln_str("-omoddeffn")) {
        E_INFO("Reading output model definitions: %s\n", cmd_ln_str("-omoddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&omdef,
                           cmd_ln_str("-omoddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        if (cmd_ln_str("-dmoddeffn")) {
            E_INFO("Reading dump model definitions: %s\n", cmd_ln_str("-dmoddeffn"));

            if (model_def_read(&dmdef,
                               cmd_ln_str("-dmoddeffn")) != S3_SUCCESS) {
                return S3_ERROR;
            }
            setup_d2o_map(dmdef, omdef);
        }
        else {
            E_INFO("Assuming dump and output model definitions are identical\n");
        }

        ts2cbfn = cmd_ln_str("-ts2cbfn");
        if (ts2cbfn) {
            /* The three labels build the map in memory; any other value is
             * treated as the name of a map file. */
            if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
                omdef->cb = semi_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = 1;
            }
            else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
                omdef->cb = cont_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->n_tied_state;
            }
            else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
                omdef->cb = ptm_ts2cb(omdef);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->acmod_set->n_ci;
            }
            else if (s3ts2cb_read(ts2cbfn,
                                  &omdef->cb,
                                  &n_ts,
                                  &n_cb) != S3_SUCCESS) {
                return S3_ERROR;
            }

            if (omdef->n_tied_state != n_ts) {
                E_FATAL("Model definition file n_tied_state = %u, but %u mappings in ts2cb\n",
                        omdef->n_tied_state, n_ts);
            }
        }
    }
    else {
        E_INFO("No mdef files.  Assuming 1-class init\n");
    }

    *out_omdef = omdef;
    *out_dmdef = dmdef;

    /* Both lexicon reads take the phone set from omdef, so a dictionary
     * cannot be loaded on the "no mdef" path above. */
    fn = cmd_ln_str("-dictfn");
    if (fn) {
        if (omdef == NULL) {
            E_FATAL("-dictfn requires a model definition, use -omoddeffn\n");
        }
        E_INFO("Reading main lexicon: %s\n", fn);

        lex = lexicon_read(NULL, fn, omdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;
    }

    fn = cmd_ln_str("-fdictfn");
    if (fn) {
        if (omdef == NULL) {
            E_FATAL("-fdictfn requires a model definition, use -omoddeffn\n");
        }
        E_INFO("Reading filler lexicon: %s\n", fn);

        if (lex == NULL) {
            /* No main lexicon: keep the one started from the filler
             * dictionary instead of dropping the returned pointer. */
            lex = lexicon_read(NULL, fn, omdef->acmod_set);
        }
        else {
            (void)lexicon_read(lex, fn, omdef->acmod_set);
        }
    }

    *out_lex = lex;

    stride = cmd_ln_int32("-stride");

    return S3_SUCCESS;
}
int main(int argc, char *argv[]) { const char *out_mllr_fn; const char **accum_dir; const char *mean_fn; const char *var_fn; const char *cb2mllrfn; const char *moddeffn; uint32 cdonly; uint32 mllr_mult; /* option 0 or 1 */ uint32 mllr_add; /* option 0 or 1 */ float32 varfloor; vector_t ***mean = NULL; /* baseline mean */ vector_t ***wt_mean = NULL; /* read from bw accum */ float32 ***wt_dcount = NULL; /* read from bw accum */ vector_t ***wt_var = NULL; /* not used */ int32 pass2var; /* not used */ uint32 n_mgau; uint32 n_stream; uint32 n_density; uint32 n_mllr_class; uint32 n_mgau_rd; uint32 n_stream_rd; uint32 n_density_rd; uint32 *veclen = NULL; uint32 *veclen_rd = NULL; float32 ****A = NULL; /* Output mllr: A */ float32 ***B = NULL; /* Output mllr: B */ int32 *cb2mllr = NULL; /* int32 **mllr2cb = NULL; int32 *n_mllr2cb = NULL;*/ model_def_t *mdef; uint32 gau_begin; uint32 i,s ; if (initialize(argc, argv) != S3_SUCCESS) { E_FATAL("errors initializing.\n"); } out_mllr_fn = cmd_ln_str("-outmllrfn"); accum_dir = cmd_ln_str_list("-accumdir"); mean_fn = cmd_ln_str("-meanfn"); var_fn = cmd_ln_str("-varfn"); cb2mllrfn = cmd_ln_str("-cb2mllrfn"); cdonly = cmd_ln_int32("-cdonly"); moddeffn = cmd_ln_str("-moddeffn"); mllr_mult = cmd_ln_int32("-mllrmult"); mllr_add = cmd_ln_int32("-mllradd"); varfloor = cmd_ln_float32("-varfloor"); assert(accum_dir[0] != NULL); /* must be at least one accum dir */ if (! (out_mllr_fn && accum_dir && mean_fn)) { E_FATAL("Some of options are missing.\n"); } if (varfloor < 0.) { E_FATAL("varfloor is negative (%e)\n",varfloor); } if (cb2mllrfn && strcmp(cb2mllrfn,"NO") == 0) { cb2mllrfn = NULL; } if (moddeffn && strcmp(moddeffn,"NO") == 0) { moddeffn = NULL; } /*--------------------------------------------------------------------*/ fprintf(stderr,"\n"); E_INFO("-- 1. 
Read input mean, (var) and accumulation.\n"); /*--------------------------------------------------------------------*/ /*-------------- Read baseline mean --------------*/ if (s3gau_read(mean_fn, &mean, &n_mgau, &n_stream, &n_density, &veclen) != S3_SUCCESS) { E_FATAL("Couldn't read %s", mean_fn); } /*-------- Read accum_dir (accumulation from bw) --------*/ for (i = 0; accum_dir[i]; i++) { E_INFO("Reading and accumulating counts from %s\n", accum_dir[i]); if (rdacc_den(accum_dir[i], &wt_mean, &wt_var, &pass2var, &wt_dcount, &n_mgau_rd, &n_stream_rd, &n_density_rd, &veclen_rd) != S3_SUCCESS) { E_FATAL("Error in reading %s\n", accum_dir[i]); } } if (n_mgau != n_mgau_rd) { E_FATAL("n_mgau mismatch (%u : %u)\n",n_mgau,n_mgau_rd); } if (n_stream != n_stream_rd) { E_FATAL("n_stream mismatch (%u : %u)\n",n_stream,n_stream_rd); } if (n_density != n_density_rd) { E_FATAL("n_density mismatch (%u : %u)\n",n_density,n_density_rd); } for (s = 0; s < n_stream; s++) { if (veclen[s] != veclen_rd[s]) { E_FATAL("vector length of stream %u (== %u) " "!= prior length (== %u)\n", s, veclen_rd[s], veclen[s]); } } ckd_free((void *)veclen_rd); veclen_rd = NULL; if (wt_var) { /* We don't use 'wt_var' in this program. */ gauden_free_param(wt_var); wt_var = NULL; } /*--------------------------------------------------------------------*/ fprintf(stderr,"\n"); E_INFO("-- 2. Read cb2mllrfn\n"); /*--------------------------------------------------------------------*/ if (strcmp(cb2mllrfn, ".1cls.") == 0) { n_mllr_class = 1; cb2mllr = (int32 *) ckd_calloc(n_mgau, sizeof(int32)); } else { if (s3cb2mllr_read(cb2mllrfn, &cb2mllr, &n_mgau_rd, &n_mllr_class) != S3_SUCCESS) { E_FATAL("Unable to read %s\n",cb2mllrfn); } if (n_mgau_rd != n_mgau) { E_FATAL("cb2mllr maps %u cb, but read %u cb from files\n", n_mgau_rd, n_mgau); } } E_INFO("n_mllr_class = %d\n", n_mllr_class); gau_begin = 0; if (cdonly) { if (! 
moddeffn) { E_FATAL("-moddeffn is not given.\n"); } else if (model_def_read(&mdef, moddeffn) != S3_SUCCESS) { E_FATAL("Can not read model definition file %s\n", moddeffn); } gau_begin = mdef->n_tied_ci_state; for (i=0; i<gau_begin; i++) { cb2mllr[i] = -1; /* skip CI senones */ } E_INFO("Use CD senones only. (index >= %d)\n",mdef->n_tied_ci_state); } /*--------------------------------------------------------------------*/ fprintf(stderr,"\n"); E_INFO("-- 3. Calculate mllr matrices\n"); /*--------------------------------------------------------------------*/ mllr_mat(&A, &B, var_fn, mean, wt_mean, wt_dcount, gau_begin, cb2mllr, mllr_mult, mllr_add, varfloor, n_mgau, n_stream, n_density, n_mllr_class, veclen); /*--------------------------------------------------------------------*/ fprintf(stderr,"\n"); E_INFO("-- 4. Store mllr matrices (A,B) to %s\n", out_mllr_fn); fflush(stderr); /*--------------------------------------------------------------------*/ if(store_reg_mat(out_mllr_fn, veclen, n_mllr_class, n_stream, A, B) != S3_SUCCESS) { E_FATAL("Unable to write %s\n", out_mllr_fn); } ckd_free((void *)veclen); free_mllr_A(A, n_mllr_class, n_stream); free_mllr_B(B, n_mllr_class, n_stream); return 0 ; }
int mk_node(dtree_node_t *node, uint32 node_id, uint32 *id, uint32 n_id, float32 ****mixw, float32 ****means, float32 ****vars, uint32 *veclen, uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, float32 mwfloor) { float32 ***mixw_occ, **dist; uint32 mm, m, s, j, k; float64 *dnom, norm, wt_ent, s_wt_ent, occ; float32 mx_wt; uint32 *l_id; float32 ***lmeans=0,***lvars=0; float32 varfloor=0; uint32 continuous, sumveclen; char* type; type = (char *)cmd_ln_str("-ts2cbfn"); if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0) E_FATAL("Type %s unsupported; trees can only be built on types .semi. or .cont.\n",type); if (strcmp(type,".cont.") == 0) continuous = 1; else continuous = 0; if (continuous == 1) { varfloor = cmd_ln_float32("-varfloor"); /* Sumveclen is overallocation, but coding is simpler */ for (j=0,sumveclen=0; j < n_stream; j++) sumveclen += veclen[j]; lmeans = (float32 ***) ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); lvars = (float32 ***) ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); } mixw_occ = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32)); dist = (float32 **)ckd_calloc_2d(n_stream, n_density, sizeof(float32)); dnom = (float64 *)ckd_calloc(n_stream, sizeof(float64)); /* Merge distributions of all the elements in a cluster for combined distribution */ for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { float32 *lmeanvec=0, *lvarvec=0; if (continuous == 1) { lmeanvec = lmeans[s][j]; lvarvec = lvars[s][j]; } for (mm = 0; mm < n_id; mm++) { m = id[mm]; for (k = 0; k < n_density; k++) { mixw_occ[s][j][k] += mixw[m][s][j][k]; } /* For continuous hmms we have only one gaussian per state */ if (continuous == 1) { for (k = 0; k < veclen[j]; k++) { lmeanvec[k] += mixw[m][s][j][0] * means[m][s][j][k]; lvarvec[k] += mixw[m][s][j][0] * (vars[m][s][j][k] + means[m][s][j][k] * means[m][s][j][k]); } } } if (continuous == 1) { if (mixw_occ[s][j][0] != 0) { for (k = 0; k 
< veclen[j]; k++) { lmeanvec[k] /= mixw_occ[s][j][0]; lvarvec[k] = lvarvec[k]/mixw_occ[s][j][0] - lmeanvec[k]*lmeanvec[k]; if (lvarvec[k] < varfloor) lvarvec[k] = varfloor; } } else { for (k = 0; k < veclen[j]; k++) if (lmeanvec[k] != 0) E_FATAL("denominator = 0, but numerator = %f at k = %d\n",lmeanvec[k],k); } } } } /* Find out which state is under consideration */ for (j = 0, mx_wt = 0, s = 0; s < n_state; s++) { if (stwt[s] > mx_wt) { mx_wt = stwt[s]; j = s; } } /* occ is the same for each independent feature, so just choose 0 */ for (k = 0, occ = 0; k < n_density; k++) { occ += mixw_occ[j][0][k]; } for (s = 0, wt_ent = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0, dnom[j] = 0; k < n_density; k++) { dnom[j] += mixw_occ[s][j][k]; } } for (j = 0, s_wt_ent = 0; j < n_stream; j++) { norm = 1.0 / dnom[j]; /* discrete_entropy for discrete case, continuous entropy for continuous HMMs */ if (continuous != 1) { for (k = 0; k < n_density; k++) { dist[j][k] = mixw_occ[s][j][k] * norm; if (dist[j][k] < mwfloor) dist[j][k] = mwfloor; } s_wt_ent += dnom[j] * ent_d(dist[j], n_density); } else { s_wt_ent += dnom[j] * ent_cont(lmeans[s][j], lvars[s][j], veclen[j]); } } wt_ent += stwt[s] * s_wt_ent; } node->node_id = node_id; l_id = ckd_calloc(n_id, sizeof(uint32)); for (j = 0; j < n_id; j++) { l_id[j] = id[j]; } node->id = l_id; node->n_id = n_id; node->mixw_occ = mixw_occ; if (continuous == 1) { node->means = lmeans; node->vars = lvars; } node->occ = occ; node->wt_ent = wt_ent; ckd_free_2d((void **)dist); ckd_free((void *)dnom); return S3_SUCCESS; }
int main(int argc, char *argv[]) { char *lm_file; char *args_file; char *ngrams_file; char *lmLoadTimer = "LM Load"; char *lmLookupTimer = "LM Lookup"; char *ngrams[MAX_NGRAMS]; float64 lw, wip, uw, logbase; int i, n; int32 nwords[MAX_NGRAMS]; int scores[MAX_NGRAMS]; lm_t *lm; s3lmwid32_t *wid[MAX_NGRAMS]; if (argc < 3) { E_FATAL("USAGE: %s <lm_file> <args_file> <ngrams_file>\n", argv[0]); } args_file = argv[1]; lm_file = argv[2]; ngrams_file = argv[3]; parse_args_file(args_file); lw = cmd_ln_float32("-lw"); wip = cmd_ln_float32("-wip"); uw = cmd_ln_float32("-uw"); logbase = cmd_ln_float32("-logbase"); logs3_init(logbase, 1, 1); /*Report progress and use log table */ metricsStart(lmLoadTimer); /* initialize the language model */ /* HACK! This doesn't work for class-based LM */ lm = lm_read_advance(lm_file, "default", lw, wip, uw, 0, NULL, 1); metricsStop(lmLoadTimer); /* read in all the N-grams */ n = read_ngrams(ngrams_file, ngrams, wid, nwords, MAX_NGRAMS, lm); metricsStart(lmLookupTimer); /* scores the N-grams */ for (i = 0; i < n; i++) { scores[i] = score_ngram(wid[i], nwords[i], lm); } metricsStop(lmLookupTimer); for (i = 0; i < n; i++) { printf("%-10d %s\n", scores[i], ngrams[i]); } printf("Bigram misses: %d \n", lm->n_bg_bo); printf("Trigram misses: %d \n", lm->n_tg_bo); fflush(stdout); metricsPrint(); return 0; }