/**
 * Fetch the segment iterator of the decoder's active search.
 *
 * The call to ps_search_seg_iter() is bracketed by ptmr_start() /
 * ptmr_stop() on the decoder's performance timer.
 *
 * @param ps Decoder whose `search` member is queried.
 * @return The value returned by ps_search_seg_iter() for ps->search.
 */
ps_seg_t *
ps_seg_iter(ps_decoder_t *ps)
{
    ps_seg_t *first_seg;

    ptmr_start(&ps->perf);
    first_seg = ps_search_seg_iter(ps->search);
    ptmr_stop(&ps->perf);

    return first_seg;
}
/**
 * Fetch the segment iterator of the decoder's active search.
 *
 * The call to ps_search_seg_iter() is bracketed by ptmr_start() /
 * ptmr_stop() on the decoder's performance timer.
 *
 * @param ps             Decoder whose `search` member is queried.
 * @param out_best_score Forwarded unchanged to ps_search_seg_iter().
 * @return The value returned by ps_search_seg_iter() for ps->search.
 */
ps_seg_t *
ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
{
    ps_seg_t *first_seg;

    ptmr_start(&ps->perf);
    first_seg = ps_search_seg_iter(ps->search, out_best_score);
    ptmr_stop(&ps->perf);

    return first_seg;
}
/**
 * Query the probability value of the decoder's active search.
 *
 * The call to ps_search_prob() is bracketed by ptmr_start() /
 * ptmr_stop() on the decoder's performance timer.
 *
 * @param ps Decoder whose `search` member is queried.
 * @return The value returned by ps_search_prob() for ps->search.
 */
int32
ps_get_prob(ps_decoder_t *ps)
{
    int32 result;

    ptmr_start(&ps->perf);
    result = ps_search_prob(ps->search);
    ptmr_stop(&ps->perf);

    return result;
}
/**
 * Fetch the hypothesis string of the active search together with its
 * "is final" flag.
 *
 * ps_search_hyp() is invoked with a NULL score pointer, so no score is
 * reported through this entry point.  The call is bracketed by the
 * decoder's performance timer.
 *
 * @param ps           Decoder whose `search` member is queried.
 * @param out_is_final Forwarded unchanged to ps_search_hyp().
 * @return The string returned by ps_search_hyp().
 */
char const *
ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
{
    char const *text;

    ptmr_start(&ps->perf);
    text = ps_search_hyp(ps->search, NULL, out_is_final);
    ptmr_stop(&ps->perf);

    return text;
}
/**
 * Fetch the hypothesis string of the active search together with its
 * score.
 *
 * ps_search_hyp() is invoked with a NULL third argument (the slot that
 * ps_get_hyp_final() uses for its "is final" flag).  The call is
 * bracketed by the decoder's performance timer.
 *
 * @param ps             Decoder whose `search` member is queried.
 * @param out_best_score Forwarded unchanged to ps_search_hyp().
 * @return The string returned by ps_search_hyp().
 */
char const *
ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
{
    char const *text;

    ptmr_start(&ps->perf);
    text = ps_search_hyp(ps->search, out_best_score, NULL);
    ptmr_stop(&ps->perf);

    return text;
}
/**
 * Decode one utterance by streaming its raw audio file through the
 * front end and the decoder in chunks of SAMPLE_BUFFER_LENGTH samples.
 *
 * The raw file name is built as "<rawdirfn>/<ur->uttfile><decoder.rawext>".
 * After every chunk the current hypothesis is fetched and, when
 * decoder.phypdump is set, logged as a partial hypothesis.
 *
 * @param data  Knowledge base, passed opaquely and cast to kb_t *.
 * @param ur    Utterance resources: file name and optional LM / MLLR names.
 * @param sf    Not used by this function.
 * @param ef    Not used by this function.
 * @param uttid Not used by this function; the utterance is begun under
 *              ur->uttfile.
 */
static void
utt_livepretend(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    char fullrawfn[FILENAME_LENGTH];
    char *hypstr;
    short samples[SAMPLE_BUFFER_LENGTH];
    float32 **frames;
    kb_t *kb;
    FILE *rawfd;
    int len, n_frames, n_chars;

    kb = (kb_t *) data;

    /* Bounded formatting: the path components come from the control
     * file / command line and may not fit in FILENAME_LENGTH bytes. */
    n_chars = snprintf(fullrawfn, sizeof(fullrawfn), "%s/%s%s",
                       rawdirfn, ur->uttfile, decoder.rawext);
    if (n_chars < 0 || n_chars >= (int) sizeof(fullrawfn)) {
        E_FATAL("Raw file name for %s is too long.\n", ur->uttfile);
    }
    if ((rawfd = fopen(fullrawfn, "rb")) == NULL) {
        E_FATAL("Cannnot open raw file %s.\n", fullrawfn);
    }

    /* Per-utterance language model and MLLR transform, when given. */
    if (ur->lmname != NULL)
        srch_set_lm((srch_t *) kb->srch, ur->lmname);
    if (ur->regmatname != NULL)
        kb_setmllr(ur->regmatname, ur->cb2mllrname, kb);

    if (s3_decode_begin_utt(&decoder, ur->uttfile) != S3_DECODE_SUCCESS)
        E_FATAL("Cannot begin utterance decoding.\n");

    len = fread(samples, sizeof(short), SAMPLE_BUFFER_LENGTH, rawfd);
    while (len > 0) {
        /* Only feature extraction and decoding are timed. */
        ptmr_start(&(st->tm));
        fe_process_utt(fe, samples, len, &frames, &n_frames);
        if (frames != NULL) {
            s3_decode_process(&decoder, frames, n_frames);
            ckd_free_2d((void **) frames);
        }
        ptmr_stop(&(st->tm));

        if (S3_DECODE_SUCCESS ==
            s3_decode_hypothesis(&decoder, NULL, &hypstr, NULL)) {
            if (decoder.phypdump)
                E_INFO("PARTIAL_HYP: %s\n", hypstr);
        }

        len = fread(samples, sizeof(short), SAMPLE_BUFFER_LENGTH, rawfd);
    }

    fclose(rawfd);
    s3_decode_end_utt(&decoder);
}
/**
 * Query the probability value of the decoder's active search and,
 * optionally, report the utterance ID.
 *
 * Both steps happen between ptmr_start() and ptmr_stop() on the
 * decoder's performance timer.
 *
 * @param ps        Decoder whose `search` member is queried.
 * @param out_uttid If non-NULL, receives ps->uttid.
 * @return The value returned by ps_search_prob() for ps->search.
 */
int32
ps_get_prob(ps_decoder_t *ps, char const **out_uttid)
{
    int32 result;

    ptmr_start(&ps->perf);
    result = ps_search_prob(ps->search);
    if (out_uttid != NULL) {
        *out_uttid = ps->uttid;
    }
    ptmr_stop(&ps->perf);

    return result;
}
/**
 * Fetch the hypothesis string of the active search and, optionally,
 * report the utterance ID.
 *
 * Both steps happen between ptmr_start() and ptmr_stop() on the
 * decoder's performance timer.
 *
 * @param ps             Decoder whose `search` member is queried.
 * @param out_best_score Forwarded unchanged to ps_search_hyp().
 * @param out_uttid      If non-NULL, receives ps->uttid.
 * @return The string returned by ps_search_hyp().
 */
char const *
ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score, char const **out_uttid)
{
    char const *text;

    ptmr_start(&ps->perf);
    text = ps_search_hyp(ps->search, out_best_score);
    if (out_uttid != NULL) {
        *out_uttid = ps->uttid;
    }
    ptmr_stop(&ps->perf);

    return text;
}
/**
 * Wind up the fwdflat pass of an N-gram search.
 *
 * Releases the fwdflat channels and word list, clears the active-word
 * bit vector, marks the backpointer table at the final frame, stops the
 * fwdflat timer and, when at least one frame was processed, logs
 * per-frame statistics and real-time factors.
 *
 * @param ngs N-gram search whose fwdflat pass is being finished.
 */
void
ngram_fwdflat_finish(ngram_search_t *ngs)
{
    int32 cf;

    destroy_fwdflat_chan(ngs);
    destroy_fwdflat_wordlist(ngs);
    bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));

    /* This is the number of frames processed. */
    cf = ps_search_acmod(ngs)->output_frame;
    /* Add a mark in the backpointer table for one past the final frame. */
    ngram_search_mark_bptable(ngs, cf);

    ptmr_stop(&ngs->fwdflat_perf);

    /* Print out some statistics. */
    if (cf > 0) {
        /* Seconds of audio: frame count divided by the "-frate" setting. */
        double n_speech = (double)(cf + 1)
            / cmd_ln_int32_r(ps_search_config(ngs), "-frate");

        /* The first two ratios add cf/2 before dividing so that the
         * integer division rounds to nearest; the others truncate. */
        E_INFO("%8d words recognized (%d/fr)\n",
               ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
        E_INFO("%8d senones evaluated (%d/fr)\n",
               ngs->st.n_senone_active_utt,
               (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
        E_INFO("%8d channels searched (%d/fr)\n",
               ngs->st.n_fwdflat_chan,
               ngs->st.n_fwdflat_chan / (cf + 1));
        E_INFO("%8d words searched (%d/fr)\n",
               ngs->st.n_fwdflat_words,
               ngs->st.n_fwdflat_words / (cf + 1));
        E_INFO("%8d word transitions (%d/fr)\n",
               ngs->st.n_fwdflat_word_transition,
               ngs->st.n_fwdflat_word_transition / (cf + 1));
        E_INFO("fwdflat %.2f CPU %.3f xRT\n",
               ngs->fwdflat_perf.t_cpu,
               ngs->fwdflat_perf.t_cpu / n_speech);
        E_INFO("fwdflat %.2f wall %.3f xRT\n",
               ngs->fwdflat_perf.t_elapsed,
               ngs->fwdflat_perf.t_elapsed / n_speech);
    }
}
/**
 * Initialise Gaussian mixture parameters for a range of tied states by
 * k-means clustering of dumped observations.
 *
 * For each tied state in [ts_off, ts_off + ts_cnt) the function
 * clusters the state's frames, computes (diagonal or full) variances,
 * optionally initialises mixing weights and runs EM re-estimation, and
 * finally writes whichever of the mean / variance / mixing-weight files
 * were requested.
 *
 * The dump and index file names are re-read from the command line
 * ("-segdmpfn", "-segidxfn"); `obsdmp`, `obsidx` and `n_ts` are not
 * referenced in the body.
 *
 * @param obsdmp    Not referenced in the body.
 * @param obsidx    Not referenced in the body.
 * @param n_density Number of densities per mixture.
 * @param n_stream  Number of feature streams.
 * @param veclen    Per-stream vector lengths.
 * @param blksize   Forwarded to the dump reader and clustering helpers.
 * @param reest     TRUE to run EM re-estimation (diagonal covariances only).
 * @param mixwfn    Mixing weight output file, or NULL to skip.
 * @param meanfn    Mean output file, or NULL to skip.
 * @param varfn     Variance output file, or NULL to skip.
 * @param ts_off    First tied state to process.
 * @param ts_cnt    Number of tied states to process.
 * @param n_ts      Not referenced in the body.
 * @param n_d_ts    Number of tied states expected in a multi-class dump.
 * @return S3_SUCCESS, or S3_ERROR if the single-class dump cannot be
 *         opened/read or an output file cannot be written.
 */
static int
init_state(const char *obsdmp,
           const char *obsidx,
           uint32 n_density,
           uint32 n_stream,
           uint32 *veclen,
           uint32 blksize,
           int reest,
           const char *mixwfn,
           const char *meanfn,
           const char *varfn,
           uint32 ts_off,
           uint32 ts_cnt,
           uint32 n_ts,
           uint32 n_d_ts)
{
    vector_t ***mean;
    vector_t ***var = NULL;
    vector_t ****fullvar = NULL;
    float32 ***mixw = NULL;
    uint32 n_frame;
    uint32 ignore = 0;
    codew_t *label;
    uint32 n_corpus = 0;
    float64 sqerr;
    float64 tot_sqerr;
    segdmp_type_t t;
    uint32 i, j, ts, n;
    int32 full_covar;

    full_covar = cmd_ln_int32("-fullvar");

    /* fully-continuous for now */
    mean = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (full_covar)
        fullvar = gauden_alloc_param_full(ts_cnt, n_stream, n_density, veclen);
    else
        var = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (mixwfn)
        mixw = (float32 ***) ckd_calloc_3d(ts_cnt, n_stream, n_density,
                                           sizeof(float32));

    if (cmd_ln_str("-segidxfn")) {
        E_INFO("Multi-class dump\n");
        if (segdmp_open_read(cmd_ln_str_list("-segdmpdirs"),
                             cmd_ln_str("-segdmpfn"),
                             cmd_ln_str("-segidxfn"),
                             &n, &t, n_stream, veclen,
                             blksize) != S3_SUCCESS) {
            E_FATAL("Unable to open dumps\n");
        }
        if (n != n_d_ts) {
            E_FATAL("Expected %u tied-states in dump, but apparently %u\n",
                    n_d_ts, n);
        }
        if (t != SEGDMP_TYPE_FEAT) {
            E_FATAL("Expected feature dump, but instead saw %u\n", t);
        }
        multiclass = TRUE;
    }
    else {
        E_INFO("1-class dump file\n");
        multiclass = FALSE;
        dmp_fp = s3open(cmd_ln_str("-segdmpfn"), "rb", &dmp_swp);
        if (dmp_fp == NULL) {
            E_ERROR_SYSTEM("Unable to open dump file %s for reading\n",
                           cmd_ln_str("-segdmpfn"));
            return S3_ERROR;
        }
        /* The file starts with a uint32 that becomes n_frame. */
        if (s3read(&n_frame, sizeof(uint32), 1, dmp_fp, dmp_swp, &ignore) != 1) {
            E_ERROR_SYSTEM("Unable to read frame count from dump file %s\n",
                           cmd_ln_str("-segdmpfn"));
            /* Do not leave the dump file open on the error path. */
            s3close(dmp_fp);
            return S3_ERROR;
        }
        data_offset = ftell(dmp_fp);
    }

    tot_sqerr = 0;
    for (i = 0; i < ts_cnt; i++) {
        ts = ts_off + i;

        /* stride not accounted for yet */
        if (o2d == NULL) {
            if (multiclass)
                n_frame = segdmp_n_seg(ts);
            /* single-class: n_frame keeps the value read from the header */
        }
        else {
            /* Sum the frame counts of every dump state mapped to ts. */
            for (j = 0, n_frame = 0; j < n_o2d[ts]; j++) {
                n_frame += segdmp_n_seg(o2d[ts][j]);
            }
        }

        E_INFO("Corpus %u: sz==%u frames%s\n",
               ts, n_frame,
               (n_frame > cmd_ln_int32("-vartiethr") ? "" : " tied var"));

        if (n_frame == 0) {
            continue;
        }

        E_INFO("Convergence ratios are abs(cur - prior) / abs(prior)\n");

        /* Do some variety of k-means clustering */
        ptmr_start(&km_timer);
        sqerr = cluster(ts, n_stream, n_frame, veclen, blksize,
                        mean[i], n_density, &label);
        ptmr_stop(&km_timer);

        if (sqerr < 0) {
            E_ERROR("Unable to do k-means for state %u; skipping...\n", ts);
            continue;
        }

        /* Given the k-means and assuming equal prior likelihoods
         * compute the variances */
        ptmr_start(&var_timer);
        if (full_covar)
            full_variances(ts, mean[i], fullvar[i], n_density, n_stream,
                           veclen, blksize, n_frame, label);
        else
            variances(ts, mean[i], var[i], n_density, n_stream,
                      veclen, blksize, n_frame, label);
        ptmr_stop(&var_timer);

        if (mixwfn) {
            /* initialize the mixing weights by counting # of occurrences
             * of the top codeword over the corpus and normalizing */
            init_mixw(mixw[i], mean[i], n_density, veclen, n_frame,
                      n_stream, label);
        }

        /* The labels are not used past this point; release them whether
         * or not mixing weights were requested. */
        ckd_free(label);

        if (mixwfn) {
            if (reest == TRUE && full_covar)
                E_ERROR("EM re-estimation is not yet supported for full covariances\n");
            else if (reest == TRUE) {
                ptmr_start(&em_timer);
                /* Do iterations of EM to estimate the mixture densities */
                reest_sum(ts, mean[i], var[i], mixw[i], n_density, n_stream,
                          n_frame, veclen, blksize,
                          cmd_ln_int32("-niter"),
                          FALSE,
                          cmd_ln_int32("-vartiethr"));
                ptmr_stop(&em_timer);
            }
        }

        ++n_corpus;
        tot_sqerr += sqerr;

        E_INFO("sqerr [%u] == %e\n", ts, sqerr);
    }

    if (n_corpus > 0) {
        E_INFO("sqerr = %e tot %e rms\n", tot_sqerr, sqrt(tot_sqerr/n_corpus));
    }

    if (!multiclass)
        s3close(dmp_fp);

    if (meanfn) {
        if (s3gau_write(meanfn, (const vector_t ***) mean,
                        ts_cnt, n_stream, n_density, veclen) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mean file given; none written\n");
    }

    if (varfn) {
        if (full_covar) {
            if (s3gau_write_full(varfn, (const vector_t ****) fullvar,
                                 ts_cnt, n_stream, n_density,
                                 veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
        else {
            if (s3gau_write(varfn, (const vector_t ***) var,
                            ts_cnt, n_stream, n_density,
                            veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
    }
    else {
        E_INFO("No variance file given; none written\n");
    }

    if (mixwfn) {
        if (s3mixw_write(mixwfn, mixw, ts_cnt, n_stream,
                         n_density) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mixing weight file given; none written\n");
    }

    return S3_SUCCESS;
}
/**
 * Decode one raw audio file in "live pretend" mode, using an
 * energy-based endpointer (cont_ad) to split it into utterances.
 *
 * Each stretch of non-silence returned by cont_ad_read() is decoded as
 * its own utterance, named "<ur->uttfile>_<start time in seconds>".  An
 * utterance is ended when more than 8000 timestamp units of silence
 * have passed since the last audio, or when the segment has produced
 * more than 15000 frames.
 *
 * `rawfd` is not declared here; it is assumed to be a file-scope
 * stream that ad_file_read() reads from (confirm against the rest of
 * the file).
 *
 * @param data  Knowledge base, passed opaquely and cast to kb_t *.
 * @param ur    Utterance resources: file name and optional LM / MLLR names.
 * @param sf    Not used by this function.
 * @param ef    Not used by this function.
 * @param uttid Not used by this function; segment IDs are generated locally.
 */
static void
utt_livepretend(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    char fullrawfn[FILENAME_LENGTH];
    char *hypstr;
    cont_ad_t *cont_ad;
    ad_rec_t bogus_ad;
    short samples[SAMPLE_BUFFER_LENGTH];
    float32 **frames;
    kb_t *kb;
    int nread, n_frames, seg_n_frames;
    int ts, listening;
    int n_chars;

    kb = (kb_t *) data;

    /* Bounded formatting: the path components may not fit the buffer. */
    n_chars = snprintf(fullrawfn, sizeof(fullrawfn), "%s/%s%s",
                       rawdirfn, ur->uttfile, decoder.rawext);
    if (n_chars < 0 || n_chars >= (int) sizeof(fullrawfn)) {
        E_FATAL("Raw file name for %s is too long.\n", ur->uttfile);
    }
    if ((rawfd = fopen(fullrawfn, "rb")) == NULL) {
        E_FATAL("Cannnot open raw file %s.\n", fullrawfn);
    }

    if (ur->lmname != NULL)
        srch_set_lm((srch_t *) kb->srch, ur->lmname);
    if (ur->regmatname != NULL)
        kb_setmllr(ur->regmatname, ur->cb2mllrname, kb);

    /* The endpointer only needs the sampling rate from the "device". */
    bogus_ad.sps = (int32) cmd_ln_float32_r(kb->kbcore->config, "-samprate");
    if ((cont_ad = cont_ad_init(&bogus_ad, ad_file_read)) == NULL) {
        E_FATAL("Failed to initialize energy-based endpointer");
    }

    listening = 0;
    ts = 0;
    seg_n_frames = 0;
    while ((nread = cont_ad_read(cont_ad, samples, SAMPLE_BUFFER_LENGTH)) >= 0) {
        if (nread) {
            ts = cont_ad->read_ts;
            if (!listening) {
                char seg_uttid[FILENAME_LENGTH];

                /* A truncated ID is harmless, an overflow is not. */
                snprintf(seg_uttid, sizeof(seg_uttid), "%s_%.3f",
                         ur->uttfile, (double) ts / bogus_ad.sps);
                if (s3_decode_begin_utt(&decoder, seg_uttid) != S3_DECODE_SUCCESS)
                    E_FATAL("Cannot begin utterance decoding.\n");
                listening = 1;
                /* The length limit below is per segment, so the frame
                 * count must restart with every new utterance. */
                seg_n_frames = 0;
            }

            /* Only feature extraction and decoding are timed. */
            ptmr_start(&(st->tm));
            fe_process_utt(fe, samples, nread, &frames, &n_frames);
            seg_n_frames += n_frames;
            if (frames != NULL) {
                s3_decode_process(&decoder, frames, n_frames);
                ckd_free_2d((void **) frames);
            }
            ptmr_stop(&(st->tm));

            if (s3_decode_hypothesis(&decoder, NULL, &hypstr, NULL)
                == S3_DECODE_SUCCESS) {
                if (decoder.phypdump) {
                    E_INFO("PARTIAL_HYP: %s\n", hypstr);
                }
            }

            /* If the segment is too long, break it. */
            if (seg_n_frames > 15000) {
                s3_decode_end_utt(&decoder);
                listening = 0;
            }
        }
        else {
            /* No audio this time: end the utterance after enough silence. */
            if (listening && cont_ad->read_ts - ts > 8000) {    /* HACK */
                s3_decode_end_utt(&decoder);
                listening = 0;
            }
        }
    }

    fclose(rawfd);
    cont_ad_close(cont_ad);
    if (listening)
        s3_decode_end_utt(&decoder);
}
/**
 * End the current utterance: flush the acoustic model, finish all
 * searches and, when "-backtrace" is set, log the hypothesis and its
 * word segmentation.
 *
 * The decoder's performance timer is stopped on every exit path; it is
 * not started in this function (presumably it is started when the
 * utterance begins -- confirm against the caller).
 *
 * @param ps Decoder to finish.
 * @return The first negative value returned by ps_search_forward() or
 *         ps_search_finish(), otherwise the result of finishing the
 *         main search.
 */
int
ps_end_utt(ps_decoder_t *ps)
{
    int status;
    int frame;

    acmod_end_utt(ps->acmod);

    /* Search any remaining frames. */
    status = ps_search_forward(ps);
    if (status < 0)
        goto stop_timer;

    /* Finish phone loop search. */
    if (ps->phone_loop) {
        status = ps_search_finish(ps->phone_loop);
        if (status < 0)
            goto stop_timer;
    }

    /* Search any frames remaining in the lookahead window. */
    if (ps->acmod->output_frame >= ps->pl_window) {
        frame = ps->acmod->output_frame - ps->pl_window;
        while (frame < ps->acmod->output_frame) {
            ps_search_step(ps->search, frame);
            ++frame;
        }
    }

    /* Finish main search. */
    status = ps_search_finish(ps->search);
    if (status < 0)
        goto stop_timer;

    ptmr_stop(&ps->perf);

    /* Log a backtrace if requested. */
    if (cmd_ln_boolean_r(ps->config, "-backtrace")) {
        int32 best_score;
        const char *hyp_text = ps_get_hyp(ps, &best_score);

        if (hyp_text != NULL) {
            ps_seg_t *cur;

            E_INFO("%s (%d)\n", hyp_text, best_score);
            E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
                        "word", "start", "end", "pprob", "ascr", "lscr", "lback");

            cur = ps_seg_iter(ps, &best_score);
            while (cur) {
                char const *word_str;
                int start_fr, end_fr;
                int32 posterior, lm_score, ac_score, backoff;

                word_str = ps_seg_word(cur);
                ps_seg_frames(cur, &start_fr, &end_fr);
                posterior = ps_seg_prob(cur, &ac_score, &lm_score, &backoff);
                E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
                            word_str, start_fr, end_fr,
                            logmath_exp(ps_get_logmath(ps), posterior),
                            ac_score, lm_score, backoff);

                cur = ps_seg_next(cur);
            }
        }
    }
    return status;

stop_timer:
    /* Shared failure exit: stop the timer and report the error code. */
    ptmr_stop(&ps->perf);
    return status;
}
ptmr_t ctl_process(const char *ctlfile, const char *ctllmfile, const char *ctlmllrfile, int32 nskip, int32 count, void (*func) (void *kb, utt_res_t * ur, int32 sf, int32 ef, char *uttid), void *kb) { FILE *fp; FILE *ctllmfp; FILE *ctlmllrfp; char uttfile[16384], uttid[4096]; char lmname[4096]; char regmatname[4096], cb2mllrname[4096]; char tmp[4096]; int32 sf, ef; utt_res_t *ur; ptmr_t tm; kb_t *k; k = (kb_t *) kb; ctllmfp = NULL; ctlmllrfp = NULL; ur = new_utt_res(); if (ctlfile) { if ((fp = fopen(ctlfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlfile); } else fp = stdin; if (ctllmfile) { E_INFO("LM is used in this session\n"); if ((ctllmfp = fopen(ctllmfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctllmfile); } if (ctlmllrfile) { E_INFO("MLLR is used in this session\n"); if ((ctlmllrfp = fopen(ctlmllrfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlmllrfile); } ptmr_init(&tm); if (nskip > 0) { E_INFO("Skipping %d entries at the beginning of %s\n", nskip, ctlfile); for (; nskip > 0; --nskip) { if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) { fclose(fp); return tm; } /*This checks the size of the control file of the lm in batch mode */ if (ctllmfile) { if (ctl_read_entry(ctllmfp, lmname, &sf, &ef, tmp) < 0) { fclose(ctllmfp); E_ERROR ("An LM control file is specified but LM cannot be read when skipping the %d-th sentence\n", nskip); return tm; } } /*This checks the size of the control file of the mllr in batch mode */ if (ctlmllrfile) { if (ctl_read_entry(ctlmllrfp, regmatname, &sf, &ef, tmp) < 0) { fclose(ctlmllrfp); E_ERROR ("A MLLR control file is specified but MLLR cannot be read when skipping the %d-th sentence\n", nskip); return tm; } } } } for (; count > 0; --count) { int32 tmp1, tmp2; if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) break; /*This checks the size of the control file in batch mode */ if (ctllmfile) { if (ctl_read_entry(ctllmfp, lmname, &tmp1, &tmp2, tmp) < 0) { fclose(ctllmfp); E_ERROR 
("LM control file is specified but LM cannot be read when counting the %d-th sentence\n", count); break; } } if (ctlmllrfile) { if (ctl_read_entry (ctlmllrfp, regmatname, &tmp1, &tmp2, cb2mllrname) < 0) { E_ERROR ("MLLR control file is specified but MLLR cannot be read when counting the %d-th sentence\n", count); break; } if (tmp2 == -1) strcpy(cb2mllrname, ".1cls."); } /* Process this utterance */ ptmr_start(&tm); if (func) { utt_res_set_uttfile(ur, uttfile); if (ctllmfile) utt_res_set_lmname(ur, lmname); if (ctlmllrfile) { utt_res_set_regmatname(ur, regmatname); utt_res_set_cb2mllrname(ur, cb2mllrname); } (*func) (kb, ur, sf, ef, uttid); } ptmr_stop(&tm); E_INFO ("%s: %6.1f sec CPU, %6.1f sec Clk; TOT: %8.1f sec CPU, %8.1f sec Clk\n\n", uttid, tm.t_cpu, tm.t_elapsed, tm.t_tot_cpu, tm.t_tot_elapsed); ptmr_reset(&tm); } if (fp) fclose(fp); if (ctllmfp) fclose(ctllmfp); if (ctlmllrfp) fclose(ctlmllrfp); if (ur) ckd_free(ur); return tm; }
/**
 * Repeatedly process a single utterance file each time its
 * modification time changes.
 *
 * For each of `count` rounds the function polls stat_mtime(uttfile)
 * once per second until it returns a non-negative value different from
 * the previous round's, then calls `func` with an utterance ID of the
 * form "<basename>_<8-digit round number>".  Each call is timed and the
 * timings are logged.
 *
 * @param uttfile File to watch and process.
 * @param count   Number of rounds.
 * @param func    Per-utterance callback; may be NULL.
 * @param kb      Opaque pointer handed to `func`.
 * @return The timer used for the per-utterance measurements.
 */
ptmr_t
ctl_process_utt(const char *uttfile, int32 count,
                void (*func) (void *kb, utt_res_t * ur, int32 sf, int32 ef,
                              char *uttid),
                void *kb)
{
    char utterance_file[16384];
    char uttid[4096];
    const char *base;
    int32 i, c;
    int32 ts, newts;
    ptmr_t tm;
    utt_res_t *ur;

    ptmr_init(&tm);
    ur = new_utt_res();
    base = path2basename(uttfile);

    ts = -1;
    for (c = 0; c < count; c++) {
        /* Wait for uttfile to change from previous iteration */
        for (i = 0;; i++) {
            newts = stat_mtime(uttfile);
            if ((newts >= 0) && (newts != ts))
                break;

            /* Announce the wait only once per round. */
            if (i == 0)
                E_INFO("Waiting for %s, count %d, c %d\n", uttfile, count, c);
            SLEEP_SEC(1);
        }
        ts = newts;

        /* Form uttid; bounded because the basename length is arbitrary. */
        snprintf(uttid, sizeof(uttid), "%s_%08d", base, c);
        /* Mutable, always-terminated copy of the file name for `ur`. */
        snprintf(utterance_file, sizeof(utterance_file), "%s", uttfile);

        /* Process this utterance */
        ptmr_start(&tm);
        if (func) {
            utt_res_set_uttfile(ur, utterance_file);
            (*func) (kb, ur, 0, -1, uttid);
        }
        ptmr_stop(&tm);

        E_INFO
            ("%s: %6.1f sec CPU, %6.1f sec Clk;  TOT: %8.1f sec CPU, %8.1f sec Clk\n\n",
             uttid, tm.t_cpu, tm.t_elapsed, tm.t_tot_cpu, tm.t_tot_elapsed);

        ptmr_reset(&tm);
    }

    if (ur)
        free_utt_res(ur);

    return tm;
}
/**
 * Build a vector-quantisation codebook with k-means.
 *
 * `vqrows` distinct input rows are picked at random as the initial
 * centroids.  Each iteration labels every row with its nearest centroid
 * (vector_vqlabel), logs the total squared error and then re-estimates
 * the centroids.  Iteration stops when the error is zero, when
 * `maxiter` iterations have run, or when the relative improvement drops
 * below `epsilon`.
 *
 * @param data    Input vectors, rows x cols.
 * @param rows    Number of input vectors; must be >= vqrows.
 * @param cols    Vector dimension.
 * @param vqrows  Number of centroids to produce.
 * @param epsilon Convergence threshold on relative error change; > 0.
 * @param maxiter Upper bound on iterations; >= 0.
 * @param mean    Output centroids, vqrows x cols.
 * @param map     Output: for each input row, the index of its centroid.
 * @return Total squared error of the final labelling.
 */
float64
vector_vqgen(float32 **data, int32 rows, int32 cols, int32 vqrows,
             float64 epsilon, int32 maxiter,
             float32 **mean, int32 *map)
{
    int32 row, col, pick, iter;
    static uint32 seed = 1;
    float64 sqerr, prev_sqerr = 0, t;
    bitvec_t sel;
    int32 *count;
    float32 *gmean;
    ptmr_t tm;

    assert((rows >= vqrows) && (maxiter >= 0) && (epsilon > 0.0));

    sel = bitvec_alloc(rows);

    ptmr_init(&tm);
    ptmr_start(&tm);

    /* Pick a random initial set of centroids */
#ifndef WIN32
    srandom(seed);
    seed ^= random();
#else
    srand((unsigned) time(NULL));
#endif
    for (row = 0; row < vqrows; row++) {
        /* Find pick = a random, previously unselected row from the input */
#ifndef WIN32
        pick = (random() & (int32) 0x7fffffff) % rows;
#else
        pick = (rand() & (int32) 0x7fffffff) % rows;
#endif
        /* Linear probe (with wrap-around) to the next unselected row. */
        while (bitvec_is_set(sel, pick)) {
            ++pick;
            if (pick >= rows)
                pick = 0;
        }
        bitvec_set(sel, pick);

        /* Note: two selected rows may still be identical in content. */
        memcpy((void *) (mean[row]), (void *) (data[pick]),
               cols * sizeof(float32));
    }
    bitvec_free(sel);

    count = (int32 *) ckd_calloc(vqrows, sizeof(int32));

    /* In k-means, unmapped means in any iteration are a problem.
     * Replace them with gmean, the mean of the initial centroids. */
    gmean = (float32 *) ckd_calloc(cols, sizeof(float32));
    vector_mean(gmean, mean, vqrows, cols);

    for (iter = 0;; iter++) {   /* Iterations of k-means algorithm */
        /* Find the current data->mean mappings (labels) */
        sqerr = 0.0;
        for (row = 0; row < rows; row++) {
            map[row] = vector_vqlabel(data[row], mean, vqrows, cols, &t);
            sqerr += t;
        }
        ptmr_stop(&tm);

        if (iter == 0)
            E_INFO("Iter %4d: %.1fs CPU; sqerr= %e\n", iter, tm.t_cpu, sqerr);
        else
            E_INFO("Iter %4d: %.1fs CPU; sqerr= %e; delta= %e\n",
                   iter, tm.t_cpu, sqerr, (prev_sqerr - sqerr) / prev_sqerr);

        /* Check if exit condition satisfied */
        if (sqerr == 0.0)
            break;
        if (iter >= maxiter - 1)
            break;
        if ((iter > 0) && (((prev_sqerr - sqerr) / prev_sqerr) < epsilon))
            break;
        prev_sqerr = sqerr;

        ptmr_start(&tm);

        /* Update (reestimate) means */
        for (row = 0; row < vqrows; row++) {
            for (col = 0; col < cols; col++)
                mean[row][col] = 0.0;
            count[row] = 0;
        }
        for (row = 0; row < rows; row++) {
            vector_accum(mean[map[row]], data[row], cols);
            count[map[row]]++;
        }
        for (row = 0; row < vqrows; row++) {
            if (count[row] > 1) {
                /* A count of exactly 1 needs no scaling. */
                t = 1.0 / (float64) (count[row]);
                for (col = 0; col < cols; col++)
                    mean[row][col] =
                        (float32) ((float64) mean[row][col] * (float64) t);
            }
            else if (count[row] == 0) {
                E_ERROR("Iter %d: mean[%d] unmapped\n", iter, row);
                memcpy(mean[row], gmean, cols * sizeof(float32));
            }
        }
    }

    ckd_free(count);
    ckd_free(gmean);

    return sqerr;
}
/**
 * Load the word lattice (DAG) for one utterance, run N-best search on
 * it and write the result to the N-best output file.
 *
 * The lattice file is "<latdir path>.<latext>" when "-inlatdir" is
 * set, otherwise "<uttid>.<latext>".  Timing per frame is printed to
 * stdout when the lattice reported a positive frame count.
 *
 * @param data  Not used by this function.
 * @param ur    Utterance resources; supplies the file name and optional
 *              LM name.
 * @param sf    Not used by this function.
 * @param ef    Not used by this function.
 * @param uttid Utterance ID used for file names and logging.
 */
static void
utt_astar(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    char dagfile[1024], nbestfile[1024];
    const char *latdir;
    const char *latext;
    const char *nbestext;
    dag_t *dag;
    int32 nfrm;
    size_t len;

    if (ur->lmname)
        lmset_set_curlm_wname(lmset, ur->lmname);

    latdir = cmd_ln_str_r(config, "-inlatdir");
    latext = cmd_ln_str_r(config, "-latext");
    nbestext = cmd_ln_str_r(config, "-nbestext");

    /* Extensions are appended with snprintf so that a long path is
     * truncated instead of overrunning the fixed-size buffers. */
    if (latdir) {
        build_output_uttfile(dagfile, latdir, uttid, ur->uttfile);
        len = strlen(dagfile);
        snprintf(dagfile + len, sizeof(dagfile) - len, ".%s", latext);
    }
    else
        snprintf(dagfile, sizeof(dagfile), "%s.%s", uttid, latext);

    ptmr_reset(&tm_utt);
    ptmr_start(&tm_utt);

    nfrm = 0;
    if ((dag = dag_load(dagfile,
                        cmd_ln_int32_r(config, "-maxedge"),
                        cmd_ln_float32_r(config, "-logbase"),
                        cmd_ln_int32_r(config, "-dagfudge"),
                        dict, fpen, config, logmath)) != NULL) {
        /* A filler word at the end of the lattice is replaced by the
         * dictionary's finish word. */
        if (dict_filler_word(dict, dag->end->wid))
            dag->end->wid = dict->finishwid;

        dag_remove_unreachable(dag);
        if (dag_bypass_filler_nodes(dag, 1.0, dict, fpen) < 0) {
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
            goto search_done;
        }
        dag_compute_hscr(dag, dict, lmset->cur_lm, 1.0);
        dag_remove_bypass_links(dag);

        E_INFO("%5d frames, %6d nodes, %8d edges, %8d bypass\n",
               dag->nfrm, dag->nnode, dag->nlink, dag->nbypass);

        nfrm = dag->nfrm;

        build_output_uttfile(nbestfile, nbestdir, uttid, ur->uttfile);
        len = strlen(nbestfile);
        snprintf(nbestfile + len, sizeof(nbestfile) - len, ".%s", nbestext);

        nbest_search(dag, nbestfile, uttid, 1.0, dict, lmset->cur_lm, fpen);

        lm_cache_stats_dump(lmset->cur_lm);
        lm_cache_reset(lmset->cur_lm);
    }
    else
        E_ERROR("Dag load (%s) failed\n", uttid);

search_done:
    /* Reached with dag == NULL when loading failed. */
    dag_destroy(dag);

    ptmr_stop(&tm_utt);

    printf("%s: TMR: %5d Frm", uttid, nfrm);
    if (nfrm > 0) {
        printf(" %6.2f xEl", tm_utt.t_elapsed * 100.0 / nfrm);
        printf(" %6.2f xCPU", tm_utt.t_cpu * 100.0 / nfrm);
    }
    printf("\n");
    fflush(stdout);
}
/**
 * Decode one utterance with the lexical-tree Viterbi search.
 *
 * Reads the cepstrum file, applies CMN / AGC as configured, runs the
 * search frame by frame, prints the best hypothesis, dumps the Viterbi
 * word lattice to "<uttid>.lat" (or stdout if that cannot be opened),
 * prints timing and counter summaries, and releases the per-utterance
 * history.
 *
 * @param data    Knowledge base, passed opaquely and cast to kb_t *.
 * @param uttfile Utterance file name relative to "-cepdir".
 * @param sf      Start frame, forwarded to s2mfc_read().
 * @param ef      End frame, forwarded to s2mfc_read().
 * @param uttid   Utterance ID used for logging and the lattice file name.
 */
static void
decode_utt(void *data, char *uttfile, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    acoustic_t *am;
    int32 featwin, nfr, min_utt_frames, n_vithist;
    char cepfile[4096], latfile[4096];
    vithist_t *finalhist;
    int32 i, f;
    glist_t hyplist;
    FILE *latfp;

    printf("\n");
    fflush(stdout);
    E_INFO("Utterance %s\n", uttid);

    kb = (kb_t *) data;
    am = kb->am;
    featwin = feat_window_size(am->fcb);

    /* Build complete cepfile name and read cepstrum data; check for min length */
    ctl_infile(cepfile, cmd_ln_str("-cepdir"), cmd_ln_str("-cepext"), uttfile);
    if ((nfr = s2mfc_read(cepfile, sf, ef, featwin, am->mfc, S3_MAX_FRAMES)) < 0) {
        E_ERROR("%s: MFC read failed\n", uttid);
        return;
    }
    E_INFO("%s: %d frames\n", uttid, nfr - (featwin << 1));

    ptmr_reset(kb->tm);
    ptmr_reset(kb->tm_search);
    ptmr_start(kb->tm);

    /* featwin frames of context are needed on each side of a frame. */
    min_utt_frames = (featwin << 1) + 1;
    if (nfr < min_utt_frames) {
        /* Argument list matches the two conversions in the format. */
        E_ERROR("%s: Utterance shorter than %d frames; ignored\n",
                uttid, min_utt_frames);
        /* Do not leave the utterance timer running on this exit. */
        ptmr_stop(kb->tm);
        return;
    }

    /* CMN/AGC */
    if (strcmp(cmd_ln_str("-cmn"), "current") == 0)
        cmn(am->mfc, nfr, feat_cepsize(am->fcb));
    if (strcmp(cmd_ln_str("-agc"), "max") == 0)
        agc_max(am->mfc, nfr);

    /* Process utterance: i indexes cepstrum frames, f search frames. */
    lextree_vit_start(kb, uttid);
    for (i = featwin, f = 0; i < nfr - featwin; i++, f++) {
        am->senscale[f] = acoustic_eval(am, i);

        ptmr_start(kb->tm_search);
        lextree_vit_frame(kb, f, uttid);
        printf(" %d,%d,%d", f, glist_count(kb->vithist[f]),
               glist_count(kb->lextree_active));
        fflush(stdout);
        ptmr_stop(kb->tm_search);
    }
    printf("\n");
    finalhist = lextree_vit_end(kb, f, uttid);

    hyplist = vithist_backtrace(finalhist, kb->am->senscale);
    hyp_log(stdout, hyplist, _dict_wordstr, (void *) kb->dict);
    hyp_myfree(hyplist);
    printf("\n");

    /* Log the entire Viterbi word lattice */
    snprintf(latfile, sizeof(latfile), "%s.lat", uttid);
    if ((latfp = fopen(latfile, "w")) == NULL) {
        E_ERROR("fopen(%s,w) failed; using stdout\n", latfile);
        latfp = stdout;
    }
    n_vithist = vithist_log(latfp, kb->vithist, f, _dict_wordstr,
                            (void *) kb->dict);
    if (latfp != stdout)
        fclose(latfp);
    else {
        printf("\n");
        fflush(stdout);
    }

    ptmr_stop(kb->tm);

    if (f > 0) {
        printf("TMR(%s): %5d frames; %.1fs CPU, %.2f xRT; %.1fs CPU(search), %.2f xRT; %.1fs Elapsed, %.2f xRT\n",
               uttid, f,
               kb->tm->t_cpu, kb->tm->t_cpu * 100.0 / f,
               kb->tm_search->t_cpu, kb->tm_search->t_cpu * 100.0 / f,
               kb->tm->t_elapsed, kb->tm->t_elapsed * 100.0 / f);
        printf("CTR(%s): %5d frames; %d Sen (%.1f/fr); %d HMM (%.1f/fr); %d Words (%.1f/fr)\n",
               uttid, f,
               kb->n_sen_eval, ((float64) kb->n_sen_eval) / f,
               kb->n_hmm_eval, ((float64) kb->n_hmm_eval) / f,
               n_vithist, ((float64) n_vithist) / f);
    }

    /* Cleanup */
    glist_free(kb->lextree_active);
    kb->lextree_active = NULL;
    for (; f >= -1; --f) {      /* I.e., including dummy START_WORD node at frame -1 */
        glist_myfree(kb->vithist[f], sizeof(vithist_t));
        kb->vithist[f] = NULL;
    }

    lm_cache_reset(kb->lm);
}
ptmr_t ctl_process(char *ctlfile, char *ctlmllrfile, int32 nskip, int32 count, void (*func) (void *kb, char *uttfile, int32 sf, int32 ef, char *uttid), void *kb) { FILE *fp, *mllrfp; char uttfile[16384], uttid[4096]; char regmatfile[4096], cb2mllrfile[4096]; int32 sf, ef; ptmr_t tm; mllrfp = NULL; E_INFO("Batch mode recognition without dynamic LM\n"); if (ctlfile) { if ((fp = fopen(ctlfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlfile); } else fp = stdin; if (ctlmllrfile) { if ((mllrfp = fopen(ctlmllrfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", ctlmllrfile); } ptmr_init(&tm); if (nskip > 0) { E_INFO("Skipping %d entries at the beginning of %s\n", nskip, ctlfile); for (; nskip > 0; --nskip) { if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) { fclose(fp); return tm; } } if (ctlmllrfile) { for (; nskip > 0; --nskip) { if (ctl_read_entry(fp, regmatfile, &sf, &ef, cb2mllrfile) < 0) { E_ERROR ("MLLR cannot be read when skipping the %d-th sentence\n", nskip); fclose(fp); return tm; } } } } for (; count > 0; --count) { if (ctl_read_entry(fp, uttfile, &sf, &ef, uttid) < 0) break; if (ctlmllrfile) { int32 tmp1, tmp2; if (ctl_read_entry (mllrfp, regmatfile, &tmp1, &tmp2, cb2mllrfile) < 0) { E_ERROR ("MLLR cannot be read when counting the %d-th sentence\n", count); break; } if (tmp2 == -1) strcpy(cb2mllrfile, ".1cls."); } /* Process this utterance */ ptmr_start(&tm); if (func) { if (ctlmllrfile) kb_setmllr(regmatfile, cb2mllrfile, kb); (*func) (kb, uttfile, sf, ef, uttid); } ptmr_stop(&tm); E_INFO ("%s: %6.1f sec CPU, %6.1f sec Clk; TOT: %8.1f sec CPU, %8.1f sec Clk\n\n", uttid, tm.t_cpu, tm.t_elapsed, tm.t_tot_cpu, tm.t_tot_elapsed); ptmr_reset(&tm); } if (fp) fclose(fp); return tm; }
int main(int argc, char *argv[]) { lexicon_t *lex; model_def_t *omdef; model_def_t *dmdef; feat_t *feat; uint32 n_stream, blksize; uint32 *veclen; uint32 ts_off; uint32 ts_cnt; FILE *fp; if (main_initialize(argc, argv, &lex, &omdef, &dmdef, &feat) != S3_SUCCESS) { return -1; } n_stream = feat_dimension1(feat); veclen = feat_stream_lengths(feat); blksize = feat_dimension(feat); if (strcmp(cmd_ln_str("-gthobj"), "state") == 0) { ts_off = cmd_ln_int32("-tsoff"); if (cmd_ln_str("-tscnt") == NULL) { ts_cnt = omdef->n_tied_state - ts_off; } else { ts_cnt = cmd_ln_int32("-tscnt"); } if (ts_off + ts_cnt > omdef->n_tied_state) { E_FATAL("Too many tied states specified\n"); } n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), cmd_ln_str("-segidxfn"), cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), ts_off, ts_cnt, omdef->n_tied_state, (dmdef != NULL ? dmdef->n_tied_state : omdef->n_tied_state)) != S3_SUCCESS) { E_ERROR("Unable to train [%u %u]\n", ts_off, ts_off+ts_cnt-1); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? 
all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } if (cmd_ln_str("-tsrngfn") != NULL) { fp = fopen(cmd_ln_str("-tsrngfn"), "w"); if (fp == NULL) { E_FATAL_SYSTEM("Unable to open %s for reading", cmd_ln_str("-tsrngfn")); } fprintf(fp, "%d %d\n", ts_off, ts_cnt); } else if (ts_cnt != omdef->n_tied_state) { E_WARN("Subset of tied states specified, but no -tsrngfn arg"); } } else if (strcmp(cmd_ln_str("-gthobj"), "single") == 0) { n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), NULL, /* No index -> single class dump file */ cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), 0, 1, 1, 1) != S3_SUCCESS) { E_ERROR("Unable to train\n"); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } } return 0; }
/*
 * Find Viterbi alignment.
 *
 * Builds the sentence HMM for the reference transcript, evaluates the
 * acoustic model and steps the alignment once per input frame, then
 * writes whichever segmentation outputs are enabled and prints timing
 * for the utterance.  Utterances that are too short for the feature
 * window, or for which no sentence HMM can be built, are skipped with
 * an error message.
 */
static void
align_utt(char *sent,           /* In: Reference transcript */
          int32 nfr,            /* In: #frames of input */
          char *ctlspec,        /* In: Utt specification from control file */
          char *uttid)          /* In: Utterance id, for logging and other use */
{
    align_stseg_t *state_seg;
    align_phseg_t *phone_seg;
    align_wdseg_t *word_seg;
    int32 frm;
    int32 win;
    int32 min_frames;

    /* #MFC vectors needed on either side of current frame to compute
     * one feature vector */
    win = feat_window_size(kbcore_fcb(kbc));
    min_frames = (win << 1) + 1;
    if (nfr < min_frames) {
        E_ERROR("Utterance %s < %d frames (%d); ignored\n",
                uttid, min_frames, nfr);
        return;
    }

    ptmr_reset_all(timers);

    ptmr_reset(&tm_utt);
    ptmr_start(&tm_utt);
    ptmr_reset(&tm_ovrhd);
    ptmr_start(&tm_ovrhd);
    ptmr_start(timers + tmr_utt);

    if (align_build_sent_hmm(sent,
                             cmd_ln_int32_r(kbc->config, "-insert_sil")) != 0) {
        align_destroy_sent_hmm();
        ptmr_stop(timers + tmr_utt);

        E_ERROR("No sentence HMM; no alignment for %s\n", uttid);

        return;
    }

    align_start_utt(uttid);

    frm = 0;
    while (frm < nfr) {
        ptmr_start(timers + tmr_utt);

        /* Obtain active senone flags */
        ptmr_start(timers + tmr_gauden);
        ptmr_start(timers + tmr_senone);

        align_sen_active(ascr->sen_active, ascr->n_sen);

        /* Dispatch on whichever acoustic model type is loaded. */
        if (kbc->ms_mgau) {
            ms_cont_mgau_frame_eval(ascr, kbc->ms_mgau, kbc->mdef,
                                    feat[frm], frm);
        }
        else if (kbc->s2_mgau) {
            s2_semi_mgau_frame_eval(kbc->s2_mgau, ascr, fastgmm,
                                    feat[frm], frm);
        }
        else if (kbc->mgau) {
            approx_cont_mgau_ci_eval(kbcore_svq(kbc),
                                     kbcore_gs(kbc),
                                     kbcore_mgau(kbc),
                                     fastgmm,
                                     kbc->mdef,
                                     feat[frm][0],
                                     ascr->cache_ci_senscr[0],
                                     &(ascr->cache_best_list[0]),
                                     frm,
                                     kbcore_logmath(kbc));
            approx_cont_mgau_frame_eval(kbcore_mdef(kbc),
                                        kbcore_svq(kbc),
                                        kbcore_gs(kbc),
                                        kbcore_mgau(kbc),
                                        fastgmm, ascr,
                                        feat[frm][0], frm,
                                        ascr->cache_ci_senscr[0],
                                        &tm_ovrhd,
                                        kbcore_logmath(kbc));
        }

        ptmr_stop(timers + tmr_gauden);
        ptmr_stop(timers + tmr_senone);

        /* Step alignment one frame forward */
        ptmr_start(timers + tmr_align);
        align_frame(ascr->senscr);
        ptmr_stop(timers + tmr_align);
        ptmr_stop(timers + tmr_utt);

        frm++;
    }
    ptmr_stop(&tm_utt);
    ptmr_stop(&tm_ovrhd);

    printf("\n");

    /* Wind up alignment for this utterance */
    if (align_end_utt(&state_seg, &phone_seg, &word_seg) < 0) {
        E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    }
    else {
        /* Each output is written only when its destination is configured. */
        if (s2stsegdir) {
            write_s2stseg(s2stsegdir, state_seg, uttid, ctlspec,
                          cmd_ln_boolean_r(kbc->config, "-s2cdsen"));
        }
        if (stsegdir) {
            write_stseg(stsegdir, state_seg, uttid, ctlspec);
        }
        if (phsegdir) {
            write_phseg(phsegdir, phone_seg, uttid, ctlspec);
        }
        if (phlabdir) {
            write_phlab(phlabdir, phone_seg, uttid, ctlspec,
                        cmd_ln_int32_r(kbc->config, "-frate"));
        }
        if (wdsegdir) {
            write_wdseg(wdsegdir, word_seg, uttid, ctlspec);
        }
        if (outsentfp) {
            write_outsent(outsentfp, word_seg, uttid);
        }
        if (outctlfp) {
            write_outctl(outctlfp, ctlspec);
        }
    }

    align_destroy_sent_hmm();

    ptmr_print_all(stdout, timers, nfr * 0.1);

    printf
        ("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
         nfr, tm_utt.t_cpu, tm_utt.t_cpu * 100.0 / nfr, tm_utt.t_elapsed,
         tm_utt.t_elapsed * 100.0 / nfr);

    tot_nfr += nfr;
}
int32 baum_welch_update(float64 *log_forw_prob, vector_t **feature, uint32 n_obs, state_t *state, uint32 n_state, model_inventory_t *inv, float64 a_beam, float64 b_beam, float32 spthresh, s3phseg_t *phseg, int32 mixw_reest, int32 tmat_reest, int32 mean_reest, int32 var_reest, int32 pass2var, int32 var_is_full, FILE *pdumpfh, bw_timers_t *timers, feat_t *fcb) { float64 *scale = NULL; float64 **dscale = NULL; float64 **active_alpha; uint32 **active_astate; uint32 **bp; uint32 *n_active_astate; float64 log_fp; /* accumulator for the log of the probability * of observing the input given the model */ uint32 t; /* time */ int ret; uint32 i,j; /* caller must ensure that there is some non-zero amount of work to be done here */ assert(n_obs > 0); assert(n_state > 0); scale = (float64 *)ckd_calloc(n_obs, sizeof(float64)); dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32)); active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); /* Compute the scaled alpha variable and scale factors * for all states and time subject to the pruning constraints */ if (timers) ptmr_start(&timers->fwd_timer); /* * Debug? 
* E_INFO("Before Forward search\n"); */ ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state, n_state, inv, a_beam, phseg, timers, 0); #if BW_DEBUG for (i=0 ; i < n_obs; i++) { E_INFO("Number of active states %d at time %d\n",n_active_astate[i],i); E_INFO("Scale of time %d is %e \n",i,scale[i]); for(j=0 ; j < n_active_astate[i]; j++) { E_INFO("Active state: %d Active alpha: %e\n",active_astate[i][j], active_alpha[i][j]); } } i=0; j=0; #endif /* Dump a phoneme segmentation if requested */ if (cmd_ln_str("-outphsegdir")) { const char *phsegdir; char *segfn, *uttid; phsegdir = cmd_ln_str("-outphsegdir"); uttid = (cmd_ln_int32("-outputfullpath") ? corpus_utt_full_name() : corpus_utt()); segfn = ckd_calloc(strlen(phsegdir) + 1 + strlen(uttid) + strlen(".phseg") + 1, 1); strcpy(segfn, phsegdir); strcat(segfn, "/"); strcat(segfn, uttid); strcat(segfn, ".phseg"); write_phseg(segfn, inv, state, active_astate, n_active_astate, n_state, n_obs, active_alpha, scale, bp); ckd_free(segfn); } if (timers) ptmr_stop(&timers->fwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto error; } /* Compute the scaled beta variable and update the reestimation * sums */ if (timers) ptmr_start(&timers->bwd_timer); #if BW_DEBUG E_INFO("Before Backward search\n"); #endif ret = backward_update(active_alpha, active_astate, n_active_astate, scale, dscale, feature, n_obs, state, n_state, inv, b_beam, spthresh, mixw_reest, tmat_reest, mean_reest, var_reest, pass2var, var_is_full, pdumpfh, timers, fcb); if (timers) ptmr_stop(&timers->bwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto error; } #if BW_DEBUG E_INFO("Before Global Accumulation\n"); #endif /* If no error was found in the 
forward or backward procedures, * add the resulting utterance reestimation accumulators to the * global reestimation accumulators */ if (timers) ptmr_start(&timers->rstu_timer); accum_global(inv, state, n_state, mixw_reest, tmat_reest, mean_reest, var_reest, var_is_full); if (timers) ptmr_stop(&timers->rstu_timer); for (i = 0; i < n_active_astate[n_obs-1] && active_astate[n_obs-1][i] != (n_state-1); i++); assert(i < n_active_astate[n_obs-1]); /* Calculate log[ p( O | \lambda ) ] */ assert(active_alpha[n_obs-1][i] > 0); log_fp = log(active_alpha[n_obs-1][i]); for (t = 0; t < n_obs; t++) { assert(scale[t] > 0); log_fp -= log(scale[t]); for (j = 0; j < inv->gauden->n_feat; j++) { log_fp += dscale[t][j]; } } *log_forw_prob = log_fp; ckd_free((void *)scale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)dscale[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free((void **)dscale); ckd_free(bp); return S3_SUCCESS; error: ckd_free((void *)scale); for (i = 0; i < n_obs; i++) { if (dscale[i]) ckd_free((void *)dscale[i]); } ckd_free((void **)dscale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free(bp); E_ERROR("%s ignored\n", corpus_utt_brief_name()); return S3_ERROR; }
static void process_reffile (char *reffile) { FILE *rfp, *hfp; char line[16384], uttid[4096], file[4096], lc_uttid[4096]; int32 i, k; dagnode_t ref[MAX_UTT_LEN]; int32 nref, noov, nhyp; int32 tot_err, tot_ref, tot_corr, tot_oov, tot_hyp; dag_t *dag; dpnode_t retval; ptmr_t *tm; char *latdir, *hypfile; if ((rfp = fopen(reffile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", reffile); latdir = (char *) cmd_ln_access ("-latdir"); hypfile = (char *) cmd_ln_access ("-hyp"); if ((! latdir) && (! hypfile)) E_FATAL("Both -latdir and -hyp arguments missing\n"); if (latdir && hypfile) E_FATAL("-latdir and -hyp arguments are mutually exclusive\n"); hfp = NULL; if (hypfile) { if ((hfp = fopen(hypfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", hypfile); } tot_err = 0; tot_ref = 0; tot_hyp = 0; tot_corr = 0; tot_oov = 0; tm = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t)); while (fgets(line, sizeof(line), rfp) != NULL) { ptmr_reset (tm); ptmr_start (tm); if ((nref = refline2wds (line, ref, &noov, uttid)) < 0) E_FATAL("Bad line in file %s: %s\n", reffile, line); /* Read lattice or hypfile, whichever is specified */ if (latdir) { sprintf (file, "%s/%s.lat", latdir, uttid); dag = dag_load (file); if (! 
dag) { /* Try lower casing uttid */ strcpy (lc_uttid, uttid); lcase (lc_uttid); sprintf (file, "%s/%s.lat", latdir, lc_uttid); dag = dag_load (file); } } else { if (fgets(line, sizeof(line), hfp) == NULL) E_FATAL("Premature EOF(%s) at uttid %s\n", hypfile, uttid); dag = hypline2dag (uttid, line); } if (dag) { /* Append sentinel silwid node to end of DAG */ dag_append_sentinel (dag, silwid); /* Find best path (returns #errors/#correct and updates *nhyp) */ retval = dp (uttid, dict, oovbegin, ref, nref, dag, &nhyp, 0, 1); dag_destroy (dag); } else { retval.c = 0; retval.e = nref-1; nhyp = 0; } ptmr_stop (tm); tot_ref += nref-1; tot_hyp += nhyp; tot_err += retval.e; tot_corr += retval.c; tot_oov += noov; printf("(%s) << %d ref; %d %.1f%% oov; %d hyp; %d %.1f%% corr; %d %.1f%% err; %.1fs CPU >>\n", uttid, nref-1, noov, (nref > 1) ? (noov * 100.0) / (nref-1) : 0.0, nhyp, retval.c, (nref > 1) ? (retval.c * 100.0) / (nref-1) : 0.0, retval.e, (nref > 1) ? (retval.e * 100.0) / (nref-1) : 0.0, tm->t_cpu); printf("== %7d ref; %5d %5.1f%% oov; %7d hyp; %7d %5.1f%% corr; %6d %5.1f%% err; %5.1fs CPU; %s\n", tot_ref, tot_oov, (tot_ref > 0) ? (tot_oov * 100.0) / tot_ref : 0.0, tot_hyp, tot_corr, (tot_ref > 0) ? (tot_corr * 100.0) / tot_ref : 0.0, tot_err, (tot_ref > 0) ? (tot_err * 100.0) / tot_ref : 0.0, tm->t_tot_cpu, uttid); fflush (stderr); fflush (stdout); } fclose (rfp); if (hfp) fclose (hfp); printf("SUMMARY: %d ref; %d %.3f%% oov; %d hyp; %d %.3f%% corr; %d %.3f%% err; %.1fs CPU\n", tot_ref, tot_oov, (tot_ref > 0) ? (tot_oov * 100.0) / tot_ref : 0.0, tot_hyp, tot_corr, (tot_ref > 0) ? (tot_corr * 100.0) / tot_ref : 0.0, tot_err, (tot_ref > 0) ? (tot_err * 100.0) / tot_ref : 0.0, tm->t_tot_cpu); }
int32 viterbi_update(float64 *log_forw_prob, vector_t **feature, uint32 n_obs, state_t *state_seq, uint32 n_state, model_inventory_t *inv, float64 a_beam, float32 spthresh, s3phseg_t *phseg, int32 mixw_reest, int32 tmat_reest, int32 mean_reest, int32 var_reest, int32 pass2var, int32 var_is_full, FILE *pdumpfh, bw_timers_t *timers, feat_t *fcb) { float64 *scale = NULL; float64 **dscale = NULL; float64 **active_alpha; uint32 **active_astate; uint32 **bp; uint32 *n_active_astate; gauden_t *g; /* Gaussian density parameters and reestimation sums */ float32 ***mixw; /* all mixing weights */ float64 ***now_den = NULL; /* Short for den[t] */ uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */ uint32 *active_cb; uint32 n_active_cb; float32 **tacc; /* Transition matrix reestimation sum accumulators for the utterance. */ float32 ***wacc; /* mixing weight reestimation sum accumulators for the utterance. */ float32 ***denacc = NULL; /* mean/var reestimation accumulators for time t */ size_t denacc_size; /* Total size of data references in denacc. 
Allows for quick clears between time frames */ uint32 n_lcl_cb; uint32 *cb_inv; uint32 i, j, q; int32 t; uint32 n_feat; uint32 n_density; uint32 n_top; int ret; float64 log_fp; /* accumulator for the log of the probability * of observing the input given the model */ uint32 max_n_next = 0; uint32 n_cb; static float64 *p_op = NULL; static float64 *p_ci_op = NULL; static float64 **d_term = NULL; static float64 **d_term_ci = NULL; /* caller must ensure that there is some non-zero amount of work to be done here */ assert(n_obs > 0); assert(n_state > 0); g = inv->gauden; n_feat = gauden_n_feat(g); n_density = gauden_n_density(g); n_top = gauden_n_top(g); n_cb = gauden_n_mgau(g); if (p_op == NULL) { p_op = ckd_calloc(n_feat, sizeof(float64)); p_ci_op = ckd_calloc(n_feat, sizeof(float64)); } if (d_term == NULL) { d_term = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); } scale = (float64 *)ckd_calloc(n_obs, sizeof(float64)); dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32)); active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); active_cb = ckd_calloc(2*n_state, sizeof(uint32)); bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); /* Run forward algorithm, which has embedded Viterbi. */ if (timers) ptmr_start(&timers->fwd_timer); ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state_seq, n_state, inv, a_beam, phseg, timers, 0); /* Dump a phoneme segmentation if requested */ if (cmd_ln_str("-outphsegdir")) { const char *phsegdir; char *segfn, *uttid; phsegdir = cmd_ln_str("-outphsegdir"); uttid = (cmd_ln_int32("-outputfullpath") ? 
corpus_utt_full_name() : corpus_utt()); segfn = ckd_calloc(strlen(phsegdir) + 1 + strlen(uttid) + strlen(".phseg") + 1, 1); strcpy(segfn, phsegdir); strcat(segfn, "/"); strcat(segfn, uttid); strcat(segfn, ".phseg"); write_phseg(segfn, inv, state_seq, active_astate, n_active_astate, n_state, n_obs, active_alpha, scale, bp); ckd_free(segfn); } if (timers) ptmr_stop(&timers->fwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto all_done; } mixw = inv->mixw; if (mixw_reest) { /* Need to reallocate mixing accumulators for utt */ if (inv->l_mixw_acc) { ckd_free_3d((void ***)inv->l_mixw_acc); inv->l_mixw_acc = NULL; } inv->l_mixw_acc = (float32 ***)ckd_calloc_3d(inv->n_mixw_inverse, n_feat, n_density, sizeof(float32)); } wacc = inv->l_mixw_acc; n_lcl_cb = inv->n_cb_inverse; cb_inv = inv->cb_inverse; /* Allocate local accumulators for mean, variance reestimation sums if necessary */ gauden_alloc_l_acc(g, n_lcl_cb, mean_reest, var_reest, var_is_full); if (tmat_reest) { if (inv->l_tmat_acc) { ckd_free_2d((void **)inv->l_tmat_acc); inv->l_tmat_acc = NULL; } for (i = 0; i < n_state; i++) { if (state_seq[i].n_next > max_n_next) max_n_next = state_seq[i].n_next; } inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state, max_n_next, sizeof(float32)); } /* transition matrix reestimation sum accumulators for the utterance */ tacc = inv->l_tmat_acc; n_active_cb = 0; now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(float64)); now_den_idx = (uint32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(uint32)); if (mean_reest || var_reest) { /* allocate space for the per frame density counts */ denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_density, sizeof(float32)); /* # of bytes required to store all weighted vectors */ denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32); } else { denacc = NULL; denacc_size = 0; } /* 
Okay now run through the backtrace and accumulate counts. */ /* Find the non-emitting ending state */ for (q = 0; q < n_active_astate[n_obs-1]; ++q) { if (active_astate[n_obs-1][q] == n_state-1) break; } if (q == n_active_astate[n_obs-1]) { E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n"); ret = S3_ERROR; goto all_done; } for (t = n_obs-1; t >= 0; --t) { uint32 l_cb; uint32 l_ci_cb; float64 op, p_reest_term; uint32 prev; j = active_astate[t][q]; /* Follow any non-emitting states at time t first. */ while (state_seq[j].mixw == TYING_NON_EMITTING) { prev = active_astate[t][bp[t][q]]; #if VITERBI_DEBUG printf("Following non-emitting state at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. */ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j - prev] += 1.0; } q = bp[t][q]; j = prev; } /* Now accumulate statistics for the real state. */ l_cb = state_seq[j].l_cb; l_ci_cb = state_seq[j].l_ci_cb; n_active_cb = 0; if (timers) ptmr_start(&timers->gau_timer); gauden_compute_log(now_den[l_cb], now_den_idx[l_cb], feature[t], g, state_seq[j].cb, NULL); active_cb[n_active_cb++] = l_cb; if (l_cb != l_ci_cb) { gauden_compute_log(now_den[l_ci_cb], now_den_idx[l_ci_cb], feature[t], g, state_seq[j].ci_cb, NULL); active_cb[n_active_cb++] = l_ci_cb; } gauden_scale_densities_bwd(now_den, now_den_idx, &dscale[t], active_cb, n_active_cb, g); assert(state_seq[j].mixw != TYING_NON_EMITTING); /* Now calculate mixture densities. */ /* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */ op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], g); if (timers) ptmr_stop(&timers->gau_timer); if (timers) ptmr_start(&timers->rsts_timer); /* Make up this bogus value to be consistent with backward.c */ p_reest_term = 1.0 / op; /* Compute the output probability excluding the contribution * of each feature stream. i.e. 
p_op[0] is the output * probability excluding feature stream 0 */ partial_op(p_op, op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); /* compute the probability of each (of possibly topn) density */ den_terms(d_term, p_reest_term, p_op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); if (l_cb != l_ci_cb) { /* For each feature stream f, compute: * sum_k(mixw[f][k] den[f][k]) * and store the results in p_ci_op */ partial_ci_op(p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); /* For each feature stream and density compute the terms: * w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j * and store results in d_term_ci */ den_terms_ci(d_term_ci, 1.0, /* post_j = 1.0 */ p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); } /* accumulate the probability for each density in the mixing * weight reestimation accumulators */ if (mixw_reest) { accum_den_terms(wacc[state_seq[j].l_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); /* check if mixw and ci_mixw are different to avoid * doubling the EM counts in a CI run. 
*/ if (state_seq[j].mixw != state_seq[j].ci_mixw) { if (n_cb < inv->n_mixw) { /* semi-continuous, tied mixture, and discrete case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); } else { /* continuous case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } } /* accumulate the probability for each density in the * density reestimation accumulators */ if (mean_reest || var_reest) { accum_den_terms(denacc[l_cb], d_term, now_den_idx[l_cb], n_feat, n_top); if (l_cb != l_ci_cb) { accum_den_terms(denacc[l_ci_cb], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } if (timers) ptmr_stop(&timers->rsts_timer); /* Note that there is only one state/frame so this is kind of redundant */ if (timers) ptmr_start(&timers->rstf_timer); if (mean_reest || var_reest) { /* Update the mean and variance reestimation accumulators */ if (pdumpfh) fprintf(pdumpfh, "time %d:\n", t); accum_gauden(denacc, cb_inv, n_lcl_cb, feature[t], now_den_idx, g, mean_reest, var_reest, pass2var, inv->l_mixw_acc, var_is_full, pdumpfh, fcb); memset(&denacc[0][0][0], 0, denacc_size); } if (timers) ptmr_stop(&timers->rstf_timer); if (t > 0) { prev = active_astate[t-1][bp[t][q]]; #if VITERBI_DEBUG printf("Backtrace at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. 
*/ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j-prev] += 1.0; } q = bp[t][q]; j = prev; } } /* If no error was found, add the resulting utterance reestimation * accumulators to the global reestimation accumulators */ if (timers) ptmr_start(&timers->rstu_timer); accum_global(inv, state_seq, n_state, mixw_reest, tmat_reest, mean_reest, var_reest, var_is_full); if (timers) ptmr_stop(&timers->rstu_timer); /* Find the final state */ for (i = 0; i < n_active_astate[n_obs-1]; ++i) { if (active_astate[n_obs-1][i] == n_state-1) break; } /* Calculate log[ p( O | \lambda ) ] */ assert(active_alpha[n_obs-1][i] > 0); log_fp = log(active_alpha[n_obs-1][i]); for (t = 0; t < n_obs; t++) { assert(scale[t] > 0); log_fp -= log(scale[t]); for (j = 0; j < inv->gauden->n_feat; j++) { log_fp += dscale[t][j]; } } *log_forw_prob = log_fp; all_done: ckd_free((void *)scale); for (i = 0; i < n_obs; i++) { if (dscale[i]) ckd_free((void *)dscale[i]); } ckd_free((void **)dscale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free((void *)active_cb); if (denacc) ckd_free_3d((void ***)denacc); if (now_den) ckd_free_3d((void ***)now_den); if (now_den_idx) ckd_free_3d((void ***)now_den_idx); if (ret != S3_SUCCESS) E_ERROR("%s ignored\n", corpus_utt_brief_name()); return ret; }