/* * Find Viterbi alignment. */ static void align_utt (char *sent, /* In: Reference transcript */ float32 **mfc, /* In: MFC cepstra for input utterance */ int32 nfr, /* In: #frames of input */ char *ctlspec, /* In: Utt specifiction from control file */ char *uttid) /* In: Utterance id, for logging and other use */ { static float32 **feat = NULL; static int32 w; static int32 topn; static gauden_dist_t ***dist; static int32 *senscr; static s3senid_t *sen_active; static int8 *mgau_active; static char *s2stsegdir; static char *stsegdir; static char *phsegdir; static char *wdsegdir; int32 i, s, sid, gid, n_sen_active, best; char *arg; align_stseg_t *stseg; align_phseg_t *phseg; align_wdseg_t *wdseg; if (! feat) { /* One-time allocation of necessary intermediate variables */ /* Allocate space for a feature vector */ feat = (float32 **) ckd_calloc (n_feat, sizeof(float32 *)); for (i = 0; i < n_feat; i++) feat[i] = (float32 *) ckd_calloc (featlen[i], sizeof(float32)); /* Allocate space for top-N codeword density values in a codebook */ w = feat_window_size (); /* #MFC vectors needed on either side of current frame to compute one feature vector */ topn = *((int32 *) cmd_ln_access("-topn")); if (topn > g->n_density) { E_ERROR("-topn argument (%d) > #density codewords (%d); set to latter\n", topn, g->n_density); topn = g->n_density; } dist = (gauden_dist_t ***) ckd_calloc_3d (g->n_mgau, n_feat, topn, sizeof(gauden_dist_t)); /* Space for one frame of senone scores, and per frame active flags */ senscr = (int32 *) ckd_calloc (sen->n_sen, sizeof(int32)); sen_active = (s3senid_t *) ckd_calloc (sen->n_sen, sizeof(s3senid_t)); mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8)); /* Note various output directories */ s2stsegdir = NULL; stsegdir = NULL; phsegdir = NULL; wdsegdir = NULL; if ((arg = (char *) cmd_ln_access ("-s2stsegdir")) != NULL) s2stsegdir = (char *) ckd_salloc (arg); if ((arg = (char *) cmd_ln_access ("-stsegdir")) != NULL) stsegdir = (char *) ckd_salloc (arg); if ((arg = (char *) cmd_ln_access ("-phsegdir")) != NULL) phsegdir = (char *) ckd_salloc (arg); if ((arg = (char *) cmd_ln_access ("-wdsegdir")) != NULL) wdsegdir = (char *) ckd_salloc (arg); } /* HACK HACKA HACK BHIKSHA if (nfr <= (w<<1)) { E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w<<1)+1, nfr); return; } END HACK HACKA HACK */ cyctimer_reset_all (); counter_reset_all (); timing_reset (tm_utt); timing_start (tm_utt); cyctimer_resume (tmr_utt); /* AGC and CMN */ arg = (char *) cmd_ln_access ("-cmn"); if (strcmp (arg, "current") == 0) norm_mean (mfc-4, nfr+8, cepsize); /* -4 HACKA HACK */ arg = (char *) cmd_ln_access ("-agc"); if (strcmp (arg, "max") == 0) agc_max (mfc, nfr); if (align_build_sent_hmm (sent) != 0) { align_destroy_sent_hmm (); cyctimer_pause (tmr_utt); E_ERROR("No sentence HMM; no alignment for %s\n", uttid); return; } align_start_utt (uttid); /* * A feature vector for frame f depends on input MFC vectors [f-w..f+w]. Hence * the feature vector corresponding to the first w and last w input frames is * undefined. We define them by simply replicating the first and last true * feature vectors (presumably silence regions). */ for (i = 0; i < nfr; i++) { cyctimer_resume (tmr_utt); /* Compute feature vector for current frame from input speech cepstra */ /* HACK HACKA HACK BHIKSHA if (i < w) feat_cep2feat (mfc+w, feat); else if (i >= nfr-w) feat_cep2feat (mfc+(nfr-w-1), feat); else END HACK HACKA HACK */ feat_cep2feat (mfc+i, feat); /* * Evaluate gaussian density codebooks and senone scores for input codeword. * Evaluate only active codebooks and senones. */ /* Obtain active senone flags */ cyctimer_resume (tmr_senone); align_sen_active (sen_active, sen->n_sen); /* Flag all CI senones to active if interpolating */ if (interp) { for (s = 0; s < mdef->n_ci_sen; s++) sen_active[s] = 1; } /* Turn active flags into list (for faster access) */ n_sen_active = 0; for (s = 0; s < mdef->n_sen; s++) { if (sen_active[s]) sen_active[n_sen_active++] = s; } cyctimer_pause (tmr_senone); /* Flag all active mixture-gaussian codebooks */ cyctimer_resume (tmr_gauden); for (gid = 0; gid < g->n_mgau; gid++) mgau_active[gid] = 0; for (s = 0; s < n_sen_active; s++) { sid = sen_active[s]; mgau_active[sen->mgau[sid]] = 1; } /* Compute topn gaussian density values (for active codebooks) */ for (gid = 0; gid < g->n_mgau; gid++) if (mgau_active[gid]) gauden_dist (g, gid, topn, feat, dist[gid]); cyctimer_pause (tmr_gauden); /* Evaluate active senones */ cyctimer_resume (tmr_senone); best = (int32) 0x80000000; for (s = 0; s < n_sen_active; s++) { sid = sen_active[s]; senscr[sid] = senone_eval (sen, sid, dist[sen->mgau[sid]], topn); if (best < senscr[sid]) best = senscr[sid]; } if (interp) { for (s = 0; s < n_sen_active; s++) { if ((sid = sen_active[s]) >= mdef->n_ci_sen) interp_cd_ci (interp, senscr, sid, mdef->cd2cisen[sid]); } } /* Normalize senone scores (interpolation above can only lower best score) */ for (s = 0; s < n_sen_active; s++) { sid = sen_active[s]; senscr[sid] -= best; } senscale[i] = best; cyctimer_pause (tmr_senone); /* Step alignment one frame forward */ cyctimer_resume (tmr_align); align_frame (senscr); cyctimer_pause (tmr_align); cyctimer_pause (tmr_utt); } timing_stop (tm_utt); printf ("\n"); /* Wind up alignment for this utterance */ if (align_end_utt (&stseg, &phseg, &wdseg) < 0) E_ERROR("Final state not reached; no alignment for %s\n\n", uttid); else { if (s2stsegdir) write_s2stseg (s2stsegdir, stseg, uttid, ctlspec); if (stsegdir) write_stseg (stsegdir, stseg, uttid, ctlspec); if (phsegdir) write_phseg (phsegdir, phseg, uttid, ctlspec); if (wdsegdir) write_wdseg (wdsegdir, wdseg, uttid, ctlspec); if (outsentfp) write_outsent (outsentfp, wdseg, uttid); } align_destroy_sent_hmm (); cyctimer_print_all_norm (stdout, nfr*0.01, tmr_utt); counter_print_all (stdout); printf("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n", nfr, tm_utt->t_cpu, tm_utt->t_cpu * 100.0 / nfr, tm_utt->t_elapsed, tm_utt->t_elapsed * 100.0 / nfr); tot_nfr += nfr; }
/* * Find Viterbi alignment. */ static void align_utt(char *sent, /* In: Reference transcript */ int32 nfr, /* In: #frames of input */ char *ctlspec, /* In: Utt specifiction from control file */ char *uttid) { /* In: Utterance id, for logging and other use */ int32 i; align_stseg_t *stseg; align_phseg_t *phseg; align_wdseg_t *wdseg; int32 w; w = feat_window_size(kbcore_fcb(kbc)); /* #MFC vectors needed on either side of current frame to compute one feature vector */ if (nfr <= (w << 1)) { E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid, (w << 1) + 1, nfr); return; } ptmr_reset_all(timers); ptmr_reset(&tm_utt); ptmr_start(&tm_utt); ptmr_reset(&tm_ovrhd); ptmr_start(&tm_ovrhd); ptmr_start(timers + tmr_utt); if (align_build_sent_hmm(sent, cmd_ln_int32_r(kbc->config, "-insert_sil")) != 0) { align_destroy_sent_hmm(); ptmr_stop(timers + tmr_utt); E_ERROR("No sentence HMM; no alignment for %s\n", uttid); return; } align_start_utt(uttid); for (i = 0; i < nfr; i++) { ptmr_start(timers + tmr_utt); /* Obtain active senone flags */ ptmr_start(timers + tmr_gauden); ptmr_start(timers + tmr_senone); align_sen_active(ascr->sen_active, ascr->n_sen); /* Bah, there ought to be a function for this. */ if (kbc->ms_mgau) { ms_cont_mgau_frame_eval(ascr, kbc->ms_mgau, kbc->mdef, feat[i], i); } else if (kbc->s2_mgau) { s2_semi_mgau_frame_eval(kbc->s2_mgau, ascr, fastgmm, feat[i], i); } else if (kbc->mgau) { approx_cont_mgau_ci_eval(kbcore_svq(kbc), kbcore_gs(kbc), kbcore_mgau(kbc), fastgmm, kbc->mdef, feat[i][0], ascr->cache_ci_senscr[0], &(ascr->cache_best_list[0]), i, kbcore_logmath(kbc)); approx_cont_mgau_frame_eval(kbcore_mdef(kbc), kbcore_svq(kbc), kbcore_gs(kbc), kbcore_mgau(kbc), fastgmm, ascr, feat[i][0], i, ascr-> cache_ci_senscr[0], &tm_ovrhd, kbcore_logmath(kbc)); } ptmr_stop(timers + tmr_gauden); ptmr_stop(timers + tmr_senone); /* Step alignment one frame forward */ ptmr_start(timers + tmr_align); align_frame(ascr->senscr); ptmr_stop(timers + tmr_align); ptmr_stop(timers + tmr_utt); } ptmr_stop(&tm_utt); ptmr_stop(&tm_ovrhd); printf("\n"); /* Wind up alignment for this utterance */ if (align_end_utt(&stseg, &phseg, &wdseg) < 0) E_ERROR("Final state not reached; no alignment for %s\n\n", uttid); else { if (s2stsegdir) write_s2stseg(s2stsegdir, stseg, uttid, ctlspec, cmd_ln_boolean_r(kbc->config, "-s2cdsen")); if (stsegdir) write_stseg(stsegdir, stseg, uttid, ctlspec); if (phsegdir) write_phseg(phsegdir, phseg, uttid, ctlspec); if (phlabdir) write_phlab(phlabdir, phseg, uttid, ctlspec, cmd_ln_int32_r(kbc->config, "-frate")); if (wdsegdir) write_wdseg(wdsegdir, wdseg, uttid, ctlspec); if (outsentfp) write_outsent(outsentfp, wdseg, uttid); if (outctlfp) write_outctl(outctlfp, ctlspec); } align_destroy_sent_hmm(); ptmr_print_all(stdout, timers, nfr * 0.1); printf ("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n", nfr, tm_utt.t_cpu, tm_utt.t_cpu * 100.0 / nfr, tm_utt.t_elapsed, tm_utt.t_elapsed * 100.0 / nfr); tot_nfr += nfr; }
int32 viterbi_update(float64 *log_forw_prob, vector_t **feature, uint32 n_obs, state_t *state_seq, uint32 n_state, model_inventory_t *inv, float64 a_beam, float32 spthresh, s3phseg_t *phseg, int32 mixw_reest, int32 tmat_reest, int32 mean_reest, int32 var_reest, int32 pass2var, int32 var_is_full, FILE *pdumpfh, feat_t *fcb) { float64 *scale = NULL; float64 **dscale = NULL; float64 **active_alpha; uint32 **active_astate; uint32 **bp; uint32 *n_active_astate; gauden_t *g; /* Gaussian density parameters and reestimation sums */ float32 ***mixw; /* all mixing weights */ float64 ***now_den = NULL; /* Short for den[t] */ uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */ uint32 *active_cb; uint32 n_active_cb; float32 **tacc; /* Transition matrix reestimation sum accumulators for the utterance. */ float32 ***wacc; /* mixing weight reestimation sum accumulators for the utterance. */ float32 ***denacc = NULL; /* mean/var reestimation accumulators for time t */ size_t denacc_size; /* Total size of data references in denacc. Allows for quick clears between time frames */ uint32 n_lcl_cb; uint32 *cb_inv; uint32 i, j, q; int32 t; uint32 n_feat; uint32 n_density; uint32 n_top; int ret; timing_t *fwd_timer = NULL; timing_t *rstu_timer = NULL; timing_t *gau_timer = NULL; timing_t *rsts_timer = NULL; timing_t *rstf_timer = NULL; float64 log_fp; /* accumulator for the log of the probability * of observing the input given the model */ uint32 max_n_next = 0; uint32 n_cb; static float64 *p_op = NULL; static float64 *p_ci_op = NULL; static float64 **d_term = NULL; static float64 **d_term_ci = NULL; /* caller must ensure that there is some non-zero amount of work to be done here */ assert(n_obs > 0); assert(n_state > 0); /* Get the forward estimation CPU timer */ fwd_timer = timing_get("fwd"); /* Get the per utterance reestimation CPU timer */ rstu_timer = timing_get("rstu"); /* Get the Gaussian density evaluation CPU timer */ gau_timer = timing_get("gau"); /* Get the per state reestimation CPU timer */ rsts_timer = timing_get("rsts"); /* Get the per frame reestimation CPU timer */ rstf_timer = timing_get("rstf"); g = inv->gauden; n_feat = gauden_n_feat(g); n_density = gauden_n_density(g); n_top = gauden_n_top(g); n_cb = gauden_n_mgau(g); if (p_op == NULL) { p_op = ckd_calloc(n_feat, sizeof(float64)); p_ci_op = ckd_calloc(n_feat, sizeof(float64)); } if (d_term == NULL) { d_term = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64)); } scale = (float64 *)ckd_calloc(n_obs, sizeof(float64)); dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32)); active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); active_cb = ckd_calloc(2*n_state, sizeof(uint32)); bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); /* Run forward algorithm, which has embedded Viterbi. */ if (fwd_timer) timing_start(fwd_timer); ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state_seq, n_state, inv, a_beam, phseg, 0); /* Dump a phoneme segmentation if requested */ if (cmd_ln_str("-outphsegdir")) { const char *phsegdir; char *segfn, *uttid; phsegdir = cmd_ln_str("-outphsegdir"); uttid = (cmd_ln_int32("-outputfullpath") ? corpus_utt_full_name() : corpus_utt()); segfn = ckd_calloc(strlen(phsegdir) + 1 + strlen(uttid) + strlen(".phseg") + 1, 1); strcpy(segfn, phsegdir); strcat(segfn, "/"); strcat(segfn, uttid); strcat(segfn, ".phseg"); write_phseg(segfn, inv, state_seq, active_astate, n_active_astate, n_state, n_obs, active_alpha, scale, bp); ckd_free(segfn); } if (fwd_timer) timing_stop(fwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto all_done; } mixw = inv->mixw; if (mixw_reest) { /* Need to reallocate mixing accumulators for utt */ if (inv->l_mixw_acc) { ckd_free_3d((void ***)inv->l_mixw_acc); inv->l_mixw_acc = NULL; } inv->l_mixw_acc = (float32 ***)ckd_calloc_3d(inv->n_mixw_inverse, n_feat, n_density, sizeof(float32)); } wacc = inv->l_mixw_acc; n_lcl_cb = inv->n_cb_inverse; cb_inv = inv->cb_inverse; /* Allocate local accumulators for mean, variance reestimation sums if necessary */ gauden_alloc_l_acc(g, n_lcl_cb, mean_reest, var_reest, var_is_full); if (tmat_reest) { if (inv->l_tmat_acc) { ckd_free_2d((void **)inv->l_tmat_acc); inv->l_tmat_acc = NULL; } for (i = 0; i < n_state; i++) { if (state_seq[i].n_next > max_n_next) max_n_next = state_seq[i].n_next; } inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state, max_n_next, sizeof(float32)); } /* transition matrix reestimation sum accumulators for the utterance */ tacc = inv->l_tmat_acc; n_active_cb = 0; now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(float64)); now_den_idx = (uint32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_top, sizeof(uint32)); if (mean_reest || var_reest) { /* allocate space for the per frame density counts */ denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb, n_feat, n_density, sizeof(float32)); /* # of bytes required to store all weighted vectors */ denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32); } else { denacc = NULL; denacc_size = 0; } /* Okay now run through the backtrace and accumulate counts. */ /* Find the non-emitting ending state */ for (q = 0; q < n_active_astate[n_obs-1]; ++q) { if (active_astate[n_obs-1][q] == n_state-1) break; } if (q == n_active_astate[n_obs-1]) { E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n"); ret = S3_ERROR; goto all_done; } for (t = n_obs-1; t >= 0; --t) { uint32 l_cb; uint32 l_ci_cb; float64 op, p_reest_term; uint32 prev; j = active_astate[t][q]; /* Follow any non-emitting states at time t first. */ while (state_seq[j].mixw == TYING_NON_EMITTING) { prev = active_astate[t][bp[t][q]]; #if VITERBI_DEBUG printf("Following non-emitting state at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. */ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j - prev] += 1.0; } q = bp[t][q]; j = prev; } /* Now accumulate statistics for the real state. */ l_cb = state_seq[j].l_cb; l_ci_cb = state_seq[j].l_ci_cb; n_active_cb = 0; if (gau_timer) timing_start(gau_timer); gauden_compute_log(now_den[l_cb], now_den_idx[l_cb], feature[t], g, state_seq[j].cb, NULL); active_cb[n_active_cb++] = l_cb; if (l_cb != l_ci_cb) { gauden_compute_log(now_den[l_ci_cb], now_den_idx[l_ci_cb], feature[t], g, state_seq[j].ci_cb, NULL); active_cb[n_active_cb++] = l_ci_cb; } gauden_scale_densities_bwd(now_den, now_den_idx, &dscale[t], active_cb, n_active_cb, g); assert(state_seq[j].mixw != TYING_NON_EMITTING); /* Now calculate mixture densities. */ /* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */ op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], g); if (gau_timer) timing_stop(gau_timer); if (rsts_timer) timing_start(rsts_timer); /* Make up this bogus value to be consistent with backward.c */ p_reest_term = 1.0 / op; /* Compute the output probability excluding the contribution * of each feature stream. i.e. p_op[0] is the output * probability excluding feature stream 0 */ partial_op(p_op, op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); /* compute the probability of each (of possibly topn) density */ den_terms(d_term, p_reest_term, p_op, now_den[l_cb], now_den_idx[l_cb], mixw[state_seq[j].mixw], n_feat, n_top); if (l_cb != l_ci_cb) { /* For each feature stream f, compute: * sum_k(mixw[f][k] den[f][k]) * and store the results in p_ci_op */ partial_ci_op(p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); /* For each feature stream and density compute the terms: * w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j * and store results in d_term_ci */ den_terms_ci(d_term_ci, 1.0, /* post_j = 1.0 */ p_ci_op, now_den[l_ci_cb], now_den_idx[l_ci_cb], mixw[state_seq[j].ci_mixw], n_feat, n_top); } /* accumulate the probability for each density in the mixing * weight reestimation accumulators */ if (mixw_reest) { accum_den_terms(wacc[state_seq[j].l_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); /* check if mixw and ci_mixw are different to avoid * doubling the EM counts in a CI run. */ if (state_seq[j].mixw != state_seq[j].ci_mixw) { if (n_cb < inv->n_mixw) { /* semi-continuous, tied mixture, and discrete case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term, now_den_idx[l_cb], n_feat, n_top); } else { /* continuous case */ accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } } /* accumulate the probability for each density in the * density reestimation accumulators */ if (mean_reest || var_reest) { accum_den_terms(denacc[l_cb], d_term, now_den_idx[l_cb], n_feat, n_top); if (l_cb != l_ci_cb) { accum_den_terms(denacc[l_ci_cb], d_term_ci, now_den_idx[l_ci_cb], n_feat, n_top); } } if (rsts_timer) timing_stop(rsts_timer); /* Note that there is only one state/frame so this is kind of redundant */ if (rstf_timer) timing_start(rstf_timer); if (mean_reest || var_reest) { /* Update the mean and variance reestimation accumulators */ if (pdumpfh) fprintf(pdumpfh, "time %d:\n", t); accum_gauden(denacc, cb_inv, n_lcl_cb, feature[t], now_den_idx, g, mean_reest, var_reest, pass2var, inv->l_mixw_acc, var_is_full, pdumpfh, fcb); memset(&denacc[0][0][0], 0, denacc_size); } if (rstf_timer) timing_stop(rstf_timer); if (t > 0) { prev = active_astate[t-1][bp[t][q]]; #if VITERBI_DEBUG printf("Backtrace at time %d, %u => %u\n", t, j, prev); #endif /* Backtrace and accumulate transition counts. */ if (tmat_reest) { assert(tacc != NULL); tacc[prev][j-prev] += 1.0; } q = bp[t][q]; j = prev; } } /* If no error was found, add the resulting utterance reestimation * accumulators to the global reestimation accumulators */ if (rstu_timer) timing_start(rstu_timer); accum_global(inv, state_seq, n_state, mixw_reest, tmat_reest, mean_reest, var_reest, var_is_full); if (rstu_timer) timing_stop(rstu_timer); /* Find the final state */ for (i = 0; i < n_active_astate[n_obs-1]; ++i) { if (active_astate[n_obs-1][i] == n_state-1) break; } /* Calculate log[ p( O | \lambda ) ] */ assert(active_alpha[n_obs-1][i] > 0); log_fp = log(active_alpha[n_obs-1][i]); for (t = 0; t < n_obs; t++) { assert(scale[t] > 0); log_fp -= log(scale[t]); for (j = 0; j < inv->gauden->n_feat; j++) { log_fp += dscale[t][j]; } } *log_forw_prob = log_fp; all_done: ckd_free((void *)scale); for (i = 0; i < n_obs; i++) { if (dscale[i]) ckd_free((void *)dscale[i]); } ckd_free((void **)dscale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free((void *)active_cb); if (denacc) ckd_free_3d((void ***)denacc); if (now_den) ckd_free_3d((void ***)now_den); if (now_den_idx) ckd_free_3d((void ***)now_den_idx); if (ret != S3_SUCCESS) E_ERROR("%s ignored\n", corpus_utt_brief_name()); return ret; }
int32 baum_welch_update(float64 *log_forw_prob, vector_t **feature, uint32 n_obs, state_t *state, uint32 n_state, model_inventory_t *inv, float64 a_beam, float64 b_beam, float32 spthresh, s3phseg_t *phseg, int32 mixw_reest, int32 tmat_reest, int32 mean_reest, int32 var_reest, int32 pass2var, int32 var_is_full, FILE *pdumpfh, bw_timers_t *timers, feat_t *fcb) { float64 *scale = NULL; float64 **dscale = NULL; float64 **active_alpha; uint32 **active_astate; uint32 **bp; uint32 *n_active_astate; float64 log_fp; /* accumulator for the log of the probability * of observing the input given the model */ uint32 t; /* time */ int ret; uint32 i,j; /* caller must ensure that there is some non-zero amount of work to be done here */ assert(n_obs > 0); assert(n_state > 0); scale = (float64 *)ckd_calloc(n_obs, sizeof(float64)); dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32)); active_alpha = (float64 **)ckd_calloc(n_obs, sizeof(float64 *)); active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *)); /* Compute the scaled alpha variable and scale factors * for all states and time subject to the pruning constraints */ if (timers) ptmr_start(&timers->fwd_timer); /* * Debug? * E_INFO("Before Forward search\n"); */ ret = forward(active_alpha, active_astate, n_active_astate, bp, scale, dscale, feature, n_obs, state, n_state, inv, a_beam, phseg, timers, 0); #if BW_DEBUG for (i=0 ; i < n_obs; i++) { E_INFO("Number of active states %d at time %d\n",n_active_astate[i],i); E_INFO("Scale of time %d is %e \n",i,scale[i]); for(j=0 ; j < n_active_astate[i]; j++) { E_INFO("Active state: %d Active alpha: %e\n",active_astate[i][j], active_alpha[i][j]); } } i=0; j=0; #endif /* Dump a phoneme segmentation if requested */ if (cmd_ln_str("-outphsegdir")) { const char *phsegdir; char *segfn, *uttid; phsegdir = cmd_ln_str("-outphsegdir"); uttid = (cmd_ln_int32("-outputfullpath") ? corpus_utt_full_name() : corpus_utt()); segfn = ckd_calloc(strlen(phsegdir) + 1 + strlen(uttid) + strlen(".phseg") + 1, 1); strcpy(segfn, phsegdir); strcat(segfn, "/"); strcat(segfn, uttid); strcat(segfn, ".phseg"); write_phseg(segfn, inv, state, active_astate, n_active_astate, n_state, n_obs, active_alpha, scale, bp); ckd_free(segfn); } if (timers) ptmr_stop(&timers->fwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto error; } /* Compute the scaled beta variable and update the reestimation * sums */ if (timers) ptmr_start(&timers->bwd_timer); #if BW_DEBUG E_INFO("Before Backward search\n"); #endif ret = backward_update(active_alpha, active_astate, n_active_astate, scale, dscale, feature, n_obs, state, n_state, inv, b_beam, spthresh, mixw_reest, tmat_reest, mean_reest, var_reest, pass2var, var_is_full, pdumpfh, timers, fcb); if (timers) ptmr_stop(&timers->bwd_timer); if (ret != S3_SUCCESS) { /* Some problem with the utterance, release per utterance storage and * forget about adding the utterance accumulators to the global accumulators */ goto error; } #if BW_DEBUG E_INFO("Before Global Accumulation\n"); #endif /* If no error was found in the forward or backward procedures, * add the resulting utterance reestimation accumulators to the * global reestimation accumulators */ if (timers) ptmr_start(&timers->rstu_timer); accum_global(inv, state, n_state, mixw_reest, tmat_reest, mean_reest, var_reest, var_is_full); if (timers) ptmr_stop(&timers->rstu_timer); for (i = 0; i < n_active_astate[n_obs-1] && active_astate[n_obs-1][i] != (n_state-1); i++); assert(i < n_active_astate[n_obs-1]); /* Calculate log[ p( O | \lambda ) ] */ assert(active_alpha[n_obs-1][i] > 0); log_fp = log(active_alpha[n_obs-1][i]); for (t = 0; t < n_obs; t++) { assert(scale[t] > 0); log_fp -= log(scale[t]); for (j = 0; j < inv->gauden->n_feat; j++) { log_fp += dscale[t][j]; } } *log_forw_prob = log_fp; ckd_free((void *)scale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)dscale[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free((void **)dscale); ckd_free(bp); return S3_SUCCESS; error: ckd_free((void *)scale); for (i = 0; i < n_obs; i++) { if (dscale[i]) ckd_free((void *)dscale[i]); } ckd_free((void **)dscale); ckd_free(n_active_astate); for (i = 0; i < n_obs; i++) { ckd_free((void *)active_alpha[i]); ckd_free((void *)active_astate[i]); ckd_free((void *)bp[i]); } ckd_free((void *)active_alpha); ckd_free((void *)active_astate); ckd_free(bp); E_ERROR("%s ignored\n", corpus_utt_brief_name()); return S3_ERROR; }