/*
 * Viterbi-align one utterance against its reference transcript.
 *
 * The first call allocates the scratch buffers (one feature vector, the top-N
 * density values per codebook, one frame of senone scores, the active flags) and
 * copies the segmentation output directory options; all of these are kept in
 * function-local statics and reused by every later call.
 *
 * Per frame the function: computes a feature vector from the cepstra, obtains the
 * active senone flags from align_sen_active(), evaluates only the codebooks those
 * senones refer to, scores the active senones (interpolating CD with CI scores
 * when an interpolation object is present), subtracts the frame's best score from
 * every active score, records that best score in senscale[], and advances the
 * alignment by one frame.  After the last frame the state, phone and word
 * segmentations are written to each output that was configured.
 */
static void align_utt (char *sent,	/* In: Reference transcript */
		       float32 **mfc,	/* In: MFC cepstra for input utterance */
		       int32 nfr,	/* In: #frames of input */
		       char *ctlspec,	/* In: Utt specification from control file */
		       char *uttid)	/* In: Utterance id, for logging and other use */
{
    /* Scratch state built on the first call and kept for all later calls */
    static float32 **feat = NULL;
    static int32 w;
    static int32 topn;
    static gauden_dist_t ***dist;
    static int32 *senscr;
    static s3senid_t *sen_active;
    static int8 *mgau_active;
    static char *s2stsegdir;
    static char *stsegdir;
    static char *phsegdir;
    static char *wdsegdir;

    align_stseg_t *stseg;
    align_phseg_t *phseg;
    align_wdseg_t *wdseg;
    char *optval;
    int32 frm, k, sid, cb, n_sen_active, best_scr;

    if (feat == NULL) {
	/* One feature vector: one stream buffer per feature stream */
	feat = (float32 **) ckd_calloc (n_feat, sizeof(float32 *));
	for (k = 0; k < n_feat; k++)
	    feat[k] = (float32 *) ckd_calloc (featlen[k], sizeof(float32));

	/* #MFC vectors needed on either side of the current frame to compute one
	   feature vector; only referenced by the disabled checks noted below */
	w = feat_window_size ();

	/* Top-N density values per codebook; N is clamped to the codebook size */
	topn = *((int32 *) cmd_ln_access("-topn"));
	if (topn > g->n_density) {
	    E_ERROR("-topn argument (%d) > #density codewords (%d); set to latter\n",
		   topn, g->n_density);
	    topn = g->n_density;
	}
	dist = (gauden_dist_t ***) ckd_calloc_3d (g->n_mgau, n_feat, topn,
						  sizeof(gauden_dist_t));

	/* One frame of senone scores plus per-frame active markers */
	senscr = (int32 *) ckd_calloc (sen->n_sen, sizeof(int32));
	sen_active = (s3senid_t *) ckd_calloc (sen->n_sen, sizeof(s3senid_t));
	mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8));

	/* Private copies of the output directories; NULL means "not requested" */
	optval = (char *) cmd_ln_access ("-s2stsegdir");
	s2stsegdir = (optval != NULL) ? (char *) ckd_salloc (optval) : NULL;
	optval = (char *) cmd_ln_access ("-stsegdir");
	stsegdir = (optval != NULL) ? (char *) ckd_salloc (optval) : NULL;
	optval = (char *) cmd_ln_access ("-phsegdir");
	phsegdir = (optval != NULL) ? (char *) ckd_salloc (optval) : NULL;
	optval = (char *) cmd_ln_access ("-wdsegdir");
	wdsegdir = (optval != NULL) ? (char *) ckd_salloc (optval) : NULL;
    }

    /*
     * NOTE: the original source carries a disabled ("HACK") check at this point that
     * rejected utterances with nfr <= 2*w frames.  It remains disabled, so short
     * utterances are processed like any other.
     */

    cyctimer_reset_all ();
    counter_reset_all ();

    timing_reset (tm_utt);
    timing_start (tm_utt);
    cyctimer_resume (tmr_utt);

    /* Cepstral mean normalization, then gain control, as selected on the command line */
    optval = (char *) cmd_ln_access ("-cmn");
    if (! strcmp (optval, "current")) {
	/* Marked as a hack in the original: starts 4 frames before mfc and covers
	   nfr+8 frames.  NOTE(review): presumably the caller pads the cepstra buffer
	   on both sides -- confirm against the caller. */
	norm_mean (mfc-4, nfr+8, cepsize);
    }
    optval = (char *) cmd_ln_access ("-agc");
    if (! strcmp (optval, "max")) {
	agc_max (mfc, nfr);
    }

    /* Without a sentence HMM there is nothing to align */
    if (align_build_sent_hmm (sent) != 0) {
	align_destroy_sent_hmm ();
	cyctimer_pause (tmr_utt);

	E_ERROR("No sentence HMM; no alignment for %s\n", uttid);

	return;
    }

    align_start_utt (uttid);

    /*
     * NOTE: the original source also carries disabled code that replicated the first
     * and last fully-defined feature vectors over the first w and last w frames.
     * With it disabled, every frame's feature vector is computed at mfc+frm.
     */
    for (frm = 0; frm < nfr; frm++) {
	cyctimer_resume (tmr_utt);

	/* Feature vector for this frame from the input cepstra */
	feat_cep2feat (mfc+frm, feat);

	/* Which senones does the alignment want scored this frame? */
	cyctimer_resume (tmr_senone);
	align_sen_active (sen_active, sen->n_sen);
	if (interp) {
	    /* Interpolation reads CI scores, so every CI senone must be scored */
	    for (k = 0; k < mdef->n_ci_sen; k++)
		sen_active[k] = 1;
	}
	/* Compact the flags into a list of ids, in place: the write position never
	   runs ahead of the read position, so no unread flag is overwritten */
	n_sen_active = 0;
	for (k = 0; k < mdef->n_sen; k++) {
	    if (! sen_active[k])
		continue;
	    sen_active[n_sen_active] = k;
	    n_sen_active++;
	}
	cyctimer_pause (tmr_senone);

	/* Mark the codebooks referenced by at least one active senone */
	cyctimer_resume (tmr_gauden);
	for (cb = 0; cb < g->n_mgau; cb++)
	    mgau_active[cb] = 0;
	for (k = 0; k < n_sen_active; k++) {
	    sid = sen_active[k];
	    mgau_active[sen->mgau[sid]] = 1;
	}

	/* Top-N density values, for the marked codebooks only */
	for (cb = 0; cb < g->n_mgau; cb++) {
	    if (mgau_active[cb])
		gauden_dist (g, cb, topn, feat, dist[cb]);
	}
	cyctimer_pause (tmr_gauden);

	/* Score the active senones, tracking the best score of the frame */
	cyctimer_resume (tmr_senone);
	best_scr = (int32) 0x80000000;
	for (k = 0; k < n_sen_active; k++) {
	    sid = sen_active[k];
	    senscr[sid] = senone_eval (sen, sid, dist[sen->mgau[sid]], topn);
	    if (senscr[sid] > best_scr)
		best_scr = senscr[sid];
	}
	if (interp) {
	    /* Only CD senones (ids at or above n_ci_sen) are interpolated */
	    for (k = 0; k < n_sen_active; k++) {
		sid = sen_active[k];
		if (sid >= mdef->n_ci_sen)
		    interp_cd_ci (interp, senscr, sid, mdef->cd2cisen[sid]);
	    }
	}

	/* Normalize by the pre-interpolation best (interpolation can only lower the
	   best score) and remember the offset for this frame */
	for (k = 0; k < n_sen_active; k++) {
	    sid = sen_active[k];
	    senscr[sid] -= best_scr;
	}
	senscale[frm] = best_scr;
	cyctimer_pause (tmr_senone);

	/* Advance the alignment by one frame */
	cyctimer_resume (tmr_align);
	align_frame (senscr);
	cyctimer_pause (tmr_align);

	cyctimer_pause (tmr_utt);
    }
    timing_stop (tm_utt);

    printf ("\n");

    /* Finish the utterance and emit whichever segmentations were requested */
    if (align_end_utt (&stseg, &phseg, &wdseg) >= 0) {
	if (s2stsegdir)
	    write_s2stseg (s2stsegdir, stseg, uttid, ctlspec);
	if (stsegdir)
	    write_stseg (stsegdir, stseg, uttid, ctlspec);
	if (phsegdir)
	    write_phseg (phsegdir, phseg, uttid, ctlspec);
	if (wdsegdir)
	    write_wdseg (wdsegdir, wdseg, uttid, ctlspec);
	if (outsentfp)
	    write_outsent (outsentfp, wdseg, uttid);
    }
    else {
	E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    }

    align_destroy_sent_hmm ();

    /* Per-utterance statistics */
    cyctimer_print_all_norm (stdout, nfr*0.01, tmr_utt);
    counter_print_all (stdout);

    printf("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
	   nfr,
	   tm_utt->t_cpu, tm_utt->t_cpu * 100.0 / nfr,
	   tm_utt->t_elapsed, tm_utt->t_elapsed * 100.0 / nfr);

    tot_nfr += nfr;
}
Exemple #2
0
/*
 * Viterbi-align one utterance against its reference transcript.
 *
 * Utterances that are not longer than twice the feature window are rejected.
 * Otherwise a sentence HMM is built from the transcript, and for every frame the
 * active senones are scored with whichever acoustic model the kbcore holds
 * (multi-stream, semi-continuous, or continuous) before the alignment is advanced
 * by one frame.  At the end, each configured segmentation output is written and
 * timing statistics are printed.
 */
static void
align_utt(char *sent,           /* In: Reference transcript */
          int32 nfr,            /* In: #frames of input */
          char *ctlspec,        /* In: Utt specification from control file */
          char *uttid)          /* In: Utterance id, for logging and other use */
{
    align_stseg_t *stseg;
    align_phseg_t *phseg;
    align_wdseg_t *wdseg;
    int32 frm;
    int32 win;          /* #MFC vectors needed on either side of the current
                           frame to compute one feature vector */
    int32 span;         /* Twice the window */
    int32 insert_sil;

    win = feat_window_size(kbcore_fcb(kbc));
    span = win << 1;
    if (nfr <= span) {
        E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid,
                span + 1, nfr);
        return;
    }

    ptmr_reset_all(timers);

    ptmr_reset(&tm_utt);
    ptmr_start(&tm_utt);
    ptmr_reset(&tm_ovrhd);
    ptmr_start(&tm_ovrhd);
    ptmr_start(&timers[tmr_utt]);

    /* Without a sentence HMM there is nothing to align */
    insert_sil = cmd_ln_int32_r(kbc->config, "-insert_sil");
    if (align_build_sent_hmm(sent, insert_sil) != 0) {
        align_destroy_sent_hmm();
        ptmr_stop(&timers[tmr_utt]);

        E_ERROR("No sentence HMM; no alignment for %s\n", uttid);

        return;
    }

    align_start_utt(uttid);

    for (frm = 0; frm < nfr; frm++) {
        ptmr_start(&timers[tmr_utt]);

        /* Density and senone evaluation are timed together here */
        ptmr_start(&timers[tmr_gauden]);
        ptmr_start(&timers[tmr_senone]);

        /* Obtain active senone flags */
        align_sen_active(ascr->sen_active, ascr->n_sen);

        /* Score the frame with the first acoustic model type that is present */
        if (kbc->ms_mgau) {
            ms_cont_mgau_frame_eval(ascr, kbc->ms_mgau, kbc->mdef,
                                    feat[frm], frm);
        }
        else if (kbc->s2_mgau) {
            s2_semi_mgau_frame_eval(kbc->s2_mgau, ascr, fastgmm,
                                    feat[frm], frm);
        }
        else if (kbc->mgau) {
            /* CI evaluation first; its cache is then passed to the full
               frame evaluation */
            approx_cont_mgau_ci_eval(kbcore_svq(kbc),
                                     kbcore_gs(kbc),
                                     kbcore_mgau(kbc),
                                     fastgmm,
                                     kbc->mdef,
                                     feat[frm][0],
                                     ascr->cache_ci_senscr[0],
                                     &(ascr->cache_best_list[0]),
                                     frm,
                                     kbcore_logmath(kbc));
            approx_cont_mgau_frame_eval(kbcore_mdef(kbc),
                                        kbcore_svq(kbc),
                                        kbcore_gs(kbc),
                                        kbcore_mgau(kbc),
                                        fastgmm,
                                        ascr,
                                        feat[frm][0],
                                        frm,
                                        ascr->cache_ci_senscr[0],
                                        &tm_ovrhd,
                                        kbcore_logmath(kbc));
        }

        ptmr_stop(&timers[tmr_gauden]);
        ptmr_stop(&timers[tmr_senone]);

        /* Advance the alignment by one frame */
        ptmr_start(&timers[tmr_align]);
        align_frame(ascr->senscr);
        ptmr_stop(&timers[tmr_align]);

        ptmr_stop(&timers[tmr_utt]);
    }
    ptmr_stop(&tm_utt);
    ptmr_stop(&tm_ovrhd);

    printf("\n");

    /* Finish the utterance and emit whichever outputs were requested */
    if (align_end_utt(&stseg, &phseg, &wdseg) >= 0) {
        if (s2stsegdir)
            write_s2stseg(s2stsegdir, stseg, uttid, ctlspec,
                          cmd_ln_boolean_r(kbc->config, "-s2cdsen"));
        if (stsegdir)
            write_stseg(stsegdir, stseg, uttid, ctlspec);
        if (phsegdir)
            write_phseg(phsegdir, phseg, uttid, ctlspec);
        if (phlabdir)
            write_phlab(phlabdir, phseg, uttid, ctlspec,
                        cmd_ln_int32_r(kbc->config, "-frate"));
        if (wdsegdir)
            write_wdseg(wdsegdir, wdseg, uttid, ctlspec);
        if (outsentfp)
            write_outsent(outsentfp, wdseg, uttid);
        if (outctlfp)
            write_outctl(outctlfp, ctlspec);
    }
    else {
        E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    }

    align_destroy_sent_hmm();

    /* NOTE(review): the normalizer is nfr * 0.1 while the xRT figures below assume
       100 frames per second -- confirm which unit ptmr_print_all expects. */
    ptmr_print_all(stdout, timers, nfr * 0.1);

    printf
        ("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
         nfr, tm_utt.t_cpu, tm_utt.t_cpu * 100.0 / nfr, tm_utt.t_elapsed,
         tm_utt.t_elapsed * 100.0 / nfr);

    tot_nfr += nfr;
}
/*
 * Viterbi reestimation for one utterance.
 *
 * Runs forward() (which records backpointers), walks the best state sequence
 * backwards from the final state, accumulates mixture weight / transition /
 * density reestimation counts along that single path, adds the per-utterance
 * accumulators to the global ones, and computes log p(O | lambda).
 *
 * log_forw_prob  Out: log likelihood of the utterance; written only on success.
 * feature        In:  feature[t] is the observation evaluated at time t.
 * n_obs          In:  number of observations; must be > 0 (asserted).
 * state_seq      In:  sentence HMM states.
 * n_state        In:  number of states; must be > 0 (asserted).  State
 *                     n_state-1 is treated as the final state.
 * inv            In/Out: model inventory; its local accumulators are reallocated
 *                     here for whichever reestimation flags are set.
 * a_beam         In:  passed through to forward().
 * spthresh       In:  not referenced by this function.
 * phseg          In:  passed through to forward().
 * mixw_reest, tmat_reest, mean_reest, var_reest
 *                In:  non-zero to accumulate the corresponding counts.
 * pass2var, var_is_full, pdumpfh, fcb
 *                In:  passed through to the accumulation helpers; when pdumpfh is
 *                     non-NULL a "time %d:" line is also written per frame.
 *
 * Returns S3_SUCCESS on success.  If forward() fails its status is returned; if
 * the final state is not among the active states of the last frame S3_ERROR is
 * returned.  In both failure cases nothing is added to the global accumulators
 * and *log_forw_prob is left untouched.
 */
int32
viterbi_update(float64 *log_forw_prob,
	       vector_t **feature,
	       uint32 n_obs,
	       state_t *state_seq,
	       uint32 n_state,
	       model_inventory_t *inv,
	       float64 a_beam,
	       float32 spthresh,
	       s3phseg_t *phseg,
	       int32 mixw_reest,
	       int32 tmat_reest,
	       int32 mean_reest,
	       int32 var_reest,
	       int32 pass2var,
	       int32 var_is_full,
	       FILE *pdumpfh,
	       feat_t *fcb)
{
    float64 *scale = NULL;
    float64 **dscale = NULL;
    float64 **active_alpha;
    uint32 **active_astate;
    uint32 **bp;
    uint32 *n_active_astate;
    gauden_t *g;		/* Gaussian density parameters and
				   reestimation sums */
    float32 ***mixw;		/* all mixing weights */
    float64 ***now_den = NULL;	/* Short for den[t] */
    uint32 ***now_den_idx = NULL;/* Short for den_idx[t] */
    uint32 *active_cb;
    uint32 n_active_cb;
    float32 **tacc;		/* Transition matrix reestimation sum accumulators
				   for the utterance. */
    float32 ***wacc;		/* mixing weight reestimation sum accumulators
				   for the utterance. */
    float32 ***denacc = NULL;	/* mean/var reestimation accumulators for time t */
    size_t denacc_size;		/* Total size of data references in denacc.  Allows
				   for quick clears between time frames */
    uint32 n_lcl_cb;
    uint32 *cb_inv;
    uint32 i, j, q;
    int32 t;
    uint32 n_feat;
    uint32 n_density;
    uint32 n_top;
    int ret;
    timing_t *fwd_timer = NULL;
    timing_t *rstu_timer = NULL;
    timing_t *gau_timer = NULL;
    timing_t *rsts_timer = NULL;
    timing_t *rstf_timer = NULL;
    float64 log_fp;	/* accumulator for the log of the probability
			 * of observing the input given the model */
    uint32 max_n_next = 0;
    uint32 n_cb;

    /* Scratch buffers allocated on the first call and reused afterwards.  They are
     * sized from the n_feat / n_top seen on that first call. */
    static float64 *p_op = NULL;
    static float64 *p_ci_op = NULL;
    static float64 **d_term = NULL;
    static float64 **d_term_ci = NULL;

    /* caller must ensure that there is some non-zero amount
       of work to be done here */
    assert(n_obs > 0);
    assert(n_state > 0);

    /* Get the forward estimation CPU timer */
    fwd_timer = timing_get("fwd");
    /* Get the per utterance reestimation CPU timer */
    rstu_timer = timing_get("rstu");
    /* Get the Gaussian density evaluation CPU timer */
    gau_timer = timing_get("gau");
    /* Get the per state reestimation CPU timer */
    rsts_timer = timing_get("rsts");
    /* Get the per frame reestimation CPU timer */
    rstf_timer = timing_get("rstf");

    g = inv->gauden;
    n_feat = gauden_n_feat(g);
    n_density = gauden_n_density(g);
    n_top = gauden_n_top(g);
    n_cb = gauden_n_mgau(g);

    if (p_op == NULL) {
	p_op    = ckd_calloc(n_feat, sizeof(float64));
	p_ci_op = ckd_calloc(n_feat, sizeof(float64));
    }

    if (d_term == NULL) {
	d_term    = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
	d_term_ci = (float64 **)ckd_calloc_2d(n_feat, n_top, sizeof(float64));
    }

    /* Per-utterance storage.  The per-frame rows of dscale, active_alpha,
     * active_astate and bp start out NULL here and are released row by row at
     * all_done. */
    scale = (float64 *)ckd_calloc(n_obs, sizeof(float64));
    dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32));
    active_alpha  = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));
    active_cb = ckd_calloc(2*n_state, sizeof(uint32));
    bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));

    /* Run forward algorithm, which has embedded Viterbi. */
    if (fwd_timer)
	timing_start(fwd_timer);
    ret = forward(active_alpha, active_astate, n_active_astate, bp,
		  scale, dscale,
		  feature, n_obs, state_seq, n_state,
		  inv, a_beam, phseg, 0);
    /* Dump a phoneme segmentation if requested */
    /* NOTE(review): this runs before the status of forward() is checked, so the
     * segmentation is attempted even for a failed forward pass -- confirm that
     * write_phseg() copes with that. */
    if (cmd_ln_str("-outphsegdir")) {
	    const char *phsegdir;
	    char *segfn, *uttid;

	    phsegdir = cmd_ln_str("-outphsegdir");
	    uttid = (cmd_ln_int32("-outputfullpath")
		     ? corpus_utt_full_name() : corpus_utt());
	    /* <dir> + '/' + <uttid> + ".phseg" + terminating NUL */
	    segfn = ckd_calloc(strlen(phsegdir) + 1
			       + strlen(uttid)
			       + strlen(".phseg") + 1, 1);
	    strcpy(segfn, phsegdir);
	    strcat(segfn, "/");
	    strcat(segfn, uttid);
	    strcat(segfn, ".phseg");
	    write_phseg(segfn, inv, state_seq, active_astate, n_active_astate,
			n_state, n_obs, active_alpha, scale, bp);
	    ckd_free(segfn);
    }
    if (fwd_timer)
	timing_stop(fwd_timer);


    if (ret != S3_SUCCESS) {

	/* Some problem with the utterance, release per utterance storage and
	 * forget about adding the utterance accumulators to the global accumulators */

	goto all_done;
    }

    mixw = inv->mixw;

    if (mixw_reest) {
	/* Need to reallocate mixing accumulators for utt */
	if (inv->l_mixw_acc) {
	    ckd_free_3d((void ***)inv->l_mixw_acc);
	    inv->l_mixw_acc = NULL;
	}
	inv->l_mixw_acc = (float32 ***)ckd_calloc_3d(inv->n_mixw_inverse,
						     n_feat,
						     n_density,
						     sizeof(float32));
    }
    wacc = inv->l_mixw_acc;
    n_lcl_cb = inv->n_cb_inverse;
    cb_inv = inv->cb_inverse;

    /* Allocate local accumulators for mean, variance reestimation
       sums if necessary */
    gauden_alloc_l_acc(g, n_lcl_cb,
		       mean_reest, var_reest,
		       var_is_full);

    if (tmat_reest) {
	if (inv->l_tmat_acc) {
	    ckd_free_2d((void **)inv->l_tmat_acc);
	    inv->l_tmat_acc = NULL;
	}
	/* Row width is the largest successor count of any state */
	for (i = 0; i < n_state; i++) {
	    if (state_seq[i].n_next > max_n_next)
		max_n_next = state_seq[i].n_next;
	}
	inv->l_tmat_acc = (float32 **)ckd_calloc_2d(n_state,
						    max_n_next,
						    sizeof(float32));
    }
    /* transition matrix reestimation sum accumulators
       for the utterance */
    tacc = inv->l_tmat_acc;

    n_active_cb = 0;
    now_den = (float64 ***)ckd_calloc_3d(n_lcl_cb,
					 n_feat,
					 n_top,
					 sizeof(float64));
    now_den_idx =  (uint32 ***)ckd_calloc_3d(n_lcl_cb,
					     n_feat,
					     n_top,
					     sizeof(uint32));

    if (mean_reest || var_reest) {
	/* allocate space for the per frame density counts */
	denacc = (float32 ***)ckd_calloc_3d(n_lcl_cb,
					    n_feat,
					    n_density,
					    sizeof(float32));

	/* # of bytes required to store all weighted vectors */
	denacc_size = n_lcl_cb * n_feat * n_density * sizeof(float32);
    }
    else {
	denacc = NULL;
	denacc_size = 0;
    }

    /* Okay now run through the backtrace and accumulate counts. */
    /* Find the non-emitting ending state */
    for (q = 0; q < n_active_astate[n_obs-1]; ++q) {
	if (active_astate[n_obs-1][q] == n_state-1)
	    break;
    }
    if (q == n_active_astate[n_obs-1]) {
	E_ERROR("Failed to align audio to trancript: final state of the search is not reached\n");
	ret = S3_ERROR;
	goto all_done;
    }

    /* Walk the best path backwards.  q indexes the active-state list of the current
     * frame and j is the corresponding state id. */
    for (t = n_obs-1; t >= 0; --t) {
	uint32 l_cb;
	uint32 l_ci_cb;
	float64 op, p_reest_term;
	uint32 prev;

	j = active_astate[t][q];

	/* Follow any non-emitting states at time t first. */
	while (state_seq[j].mixw == TYING_NON_EMITTING) {
	    /* The predecessor of a non-emitting state is in the same frame */
	    prev = active_astate[t][bp[t][q]];

#if VITERBI_DEBUG
	    printf("Following non-emitting state at time %d, %u => %u\n",
		   t, j, prev);
#endif
	    /* Backtrace and accumulate transition counts. */
	    if (tmat_reest) {
		assert(tacc != NULL);
		tacc[prev][j - prev] += 1.0;
	    }
	    q = bp[t][q];
	    j = prev;
	}

	/* Now accumulate statistics for the real state. */
	l_cb = state_seq[j].l_cb;
	l_ci_cb = state_seq[j].l_ci_cb;
	n_active_cb = 0;

	if (gau_timer)
	    timing_start(gau_timer);

	gauden_compute_log(now_den[l_cb],
			   now_den_idx[l_cb],
			   feature[t],
			   g,
			   state_seq[j].cb,
			   NULL);
	active_cb[n_active_cb++] = l_cb;

	/* The CI codebook is evaluated as well when it differs from the state's own */
	if (l_cb != l_ci_cb) {
	    gauden_compute_log(now_den[l_ci_cb],
			       now_den_idx[l_ci_cb],
			       feature[t],
			       g,
			       state_seq[j].ci_cb,
			       NULL);
	    active_cb[n_active_cb++] = l_ci_cb;
	}
	gauden_scale_densities_bwd(now_den, now_den_idx,
				   &dscale[t],
				   active_cb, n_active_cb, g);

	assert(state_seq[j].mixw != TYING_NON_EMITTING);
	/* Now calculate mixture densities. */
	/* This is the normalizer sum_m c_{jm} p(o_t|\lambda_{jm}) */
	op = gauden_mixture(now_den[l_cb], now_den_idx[l_cb],
			    mixw[state_seq[j].mixw], g);
	if (gau_timer)
	    timing_stop(gau_timer);

	if (rsts_timer)
	    timing_start(rsts_timer);
	/* Make up this bogus value to be consistent with backward.c */
	p_reest_term = 1.0 / op;

	/* Compute the output probability excluding the contribution
	 * of each feature stream.  i.e. p_op[0] is the output
	 * probability excluding feature stream 0 */
	partial_op(p_op,
		   op,
		   now_den[l_cb],
		   now_den_idx[l_cb],
		   mixw[state_seq[j].mixw],
		   n_feat,
		   n_top);

	/* compute the probability of each (of possibly topn) density */
	den_terms(d_term,
		  p_reest_term,
		  p_op,
		  now_den[l_cb],
		  now_den_idx[l_cb],
		  mixw[state_seq[j].mixw],
		  n_feat,
		  n_top);

	if (l_cb != l_ci_cb) {
	    /* For each feature stream f, compute:
	     *     sum_k(mixw[f][k] den[f][k])
	     * and store the results in p_ci_op */
	    partial_ci_op(p_ci_op,
			  now_den[l_ci_cb],
			  now_den_idx[l_ci_cb],
			  mixw[state_seq[j].ci_mixw],
			  n_feat,
			  n_top);

	    /* For each feature stream and density compute the terms:
	     *   w[f][k] den[f][k] / sum_k(w[f][k] den[f][k]) * post_j
	     * and store results in d_term_ci */
	    den_terms_ci(d_term_ci,
			 1.0, /* post_j = 1.0 */
			 p_ci_op,
			 now_den[l_ci_cb],
			 now_den_idx[l_ci_cb],
			 mixw[state_seq[j].ci_mixw],
			 n_feat,
			 n_top);
	}
		    

	/* accumulate the probability for each density in the mixing
	 * weight reestimation accumulators */
	if (mixw_reest) {
	    accum_den_terms(wacc[state_seq[j].l_mixw], d_term,
			    now_den_idx[l_cb], n_feat, n_top);

	    /* check if mixw and ci_mixw are different to avoid
	     * doubling the EM counts in a CI run. */
	    if (state_seq[j].mixw != state_seq[j].ci_mixw) {
                if (n_cb < inv->n_mixw) {
                    /* semi-continuous, tied mixture, and discrete case */
		    accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term,
				    now_den_idx[l_cb], n_feat, n_top);
		}
		else {
		    /* continuous case */
		    accum_den_terms(wacc[state_seq[j].l_ci_mixw], d_term_ci,
				    now_den_idx[l_ci_cb], n_feat, n_top);
		}
	    }
	}
		    
	/* accumulate the probability for each density in the 
	 * density reestimation accumulators */
	if (mean_reest || var_reest) {
	    accum_den_terms(denacc[l_cb], d_term,
			    now_den_idx[l_cb], n_feat, n_top);
	    if (l_cb != l_ci_cb) {
		accum_den_terms(denacc[l_ci_cb], d_term_ci,
				now_den_idx[l_ci_cb], n_feat, n_top);
	    }
	}
		
	if (rsts_timer)
	    timing_stop(rsts_timer);
	/* Note that there is only one state/frame so this is kind of
	   redundant */
 	if (rstf_timer)
	    timing_start(rstf_timer);
	if (mean_reest || var_reest) {
	    /* Update the mean and variance reestimation accumulators */
	    if (pdumpfh)
		fprintf(pdumpfh, "time %d:\n", t);
	    accum_gauden(denacc,
			 cb_inv,
			 n_lcl_cb,
			 feature[t],
			 now_den_idx,
			 g,
			 mean_reest,
			 var_reest,
			 pass2var,
			 inv->l_mixw_acc,
			 var_is_full,
			 pdumpfh,
			 fcb);
	    /* Clear the per-frame density counts for the next frame */
	    memset(&denacc[0][0][0], 0, denacc_size);
	}
	if (rstf_timer)
	    timing_stop(rstf_timer);

	if (t > 0) { 
	    /* The predecessor of an emitting state is in the previous frame */
	    prev = active_astate[t-1][bp[t][q]];
#if VITERBI_DEBUG
	    printf("Backtrace at time %d, %u => %u\n",
		   t, j, prev);
#endif
	    /* Backtrace and accumulate transition counts. */
	    if (tmat_reest) {
		assert(tacc != NULL);
		tacc[prev][j-prev] += 1.0;
	    }
	    q = bp[t][q];
	    j = prev;
	}
    }

    /* If no error was found, add the resulting utterance reestimation
     * accumulators to the global reestimation accumulators */
    if (rstu_timer)
	timing_start(rstu_timer);
    accum_global(inv, state_seq, n_state,
		 mixw_reest, tmat_reest, mean_reest, var_reest,
		 var_is_full);
    if (rstu_timer)
	timing_stop(rstu_timer);

    /* Find the final state */
    for (i = 0; i < n_active_astate[n_obs-1]; ++i) {
	if (active_astate[n_obs-1][i] == n_state-1)
	    break;
    }
    /* Calculate log[ p( O | \lambda ) ] */
    assert(active_alpha[n_obs-1][i] > 0);
    log_fp = log(active_alpha[n_obs-1][i]);
    for (t = 0; t < n_obs; t++) {
	assert(scale[t] > 0);
	log_fp -= log(scale[t]);
	for (j = 0; j < inv->gauden->n_feat; j++) {
	    log_fp += dscale[t][j];
	}
    }

    *log_forw_prob = log_fp;

 all_done:
    /* Release all per-utterance storage; reached on success and on failure */
    ckd_free((void *)scale);
    for (i = 0; i < n_obs; i++) {
	if (dscale[i])
	    ckd_free((void *)dscale[i]);
    }
    ckd_free((void **)dscale);
    
    ckd_free(n_active_astate);
    for (i = 0; i < n_obs; i++) {
	ckd_free((void *)active_alpha[i]);
	ckd_free((void *)active_astate[i]);
	ckd_free((void *)bp[i]);
    }
    ckd_free((void *)active_alpha);
    ckd_free((void *)active_astate);
    /* The backpointer row array itself must go too, not only its rows; it was
     * previously leaked on every call. */
    ckd_free((void *)bp);
    ckd_free((void *)active_cb);

    if (denacc)
	ckd_free_3d((void ***)denacc);

    if (now_den)
	ckd_free_3d((void ***)now_den);
    if (now_den_idx)
	ckd_free_3d((void ***)now_den_idx);

    if (ret != S3_SUCCESS)
	E_ERROR("%s ignored\n", corpus_utt_brief_name());

    return ret;
}
Exemple #4
0
/*
 * Baum-Welch reestimation for one utterance.
 *
 * Runs forward() to obtain the scaled alphas of the active states, then
 * backward_update() to accumulate the reestimation sums, adds the per-utterance
 * accumulators to the global ones, and computes log p(O | lambda) from the alpha
 * of the final state (state n_state-1) and the per-frame scale factors.
 *
 * log_forw_prob  Out: log likelihood of the utterance; written only on success.
 * n_obs, n_state In:  both must be > 0 (asserted).
 * timers         In:  optional; when NULL no timing is recorded by this function.
 * All remaining arguments are passed through to forward(), backward_update()
 * and accum_global().
 *
 * Returns S3_SUCCESS, or S3_ERROR when either pass fails; in the failure case
 * nothing is added to the global accumulators and the utterance is reported as
 * ignored.  Per-utterance storage is released on every path.
 */
int32
baum_welch_update(float64 *log_forw_prob,
                  vector_t **feature,
                  uint32 n_obs,
                  state_t *state,
                  uint32 n_state,
                  model_inventory_t *inv,
                  float64 a_beam,
                  float64 b_beam,
                  float32 spthresh,
                  s3phseg_t *phseg,
                  int32 mixw_reest,
                  int32 tmat_reest,
                  int32 mean_reest,
                  int32 var_reest,
                  int32 pass2var,
                  int32 var_is_full,
                  FILE *pdumpfh,
                  bw_timers_t *timers,
                  feat_t *fcb)
{
    float64 *scale = NULL;
    float64 **dscale = NULL;
    float64 **active_alpha;
    uint32 **active_astate;
    uint32 **bp;
    uint32 *n_active_astate;
    float64 log_fp;	/* accumulator for the log of the probability
			 * of observing the input given the model */
    uint32 t;		/* time */
    uint32 last_t;	/* index of the last frame */
    uint32 i, j;
    int ret;
    int32 status = S3_ERROR;	/* flipped to S3_SUCCESS only once everything worked */

    /* caller must ensure that there is some non-zero amount
       of work to be done here */
    assert(n_obs > 0);
    assert(n_state > 0);

    /* Per-utterance storage: the per-frame rows start out NULL */
    scale = (float64 *)ckd_calloc(n_obs, sizeof(float64));
    dscale = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    n_active_astate = (uint32 *)ckd_calloc(n_obs, sizeof(uint32));
    active_alpha  = (float64 **)ckd_calloc(n_obs, sizeof(float64 *));
    active_astate = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));
    bp = (uint32 **)ckd_calloc(n_obs, sizeof(uint32 *));

    /* Forward pass: scaled alpha and scale factors for all states and times,
     * subject to the pruning constraints */
    if (timers)
        ptmr_start(&timers->fwd_timer);

    ret = forward(active_alpha, active_astate, n_active_astate, bp,
                  scale, dscale,
                  feature, n_obs, state, n_state,
                  inv, a_beam, phseg, timers, 0);

#if BW_DEBUG
    for (i=0 ; i < n_obs; i++) {
        E_INFO("Number of active states %d at time %d\n",n_active_astate[i],i);
        E_INFO("Scale of time %d is %e \n",i,scale[i]);
        for(j=0 ; j < n_active_astate[i]; j++) {
            E_INFO("Active state: %d Active alpha: %e\n",active_astate[i][j], active_alpha[i][j]);
        }
    }
    i=0;
    j=0;
#endif

    /* Optional phoneme segmentation dump; counted as part of the forward time.
     * NOTE(review): this runs before the status of forward() is checked -- confirm
     * that write_phseg() copes with a failed forward pass. */
    if (cmd_ln_str("-outphsegdir")) {
        const char *phsegdir;
        char *segfn, *uttid;

        phsegdir = cmd_ln_str("-outphsegdir");
        uttid = (cmd_ln_int32("-outputfullpath")
                 ? corpus_utt_full_name() : corpus_utt());
        /* <dir> + '/' + <uttid> + ".phseg" + terminating NUL */
        segfn = ckd_calloc(strlen(phsegdir) + 1
                           + strlen(uttid)
                           + strlen(".phseg") + 1, 1);
        strcpy(segfn, phsegdir);
        strcat(segfn, "/");
        strcat(segfn, uttid);
        strcat(segfn, ".phseg");
        write_phseg(segfn, inv, state, active_astate, n_active_astate,
                    n_state, n_obs, active_alpha, scale, bp);
        ckd_free(segfn);
    }

    if (timers)
        ptmr_stop(&timers->fwd_timer);

    /* A failed forward pass means the utterance contributes nothing */
    if (ret != S3_SUCCESS)
        goto cleanup;

    /* Backward pass: scaled beta and update of the reestimation sums */
    if (timers)
        ptmr_start(&timers->bwd_timer);

#if BW_DEBUG
    E_INFO("Before Backward search\n");
#endif

    ret = backward_update(active_alpha, active_astate, n_active_astate, scale, dscale,
                          feature, n_obs,
                          state, n_state,
                          inv, b_beam, spthresh,
                          mixw_reest, tmat_reest, mean_reest, var_reest, pass2var,
                          var_is_full, pdumpfh, timers, fcb);
    if (timers)
        ptmr_stop(&timers->bwd_timer);

    /* Likewise for a failed backward pass */
    if (ret != S3_SUCCESS)
        goto cleanup;

#if BW_DEBUG
    E_INFO("Before Global Accumulation\n");
#endif

    /* Both passes succeeded: fold the utterance accumulators into the global ones */
    if (timers)
        ptmr_start(&timers->rstu_timer);
    accum_global(inv, state, n_state,
                 mixw_reest, tmat_reest, mean_reest, var_reest,
                 var_is_full);
    if (timers)
        ptmr_stop(&timers->rstu_timer);

    /* Locate the final state among the active states of the last frame */
    last_t = n_obs - 1;
    for (i = 0; i < n_active_astate[last_t]; i++) {
        if (active_astate[last_t][i] == (n_state-1))
            break;
    }

    assert(i < n_active_astate[n_obs-1]);

    /* Calculate log[ p( O | \lambda ) ] */
    assert(active_alpha[n_obs-1][i] > 0);
    log_fp = log(active_alpha[n_obs-1][i]);
    for (t = 0; t < n_obs; t++) {
        assert(scale[t] > 0);
        log_fp -= log(scale[t]);
        for (j = 0; j < inv->gauden->n_feat; j++) {
            log_fp += dscale[t][j];
        }
    }

    *log_forw_prob = log_fp;
    status = S3_SUCCESS;

cleanup:
    /* Single release point for the per-utterance storage, shared by the success
     * and failure paths.  Rows may still be NULL after a failed pass. */
    for (i = 0; i < n_obs; i++) {
        if (dscale[i])
            ckd_free((void *)dscale[i]);
        ckd_free((void *)active_alpha[i]);
        ckd_free((void *)active_astate[i]);
        ckd_free((void *)bp[i]);
    }
    ckd_free((void *)scale);
    ckd_free((void **)dscale);
    ckd_free(n_active_astate);
    ckd_free((void *)active_alpha);
    ckd_free((void *)active_astate);
    ckd_free(bp);

    if (status != S3_SUCCESS)
        E_ERROR("%s ignored\n", corpus_utt_brief_name());

    return status;
}