/*
 * Compute senone scores for one frame using whichever acoustic model the
 * kbcore carries, and store the evaluator's return value in s->senscale.
 *
 * srch    : search structure; cast to srch_t * internally.
 * feat    : feature data for the frame.  The ms_mgau and s2_mgau branches
 *           pass it through unchanged; the mgau branch uses only feat[0].
 * wav_idx : frame index, forwarded to the evaluation routine.
 *
 * Returns SRCH_SUCCESS.  If no model pointer is set (and the assertions were
 * compiled out) E_FATAL is invoked instead.
 *
 * The model is chosen in the order ms_mgau, s2_mgau, mgau; the first non-NULL
 * pointer wins.
 */
int32
s3_cd_gmm_compute_sen(void *srch, float32 ** feat, int32 wav_idx)
{
    srch_t *s;
    kbcore_t *kbcore;
    mdef_t *mdef;
    ms_mgau_model_t *ms_mgau;
    mgau_model_t *mgau;
    ascr_t *ascr;
    fast_gmm_t *fgmm;
    stat_t *st;

    s = (srch_t *) srch;
    kbcore = s->kbc;
    fgmm = s->fastgmm;
    st = s->stat;
    ascr = s->ascr;

    mdef = kbcore_mdef(kbcore);
    ms_mgau = kbcore_ms_mgau(kbcore);
    mgau = kbcore_mgau(kbcore);

    /* At least one acoustic model must be present. */
    assert(kbcore->ms_mgau || kbcore->mgau || kbcore->s2_mgau);
    /*
     * NOTE(review): this only rejects the case where all three models are
     * set at once; it does not reject two of them being set.  If the intent
     * is "exactly one model", the condition needs tightening -- confirm
     * against how kbcore is initialised before changing it.
     */
    assert(!(kbcore->ms_mgau && kbcore->mgau && kbcore->s2_mgau));

    /* Always use the first buffer in the cache */
    if (kbcore->ms_mgau) {
        s->senscale =
            ms_cont_mgau_frame_eval(ascr, ms_mgau, mdef, feat, wav_idx);
        /* FIXME: Statistics is not correctly updated */
    }
    else if (kbcore->s2_mgau) {
        s->senscale =
            s2_semi_mgau_frame_eval(kbcore->s2_mgau, ascr, fgmm, feat,
                                    wav_idx);
        /* FIXME: Statistics is not correctly updated */
    }
    else if (kbcore->mgau) {
        /* Only the first entry of feat is consumed by this evaluator. */
        s->senscale =
            approx_cont_mgau_frame_eval(mdef,
                                        kbcore_svq(kbcore),
                                        kbcore_gs(kbcore),
                                        mgau,
                                        fgmm,
                                        ascr,
                                        feat[0],
                                        wav_idx,
                                        ascr->cache_ci_senscr[s->cache_win_strt],
                                        &(st->tm_ovrhd),
                                        kbcore_logmath(kbcore));

        /* Accumulate this frame's evaluation counts into the utterance totals. */
        st->utt_sen_eval += mgau_frm_sen_eval(mgau);
        st->utt_gau_eval += mgau_frm_gau_eval(mgau);
    }
    else {
        /* Reachable only when the assertions above are compiled out. */
        E_FATAL("Panic, someone delete the assertion before this block\n");
    }

    return SRCH_SUCCESS;
}
/*
 * Run the CI GMM evaluation for one frame and add the resulting evaluation
 * counts to the utterance statistics.
 *
 * srch      : search structure; cast to srch_t * internally.
 * feat      : feature vector handed to approx_cont_mgau_ci_eval.
 * cache_idx : selects which entry of ascr->cache_ci_senscr and
 *             ascr->cache_best_list receives the results.
 * wav_idx   : frame index, forwarded to the evaluator.
 *
 * Returns SRCH_SUCCESS in every case.  When the kbcore has no mgau model the
 * function does nothing beyond asserting that an ms_mgau or s2_mgau model
 * exists.
 */
int32
approx_ci_gmm_compute(void *srch, float32 * feat, int32 cache_idx,
                      int32 wav_idx)
{
    srch_t *search = (srch_t *) srch;
    kbcore_t *core = search->kbc;
    mgau_model_t *gmm = kbcore_mgau(core);
    stat_t *stats;
    ascr_t *scores;

    /* No CI-GMM done for multistream models (for now) */
    if (gmm == NULL) {
        assert(kbcore_ms_mgau(core) || kbcore_s2_mgau(core));
        return SRCH_SUCCESS;
    }

    stats = search->stat;
    scores = search->ascr;

    approx_cont_mgau_ci_eval(kbcore_svq(core),
                             kbcore_gs(core),
                             gmm,
                             search->fastgmm,
                             kbcore_mdef(core),
                             feat,
                             scores->cache_ci_senscr[cache_idx],
                             &(scores->cache_best_list[cache_idx]),
                             wav_idx,
                             kbcore_logmath(core));

    /* Fold this frame's CI evaluation counts into the utterance totals. */
    stats->utt_cisen_eval += mgau_frm_cisen_eval(gmm);
    stats->utt_cigau_eval += mgau_frm_cigau_eval(gmm);

    return SRCH_SUCCESS;
}
/*
 * Evaluate the senones for frame `frm` with whichever acoustic model kbc
 * holds, checked in the order ms_mgau, s2_mgau, mgau.  If none of the three
 * pointers is set, nothing is evaluated.
 */
static void
align_utt_eval_frame_senones(int32 frm)
{
    if (kbc->ms_mgau) {
        ms_cont_mgau_frame_eval(ascr, kbc->ms_mgau, kbc->mdef, feat[frm],
                                frm);
        return;
    }

    if (kbc->s2_mgau) {
        s2_semi_mgau_frame_eval(kbc->s2_mgau, ascr, fastgmm, feat[frm], frm);
        return;
    }

    if (kbc->mgau) {
        /* CI pass first; its scores in cache slot 0 feed the full pass. */
        approx_cont_mgau_ci_eval(kbcore_svq(kbc),
                                 kbcore_gs(kbc),
                                 kbcore_mgau(kbc),
                                 fastgmm,
                                 kbc->mdef,
                                 feat[frm][0],
                                 ascr->cache_ci_senscr[0],
                                 &(ascr->cache_best_list[0]),
                                 frm,
                                 kbcore_logmath(kbc));
        approx_cont_mgau_frame_eval(kbcore_mdef(kbc),
                                    kbcore_svq(kbc),
                                    kbcore_gs(kbc),
                                    kbcore_mgau(kbc),
                                    fastgmm,
                                    ascr,
                                    feat[frm][0],
                                    frm,
                                    ascr->cache_ci_senscr[0],
                                    &tm_ovrhd,
                                    kbcore_logmath(kbc));
    }
}

/*
 * Find Viterbi alignment.
 *
 * sent    : In: Reference transcript
 * nfr     : In: #frames of input
 * ctlspec : In: Utt specification from control file
 * uttid   : In: Utterance id, for logging and other use
 *
 * Utterances with no more than twice the feature window size in frames are
 * reported and skipped.  On success the segmentations are written to every
 * output destination that is configured, timing information is printed, and
 * nfr is added to tot_nfr.
 */
static void
align_utt(char *sent, int32 nfr, char *ctlspec, char *uttid)
{
    align_stseg_t *stseg;
    align_phseg_t *phseg;
    align_wdseg_t *wdseg;
    int32 frm;
    int32 win;
    int32 min_span;

    /* #MFC vectors needed on either side of current frame to compute one feature vector */
    win = feat_window_size(kbcore_fcb(kbc));
    min_span = win << 1;

    if (nfr <= min_span) {
        E_ERROR("Utterance %s < %d frames (%d); ignored\n", uttid,
                min_span + 1, nfr);
        return;
    }

    ptmr_reset_all(timers);

    ptmr_reset(&tm_utt);
    ptmr_start(&tm_utt);
    ptmr_reset(&tm_ovrhd);
    ptmr_start(&tm_ovrhd);

    ptmr_start(timers + tmr_utt);

    if (align_build_sent_hmm(sent, cmd_ln_int32_r(kbc->config, "-insert_sil"))
        != 0) {
        align_destroy_sent_hmm();
        ptmr_stop(timers + tmr_utt);

        E_ERROR("No sentence HMM; no alignment for %s\n", uttid);
        return;
    }

    align_start_utt(uttid);

    for (frm = 0; frm < nfr; frm++) {
        ptmr_start(timers + tmr_utt);

        /* Obtain active senone flags */
        ptmr_start(timers + tmr_gauden);
        ptmr_start(timers + tmr_senone);

        align_sen_active(ascr->sen_active, ascr->n_sen);

        align_utt_eval_frame_senones(frm);

        ptmr_stop(timers + tmr_gauden);
        ptmr_stop(timers + tmr_senone);

        /* Step alignment one frame forward */
        ptmr_start(timers + tmr_align);
        align_frame(ascr->senscr);
        ptmr_stop(timers + tmr_align);

        ptmr_stop(timers + tmr_utt);
    }

    ptmr_stop(&tm_utt);
    ptmr_stop(&tm_ovrhd);

    printf("\n");

    /* Wind up alignment for this utterance */
    if (align_end_utt(&stseg, &phseg, &wdseg) >= 0) {
        /* Each output is optional; write only the ones that are configured. */
        if (s2stsegdir) {
            write_s2stseg(s2stsegdir, stseg, uttid, ctlspec,
                          cmd_ln_boolean_r(kbc->config, "-s2cdsen"));
        }
        if (stsegdir) {
            write_stseg(stsegdir, stseg, uttid, ctlspec);
        }
        if (phsegdir) {
            write_phseg(phsegdir, phseg, uttid, ctlspec);
        }
        if (phlabdir) {
            write_phlab(phlabdir, phseg, uttid, ctlspec,
                        cmd_ln_int32_r(kbc->config, "-frate"));
        }
        if (wdsegdir) {
            write_wdseg(wdsegdir, wdseg, uttid, ctlspec);
        }
        if (outsentfp) {
            write_outsent(outsentfp, wdseg, uttid);
        }
        if (outctlfp) {
            write_outctl(outctlfp, ctlspec);
        }
    }
    else {
        E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    }

    align_destroy_sent_hmm();

    ptmr_print_all(stdout, timers, nfr * 0.1);

    printf
        ("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
         nfr, tm_utt.t_cpu, tm_utt.t_cpu * 100.0 / nfr, tm_utt.t_elapsed,
         tm_utt.t_elapsed * 100.0 / nfr);

    tot_nfr += nfr;
}
/*
 * Collect per-utterance statistics comparing exact GMM scores against
 * single-Gaussian approximations, then merge them into the running totals.
 *
 * data  : cast to kb_t *; supplies the kbcore, feature buffer, score array
 *         and frame count storage.
 * ur    : utterance resource; ur->uttfile names the input passed to
 *         feat_s2mfc2feat.
 * sf,ef : forwarded to feat_s2mfc2feat.
 * uttid : utterance id; logged and stored in kb->uttid.
 *
 * For every frame and every GMM the function evaluates, in this order:
 *   1. the score using only the best index left over from before,
 *   2. the exact score (this call is made with the last argument set to 1),
 *   3. the score using only the best index as it stands after step 2.
 * The order matters because step 2 sits between the two reads of bstidx.
 *
 * The absolute differences between those scores (and against the senone
 * selected by cd2cisen) are accumulated into CI, CD, per-CI-senone and
 * N-best buckets.  The per-utterance results are printed and added to the
 * totals cd_st, ci_st, sen_st[] and sen_Nbest_st[].
 */
void
gmm_compute(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    kbcore_t *kbcore;
    mdef_t *mdef;
    mgau_model_t *mgau;
    s3senid_t *cd2cisen;

    int32 s, f, t;
    int32 n_cd_sen;             /* number of non-CI senones */
    int32 n_nbest;              /* number of N-best buckets in use */
    int32 tmpint;
    int32 single_el_list[2];

    stats_t cur_ci_st;
    stats_t cur_cd_st;
    stats_t cur_cd_Nbest_st;
    stats_t *stptr;
    char str[100];

    int32 *idx;
    int32 *cur_bstidx;
    int32 *last_bstidx;
    int32 *cur_scr;
    int32 *last_scr;

    int is_cd;
    int err_last;
    int err_cur;
    int err_ci;
    int bst_hit;

    E_INFO("Processing: %s\n", uttid);

    kb = (kb_t *) data;
    kbcore = kb->kbcore;
    mdef = kbcore_mdef(kbcore);
    mgau = kbcore_mgau(kbcore);
    kb->uttid = uttid;

    /* Second entry stays -1 throughout; only slot 0 is ever rewritten. */
    single_el_list[0] = -1;
    single_el_list[1] = -1;

    /* Read mfc file and build feature vectors for entire utterance */
    kb->stat->nfr = feat_s2mfc2feat(kbcore_fcb(kbcore), ur->uttfile,
                                    cmd_ln_str("-cepdir"), ".mfc", sf, ef,
                                    kb->feat, S3_MAX_FRAMES);

    cd2cisen = mdef_cd2cisen(mdef);

    /*
     * Bucket count is computed once, with integer division, so that the
     * initialisation, accumulation and merge loops all agree on it.
     */
    n_cd_sen = (int32) (mdef->n_sen - mdef->n_ci_sen);
    n_nbest = n_cd_sen / NBEST_STEP;

    /*This should be a procedure instead of just logic */
    init_stat(&cur_cd_st, "Current CD Senone");
    init_stat(&cur_ci_st, "Current CI Senone");
    /* Initialised as in the original; not referenced again in this function. */
    init_stat(&cur_cd_Nbest_st, "Current CD NBest Senone");

    /* str is 100 bytes; both formats below are a short label plus one int. */
    for (s = 0; s < mdef->n_ci_sen; s++) {
        sprintf(str, "Cur Senone %d", s);
        init_stat(&cur_sen_st[s], str);
    }

    for (t = 0; t < n_nbest; t++) {
        sprintf(str, " %d -Cur Best Senone", t * NBEST_STEP);
        init_stat(&cur_sen_Nbest_st[t], str);
    }

    idx = ckd_calloc(n_cd_sen, sizeof(int32));
    /* Allocate temporary array for CurScr and Curbst indx and Lat index */
    cur_bstidx = ckd_calloc(mdef->n_sen, sizeof(int32));
    last_bstidx = ckd_calloc(mdef->n_sen, sizeof(int32));
    cur_scr = ckd_calloc(mdef->n_sen, sizeof(int32));
    last_scr = ckd_calloc(mdef->n_sen, sizeof(int32));

    for (f = 0; f < kb->stat->nfr; f++) {
        for (s = 0; s < mgau->n_mgau; s++) {
            /*1, Compute the approximate scores with the last best index. */
            if (mgau->mgau[s].bstidx != NO_BSTIDX) {
                single_el_list[0] = mgau->mgau[s].bstidx;
                last_bstidx[s] = mgau->mgau[s].bstidx;
                last_scr[s] = mgau_eval(mgau, s, single_el_list,
                                        kb->feat[f][0], f, 0);
            }
            else {
                last_bstidx[s] = NO_BSTIDX;
            }

            /*2, Compute the exact scores and sort them and get the ranking. */
            kb->ascr->senscr[s] =
                mgau_eval(mgau, s, NULL, kb->feat[f][0], f, 1);

            /*3, Compute the approximate scores with the current best index */
            if (mgau->mgau[s].bstidx != NO_BSTIDX) {
                single_el_list[0] = mgau->mgau[s].bstidx;
                cur_bstidx[s] = mgau->mgau[s].bstidx;
                cur_scr[s] = mgau_eval(mgau, s, single_el_list,
                                       kb->feat[f][0], f, 0);
            }
            else {
                cur_bstidx[s] = NO_BSTIDX;
            }

            /* The same four values feed every bucket this senone lands in. */
            err_last = abs(last_scr[s] - kb->ascr->senscr[s]);
            err_cur = abs(cur_scr[s] - kb->ascr->senscr[s]);
            err_ci = abs(kb->ascr->senscr[cd2cisen[s]] -
                         kb->ascr->senscr[s]);
            bst_hit = (cur_bstidx[s] == last_bstidx[s]);

            /* Only test for CD senones, test for best index hit */
            /*Update either CI senone and CD senone) */
            is_cd = !mdef_is_cisenone(mdef, s);
            stptr = is_cd ? &cur_cd_st : &cur_ci_st;
            increment_stat(stptr, err_last, err_cur, err_ci, bst_hit);

            if (is_cd) {
                /* Also charge the senone that cd2cisen maps this one to. */
                stptr = &cur_sen_st[cd2cisen[s]];
                increment_stat(stptr, err_last, err_cur, err_ci, bst_hit);
                stptr->total_senone += 1;
            }
        }

        cur_cd_st.total_fr++;
        cur_cd_st.total_senone += mdef->n_sen - mdef->n_ci_sen;
        cur_ci_st.total_fr++;
        cur_ci_st.total_senone += mdef->n_ci_sen;

        for (s = 0; s < mdef->n_ci_sen; s++) {
            cur_sen_st[s].total_fr++;
        }

        /*This is the part we need to do sorting */
        /*1, sort the scores in the current frames */
        /*Pointer trick at here. */
        for (s = 0; s < n_cd_sen; s++) {
            idx[s] = s;
        }

        /*
         * `cd` is defined outside this function and is set immediately
         * before the sort; presumably intcmp_gmm_compute reads it to compare
         * idx entries by score -- confirm against the comparator.
         */
        cd = &(kb->ascr->senscr[mdef->n_ci_sen]);
        qsort(idx, n_cd_sen, sizeof(int32), intcmp_gmm_compute);

        /*This loop is stupid and it is just a hack. */
        for (s = 0; s < n_cd_sen; s++) {
            /* idx holds offsets relative to the first non-CI senone. */
            tmpint = idx[s] + mdef->n_ci_sen;

            err_last = abs(last_scr[tmpint] - kb->ascr->senscr[tmpint]);
            err_cur = abs(cur_scr[tmpint] - kb->ascr->senscr[tmpint]);
            err_ci = abs(kb->ascr->senscr[cd2cisen[tmpint]] -
                         kb->ascr->senscr[tmpint]);
            bst_hit = (cur_bstidx[tmpint] == last_bstidx[tmpint]);

            /* Bucket t accumulates the first t * NBEST_STEP sorted entries. */
            for (t = 0; t < n_nbest; t++) {
                if (s < t * NBEST_STEP) {
                    increment_stat(&cur_sen_Nbest_st[t], err_last, err_cur,
                                   err_ci, bst_hit);
                    cur_sen_Nbest_st[t].total_senone += 1;
                }
            }
        }

        for (t = 0; t < n_nbest; t++) {
            cur_sen_Nbest_st[t].total_fr++;
        }
    }

    print_stat(&cur_cd_st);
    print_stat(&cur_ci_st);
    /*
     * Only show the first NBEST_STEP best.  Bucket 1 is initialised for this
     * utterance only when at least two buckets exist, so skip it otherwise.
     */
    if (n_nbest > 1) {
        print_stat(&cur_sen_Nbest_st[1]);
    }

    add_stat(&cd_st, &cur_cd_st);
    add_stat(&ci_st, &cur_ci_st);

    for (s = 0; s < mdef->n_ci_sen; s++) {
        add_stat(&sen_st[s], &cur_sen_st[s]);
    }

    for (s = 0; s < n_nbest; s++) {
        add_stat(&sen_Nbest_st[s], &cur_sen_Nbest_st[s]);
    }

    ckd_free(idx);
    ckd_free(cur_bstidx);
    ckd_free(last_bstidx);
    ckd_free(cur_scr);
    ckd_free(last_scr);
}