/**
 * Decode a single utterance.
 *
 * Fills kb->feat with the utterance's feature vectors, either by running the
 * front end over a waveform (when "-adcin" is set) or by reading a cepstrum
 * file, then applies the per-utterance language model / MLLR transform named
 * in @p ur and runs utt_begin(), utt_decode_block() and utt_end().
 * Every input failure is reported through E_FATAL.
 *
 * @param data   Knowledge base for the decoder; cast to kb_t *.
 * @param ur     Per-utterance resources; uttfile, lmname, regmatname and
 *               cb2mllrname are read here.
 * @param sf     Forwarded to feat_s2mfc2feat() (presumably the start frame);
 *               not used on the "-adcin" path.
 * @param ef     Forwarded to feat_s2mfc2feat() (presumably the end frame);
 *               not used on the "-adcin" path.
 * @param uttid  Utterance ID; logged and recorded via kb_set_uttid().
 */
void
utt_decode(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    kbcore_t *kbcore;
    cmd_ln_t *config;
    int32 num_decode_frame;
    int32 total_frame;
    stat_t *st;
    srch_t *s;

    num_decode_frame = 0;
    E_INFO("Processing: %s\n", uttid);

    kb = (kb_t *) data;
    kbcore = kb->kbcore;
    config = kbcore_config(kbcore);
    kb_set_uttid(uttid, ur->uttfile, kb);
    st = kb->stat;

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }

        /* Release the cepstra kept from the previous utterance before the
         * front end hands back a new buffer. */
        if (kb->mfcc) {
            ckd_free_2d((void **) kb->mfcc);
            kb->mfcc = NULL;
        }

        fe_start_utt(kb->fe);
        if (fe_process_utt(kb->fe, adcdata, nsamps, &kb->mfcc, &total_frame) < 0) {
            /* The message previously had no %s, so the file name argument
             * was silently dropped. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);

        if (total_frame > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n", S3_MAX_FRAMES);
        }

        if ((total_frame = feat_s2mfc2feat_live(kbcore_fcb(kbcore),
                                                kb->mfcc,
                                                &total_frame,
                                                TRUE, TRUE,
                                                kb->feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }
    }
    else {
        /* Read mfc file and build feature vectors for entire utterance */
        if ((total_frame = feat_s2mfc2feat(kbcore_fcb(kbcore), ur->uttfile,
                                           cmd_ln_str_r(config, "-cepdir"),
                                           cmd_ln_str_r(config, "-cepext"),
                                           sf, ef, kb->feat,
                                           S3_MAX_FRAMES)) < 0) {
            E_FATAL("Cannot read file %s. Forced exit\n", ur->uttfile);
        }
    }

    /* Also need to make sure we don't set resource if it is the same.
     * Well, this mechanism could be provided inside the following function. */
    s = kb->srch;
    if (ur->lmname != NULL)
        srch_set_lm(s, ur->lmname);
    if (ur->regmatname != NULL)
        kb_setmllr(ur->regmatname, ur->cb2mllrname, kb);

    /* These are necessary! */
    s->uttid = kb->uttid;
    s->uttfile = kb->uttfile;

    utt_begin(kb);
    utt_decode_block(kb->feat, total_frame, &num_decode_frame, kb);
    utt_end(kb);

    /* Accumulate this utterance's frame count into the running total. */
    st->tot_fr += st->nfr;
}
/**
 * Force-align a single utterance against its transcript.
 *
 * Reads the next line from the transcript stream `sentfp`, strips a trailing
 * "(uttid)" tag if one is present and compares it with @p uttid (a mismatch
 * is logged with E_ERROR but does not stop processing).  The utterance's
 * features are then loaded into the file-scope `feat` buffer, either from a
 * waveform through the front end `fe` (when "-adcin" is set) or from a
 * cepstrum file, an optional MLLR transform is applied, and align_utt() is
 * called when at least one frame was produced.
 *
 * Relies on file-scope state defined elsewhere: kbc, sentfp, sentfile, fe,
 * feat and adapt_am.
 *
 * @param data   Command-line configuration; cast to cmd_ln_t *.
 * @param ur     Per-utterance resources; uttfile, regmatname and cb2mllrname
 *               are read here.
 * @param sf     Forwarded to feat_s2mfc2feat() (presumably the start frame);
 *               not used on the "-adcin" path.
 * @param ef     Forwarded to feat_s2mfc2feat() (presumably the end frame);
 *               not used on the "-adcin" path.
 * @param uttid  Utterance ID from the control file.
 */
static void
utt_align(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    int32 nfr;
    int k, i;
    const char *cepdir;
    const char *cepext;
    char sent[16384];
    cmd_ln_t *config = (cmd_ln_t *) data;

    cepdir = cmd_ln_str_r(kbc->config, "-cepdir");
    cepext = cmd_ln_str_r(kbc->config, "-cepext");

    /* UGLY! */
    /* Read utterance transcript and match it with the control file. */
    if (fgets(sent, sizeof(sent), sentfp) == NULL) {
        E_FATAL("EOF(%s) of the transcription\n", sentfile);
    }
    /*  E_INFO("SENT %s\n",sent); */

    /* Strip utterance id from the end of the transcript: first skip
     * backwards over trailing whitespace ... */
    for (k = strlen(sent) - 1;
         (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t') || (sent[k] == ' '));
         --k) {
        /* empty */
    }

    /* ... then, if the text ends in ')', look for the matching '('. */
    if ((k > 0) && (sent[k] == ')')) {
        for (--k; (k >= 0) && (sent[k] != '('); --k) {
            /* empty */
        }

        if ((k >= 0) && (sent[k] == '(')) {
            /* Cut the transcript off where the "(uttid)" tag starts. */
            sent[k] = '\0';

            /* Check that uttid in transcript and control file match */
            for (i = ++k;
                 sent[i] && (sent[i] != ')') && (sent[i] != '\n')
                 && (sent[i] != '\t') && (sent[i] != ' ');
                 i++) {
                /* empty */
            }
            sent[i] = '\0';

            if (id_cmp(sent + k, uttid) != 0)
                E_ERROR
                    ("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n",
                     uttid, sent + k);
        }
    }

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;
        mfcc_t **mfcc;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }

        fe_start_utt(fe);
        if (fe_process_utt(fe, adcdata, nsamps, &mfcc, &nfr) < 0) {
            /* The message previously had no %s, so the file name argument
             * was silently dropped. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);

        if (nfr > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n", S3_MAX_FRAMES);
        }

        if ((nfr = feat_s2mfc2feat_live(kbcore_fcb(kbc),
                                        mfcc,
                                        &nfr,
                                        TRUE, TRUE,
                                        feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }

        /* The cepstra are only needed to build the features. */
        if (mfcc)
            ckd_free_2d((void **) mfcc);
    }
    else {
        nfr = feat_s2mfc2feat(kbcore_fcb(kbc), ur->uttfile, cepdir, cepext,
                              sf, ef, feat, S3_MAX_FRAMES);
    }

    /* Apply the per-utterance MLLR transform to whichever acoustic model
     * type is loaded. */
    if (ur->regmatname) {
        if (kbc->mgau)
            adapt_set_mllr(adapt_am, kbc->mgau, ur->regmatname,
                           ur->cb2mllrname, kbc->mdef, kbc->config);
        else if (kbc->ms_mgau)
            model_set_mllr(kbc->ms_mgau, ur->regmatname,
                           ur->cb2mllrname, kbcore_fcb(kbc),
                           kbc->mdef, kbc->config);
        else
            E_WARN("Can't use MLLR matrices with .s2semi. yet\n");
    }

    if (nfr <= 0) {
        /* No frames: report the failure and skip the alignment. */
        if (cepdir != NULL) {
            E_ERROR
                ("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n",
                 uttid, ur->uttfile, cepdir, cepext);
        }
        else {
            E_ERROR
                ("Utt %s: Input file read (%s) with extension (%s) failed \n",
                 uttid, ur->uttfile, cepext);
        }
    }
    else {
        E_INFO("%s: %d input frames\n", uttid, nfr);
        align_utt(sent, nfr, ur->uttfile, uttid);
    }
}
/**
 * Collect statistics on best-Gaussian-index approximation for one utterance.
 *
 * For every frame and every GMM the score is evaluated three times through
 * mgau_eval(): (1) with only the best index left over from the previous
 * evaluation, (2) exactly, and (3) with only the best index in place after
 * the exact evaluation.  The absolute differences between the approximate
 * scores, the corresponding CI senone score and the exact score, plus whether
 * the best index stayed the same, are fed to increment_stat() for
 *   - the overall CI or CD senone statistics,
 *   - the per-CI-senone statistics (CD senones only), and
 *   - the N-best buckets, after ranking the CD senones of the frame with
 *     qsort() and intcmp_gmm_compute.
 * Per-utterance results are printed and then merged into the running totals.
 *
 * Relies on file-scope state defined elsewhere: cur_sen_st, cur_sen_Nbest_st,
 * cd (the score array read by intcmp_gmm_compute), cd_st, ci_st, sen_st and
 * sen_Nbest_st.
 *
 * @param data   Knowledge base; cast to kb_t *.
 * @param ur     Per-utterance resources; only uttfile is read.
 * @param sf     Forwarded to feat_s2mfc2feat() (presumably the start frame).
 * @param ef     Forwarded to feat_s2mfc2feat() (presumably the end frame).
 * @param uttid  Utterance ID; logged and stored in kb->uttid.
 */
void
gmm_compute(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    kbcore_t *kbcore;
    mdef_t *mdef;
    mgau_model_t *mgau;
    int32 s, f, t;
    int32 n_cd_sen;             /* number of context-dependent senones */
    int32 n_nbest;              /* number of N-best buckets */
    int32 single_el_list[2];
    stats_t cur_ci_st;
    stats_t cur_cd_st;
    stats_t cur_cd_Nbest_st;
    stats_t *stptr;
    char str[100];
    int32 *idx;
    int32 *cur_bstidx;
    int32 *last_bstidx;
    int32 *cur_scr;
    int32 *last_scr;
    int32 tmpint;
    s3senid_t *cd2cisen;

    E_INFO("Processing: %s\n", uttid);

    kb = (kb_t *) data;
    kbcore = kb->kbcore;
    mdef = kbcore_mdef(kbcore);
    mgau = kbcore_mgau(kbcore);
    kb->uttid = uttid;

    /* mgau_eval() takes a list of component indices; the second slot stays
     * at -1 (NOTE(review): presumably the list terminator -- confirm). */
    single_el_list[0] = -1;
    single_el_list[1] = -1;

    /* Read mfc file and build feature vectors for entire utterance */
    kb->stat->nfr =
        feat_s2mfc2feat(kbcore_fcb(kbcore), ur->uttfile,
                        cmd_ln_str("-cepdir"), ".mfc", sf, ef, kb->feat,
                        S3_MAX_FRAMES);

    cd2cisen = mdef_cd2cisen(mdef);

    /* One bucket count shared by every loop below, so initialisation,
     * accumulation and merging can never disagree on the array extent. */
    n_cd_sen = (int32) (mdef->n_sen - mdef->n_ci_sen);
    n_nbest = n_cd_sen / NBEST_STEP;

    /*This should be a procedure instead of just logic */
    init_stat(&cur_cd_st, "Current CD Senone");
    init_stat(&cur_ci_st, "Current CI Senone");
    init_stat(&cur_cd_Nbest_st, "Current CD NBest Senone");

    for (s = 0; s < mdef->n_ci_sen; s++) {
        snprintf(str, sizeof(str), "Cur Senone %d", s);
        init_stat(&cur_sen_st[s], str);
    }

    for (t = 0; t < n_nbest; t++) {
        snprintf(str, sizeof(str), " %d -Cur Best Senone", t * NBEST_STEP);
        init_stat(&cur_sen_Nbest_st[t], str);
    }

    idx = ckd_calloc(n_cd_sen, sizeof(int32));

    /* Allocate temporary array for CurScr and Curbst indx and Lat index */
    cur_bstidx = ckd_calloc(mdef->n_sen, sizeof(int32));
    last_bstidx = ckd_calloc(mdef->n_sen, sizeof(int32));
    cur_scr = ckd_calloc(mdef->n_sen, sizeof(int32));
    last_scr = ckd_calloc(mdef->n_sen, sizeof(int32));

    for (f = 0; f < kb->stat->nfr; f++) {
        for (s = 0; s < mgau->n_mgau; s++) {
            int32 err_last;     /* |score with last best index - exact| */
            int32 err_cur;      /* |score with current best index - exact| */
            int32 err_ci;       /* |CI senone score - exact| */
            int32 bstidx_hit;   /* best index unchanged by the exact pass */

            /*1, Compute the approximate scores with the last best index. */
            if (mgau->mgau[s].bstidx != NO_BSTIDX) {
                single_el_list[0] = mgau->mgau[s].bstidx;
                last_bstidx[s] = mgau->mgau[s].bstidx;
                last_scr[s] =
                    mgau_eval(mgau, s, single_el_list, kb->feat[f][0], f, 0);
            }
            else {
                /* last_scr[s] deliberately keeps its previous value. */
                last_bstidx[s] = NO_BSTIDX;
            }

            /*2, Compute the exact scores and sort them and get the ranking. */
            kb->ascr->senscr[s] =
                mgau_eval(mgau, s, NULL, kb->feat[f][0], f, 1);

            /*3, Compute the approximate scores with the current best index */
            if (mgau->mgau[s].bstidx != NO_BSTIDX) {
                single_el_list[0] = mgau->mgau[s].bstidx;
                cur_bstidx[s] = mgau->mgau[s].bstidx;
                cur_scr[s] =
                    mgau_eval(mgau, s, single_el_list, kb->feat[f][0], f, 0);
            }
            else {
                cur_bstidx[s] = NO_BSTIDX;
            }

            err_last = abs(last_scr[s] - kb->ascr->senscr[s]);
            err_cur = abs(cur_scr[s] - kb->ascr->senscr[s]);
            err_ci = abs(kb->ascr->senscr[cd2cisen[s]] - kb->ascr->senscr[s]);
            bstidx_hit = (cur_bstidx[s] == last_bstidx[s]);

            /* Only test for CD senones, test for best index hit */
            /*Update either CI senone and CD senone) */
            if (!mdef_is_cisenone(mdef, s))
                stptr = &cur_cd_st;
            else
                stptr = &cur_ci_st;

            increment_stat(stptr, err_last, err_cur, err_ci, bstidx_hit);

            /* CD senones are additionally accounted under their CI senone. */
            if (!mdef_is_cisenone(mdef, s)) {
                stptr = &cur_sen_st[cd2cisen[s]];
                increment_stat(stptr, err_last, err_cur, err_ci, bstidx_hit);
                stptr->total_senone += 1;
            }
        }

        cur_cd_st.total_fr++;
        cur_cd_st.total_senone += n_cd_sen;
        cur_ci_st.total_fr++;
        cur_ci_st.total_senone += mdef->n_ci_sen;

        for (s = 0; s < mdef->n_ci_sen; s++) {
            cur_sen_st[s].total_fr++;
        }

        /*This is the part we need to do sorting */
        /*1, sort the scores in the current frames */
        /*E_INFO("At frame %d\n",f); */

        /*Pointer trick at here. */
        for (s = 0; s < n_cd_sen; s++) {
            idx[s] = s;
        }

        /* The comparator reads the CD senone scores through the file-scope
         * pointer cd, so it must be set before sorting. */
        cd = &(kb->ascr->senscr[mdef->n_ci_sen]);
        qsort(idx, n_cd_sen, sizeof(int32), intcmp_gmm_compute);

        /*This loop is stupid and it is just a hack. */
        for (s = 0; s < n_cd_sen; s++) {
            /* idx[] is relative to the first CD senone; shift it back to an
             * absolute senone index. */
            tmpint = idx[s] + mdef->n_ci_sen;

            /* Bucket t accumulates the senones ranked below t * NBEST_STEP. */
            for (t = 0; t < n_nbest; t++) {
                if (s < t * NBEST_STEP) {
                    increment_stat(&cur_sen_Nbest_st[t],
                                   abs(last_scr[tmpint] -
                                       kb->ascr->senscr[tmpint]),
                                   abs(cur_scr[tmpint] -
                                       kb->ascr->senscr[tmpint]),
                                   abs(kb->ascr->senscr[cd2cisen[tmpint]] -
                                       kb->ascr->senscr[tmpint]),
                                   (cur_bstidx[tmpint] ==
                                    last_bstidx[tmpint]));
                    cur_sen_Nbest_st[t].total_senone += 1;
                }
            }
        }

        for (t = 0; t < n_nbest; t++) {
            cur_sen_Nbest_st[t].total_fr++;
        }
    }

    print_stat(&cur_cd_st);
    print_stat(&cur_ci_st);

    /*Only show the first NBEST_STEP best */
    /* Bucket 1 is only initialised when at least two buckets exist. */
    if (n_nbest > 1)
        print_stat(&cur_sen_Nbest_st[1]);

    /* Merge this utterance's statistics into the running totals. */
    add_stat(&cd_st, &cur_cd_st);
    add_stat(&ci_st, &cur_ci_st);

    for (s = 0; s < mdef->n_ci_sen; s++) {
        add_stat(&sen_st[s], &cur_sen_st[s]);
    }

    for (s = 0; s < n_nbest; s++) {
        add_stat(&sen_Nbest_st[s], &cur_sen_Nbest_st[s]);
    }

    ckd_free(idx);
    ckd_free(cur_bstidx);
    ckd_free(last_bstidx);
    ckd_free(cur_scr);
    ckd_free(last_scr);
}