/*
 * Build a single-stream feature vector made of the current cepstral
 * frame followed by its delta (frame +2 minus frame -2).
 *
 * fcb  - feature control block; must describe 1 stream of length
 *        2 * cepsize with a window size of 2 (checked by assertion).
 * mfc  - pointer to the current frame within an array of frame pointers;
 *        frames at offsets -2 and +2 are read.
 * feat - output; feat[0] receives 2 * cepsize values.
 */
static void
feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *delta;
    mfcc_t *ahead, *behind;
    int32 k, ncep;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
    assert(feat_window_size(fcb) == 2);

    ncep = feat_cepsize(fcb);

    /* First half of the stream: the current frame, copied verbatim. */
    memcpy(feat[0], mfc[0], ncep * sizeof(mfcc_t));

    /* Second half: mfc[2] - mfc[-2], element by element. */
    delta = feat[0] + ncep;
    ahead = mfc[2];
    behind = mfc[-2];
    k = 0;
    while (k < ncep) {
        delta[k] = ahead[k] - behind[k];
        ++k;
    }
}
/*
 * Compute features for a complete utterance in one call.
 *
 * A temporary array of frame pointers is built: the caller's nfr frame
 * pointers in the middle, with `win` padding frames on each side.  The
 * padding frames live in fcb->cepbuf and are filled with copies of the
 * first and last utterance frames.  CMN and AGC are run on the real
 * frames before the padding copies are made.
 *
 * Returns nfr.
 */
static int32
feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
                          int32 nfr, mfcc_t *** ofeat)
{
    mfcc_t **padded;
    int32 k, win, cepsize;
    size_t frame_bytes;

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);
    frame_bytes = cepsize * sizeof(mfcc_t);

    /* Only the pointers are copied, so the feature computation must
     * reach the data through these frame pointers. */
    padded = (mfcc_t **) ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
    memcpy(padded + win, uttcep, nfr * sizeof(mfcc_t *));

    /* Normalise first so the replicated boundary frames are normalised too. */
    feat_cmn(fcb, padded + win, nfr, 1, 1);
    feat_agc(fcb, padded + win, nfr, 1, 1);

    /* Leading pad uses cepbuf rows [0, win); trailing pad uses rows
     * [win, 2 * win). */
    for (k = 0; k < win; ++k) {
        mfcc_t *head = fcb->cepbuf[k];
        mfcc_t *tail = fcb->cepbuf[win + k];

        padded[k] = head;
        memcpy(head, uttcep[0], frame_bytes);
        padded[nfr + win + k] = tail;
        memcpy(tail, uttcep[nfr - 1], frame_bytes);
    }

    feat_compute_utt(fcb, padded, nfr + win * 2, win, ofeat);
    ckd_free(padded);

    return nfr;
}
/*
 * Append a block of cepstral values to the MFCC log file.
 *
 * acmod    - acoustic model state; acmod->mfcfh is the log file handle and
 *            acmod->fcb supplies the per-frame cepstral size.
 * cep      - frame array; n_frames * cepsize values are read starting at
 *            cep[0], i.e. the frames are treated as one contiguous run.
 * n_frames - number of frames to log.
 *
 * When WORDS_BIGENDIAN is false the values are byte-swapped in place,
 * written, and swapped back, so the caller's data is unchanged on return.
 * A short write is reported through E_ERROR_SYSTEM; the function always
 * returns 0.
 */
static int
acmod_log_mfc(acmod_t *acmod,
              mfcc_t **cep, int n_frames)
{
    int i, n;
    int32 *ptr = (int32 *)cep[0];

    n = n_frames * feat_cepsize(acmod->fcb);
    /* Swap bytes.  ptr addresses 32-bit words, so the loop runs over the
     * n values themselves, not over n * sizeof(mfcc_t) which would walk
     * past the end of the buffer. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < n; ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    /* Write features. */
    if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
        E_ERROR_SYSTEM("Failed to write %d values to log file", n);
    }

    /* Swap them back. */
    if (!WORDS_BIGENDIAN) {
        for (i = 0; i < n; ++i) {
            SWAP_INT32(ptr + i);
        }
    }
    return 0;
}
/*
 * Build a single-stream feature vector of four cepsize-long sections:
 * cepstra, short delta, long delta and double delta.
 *
 * fcb  - feature control block; must describe 1 stream of length
 *        4 * cepsize with window size FEAT_DCEP_WIN * 2 (asserted).
 * mfc  - pointer to the current frame within an array of frame pointers;
 *        neighbouring frames are read at negative and positive offsets.
 * feat - output; feat[0] receives 4 * cepsize values.
 */
static void
feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *out;
    mfcc_t *fwd, *bwd;
    mfcc_t *fwd_next, *bwd_next, *fwd_prev, *bwd_prev;
    mfcc_t outer, inner;
    int32 k, ncep;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
    assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);

    ncep = feat_cepsize(fcb);

    /* Section 0 (CEP): the current frame. */
    memcpy(feat[0], mfc[0], ncep * sizeof(mfcc_t));

    /* Section 1 (DCEP): mfc[w] - mfc[-w] with w = FEAT_DCEP_WIN. */
    out = feat[0] + ncep;
    fwd = mfc[FEAT_DCEP_WIN];
    bwd = mfc[-FEAT_DCEP_WIN];
    for (k = 0; k < ncep; ++k)
        out[k] = fwd[k] - bwd[k];

    /* Section 2 (LDCEP): same difference with w = FEAT_DCEP_WIN * 2. */
    out += ncep;
    fwd = mfc[FEAT_DCEP_WIN * 2];
    bwd = mfc[-FEAT_DCEP_WIN * 2];
    for (k = 0; k < ncep; ++k)
        out[k] = fwd[k] - bwd[k];

    /*
     * Section 3 (D2CEP):
     * (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), w = FEAT_DCEP_WIN.
     */
    out += ncep;
    fwd_next = mfc[FEAT_DCEP_WIN + 1];
    bwd_next = mfc[-FEAT_DCEP_WIN + 1];
    fwd_prev = mfc[FEAT_DCEP_WIN - 1];
    bwd_prev = mfc[-FEAT_DCEP_WIN - 1];
    for (k = 0; k < ncep; ++k) {
        outer = fwd_next[k] - bwd_next[k];
        inner = fwd_prev[k] - bwd_prev[k];
        out[k] = outer - inner;
    }
}
/*
 * Pass-through feature computation: the single output stream is a copy
 * of the current cepstral frame.
 *
 * fcb  - feature control block; must describe 1 stream and a window
 *        size of 0 (asserted).
 * mfc  - mfc[0] is the current input frame.
 * feat - feat[0] receives cepsize values.
 */
static void
feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    size_t frame_bytes;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_window_size(fcb) == 0);

    frame_bytes = feat_cepsize(fcb) * sizeof(mfcc_t);
    memcpy(feat[0], mfc[0], frame_bytes);
}
/*
 * Report whether a feature control block disagrees with the acoustic
 * model's configuration.
 *
 * Returns TRUE when the configured "-feat" string differs from the
 * block's name, or when the configured "-ceplen" differs from the
 * block's cepstral size; FALSE otherwise.
 */
int
acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
{
    int same_type;

    /* The feature type names have to agree. */
    same_type =
        (strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)) == 0);

    /* The input vector dimension is only looked up once the type matches. */
    if (same_type
        && cmd_ln_int32_r(acmod->config, "-ceplen") == feat_cepsize(fcb)) {
        /* FIXME: Need to check LDA and stuff too. */
        return FALSE;
    }
    return TRUE;
}
/*
 * Build the 39-dimensional single-stream feature vector:
 * 12 cepstra (C0 skipped), 12 deltas, 3 power terms (C0, delta C0,
 * double-delta C0) and 12 double deltas, in that order.
 *
 * fcb  - feature control block; cepsize 13, 1 stream of length 39,
 *        window size 3 (all asserted).
 * mfc  - pointer to the current frame within an array of frame pointers;
 *        frames at offsets -3..+3 are read.
 * feat - output; feat[0] receives the 39 values.
 */
static void
feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *dcep, *pow, *d2cep;
    mfcc_t *p2, *m2;
    mfcc_t *p3, *m1, *p1, *m3;
    mfcc_t outer, inner;
    int32 k, ncoef;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == 39);
    assert(feat_window_size(fcb) == 3);

    /* Number of coefficients once C0 is dropped. */
    ncoef = feat_cepsize(fcb) - 1;

    /* CEP: current frame without C0. */
    memcpy(feat[0], mfc[0] + 1, ncoef * sizeof(mfcc_t));

    /* DCEP: mfc[2] - mfc[-2], again skipping C0 via the +1. */
    dcep = feat[0] + ncoef;
    p2 = mfc[2] + 1;
    m2 = mfc[-2] + 1;
    for (k = 0; k < ncoef; ++k)
        dcep[k] = p2[k] - m2[k];

    /* POW: C0, DC0, D2C0. */
    pow = dcep + ncoef;
    pow[0] = mfc[0][0];
    pow[1] = mfc[2][0] - mfc[-2][0];
    outer = mfc[3][0] - mfc[-1][0];
    inner = mfc[1][0] - mfc[-3][0];
    pow[2] = outer - inner;

    /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]), C0 skipped. */
    d2cep = pow + 3;
    p3 = mfc[3] + 1;
    m1 = mfc[-1] + 1;
    p1 = mfc[1] + 1;
    m3 = mfc[-3] + 1;
    for (k = 0; k < ncoef; ++k) {
        outer = p3[k] - m1[k];
        inner = p1[k] - m3[k];
        d2cep[k] = outer - inner;
    }
}
/*
 * Allocate and configure a feature control block for the named feature type.
 *
 * type     - feature type name.  Recognised names are "s2_4x", "s3_1x39",
 *            "1s_12c_12d_3p_12dd", and (by prefix) "1s_c_d_dd",
 *            "1s_c_d_ld_dd", "cep_dcep"/"1s_c_d", "cep"/"1s_c",
 *            "1s_3c"/"1s_4c".  Anything else is parsed as a generic
 *            comma-separated list of stream widths, optionally followed
 *            by ":<window size>".
 * cmn      - cepstral mean normalisation type; a CMN structure is created
 *            unless this is CMN_NONE.
 * varnorm  - stored in the block as-is.
 * agc      - gain control type; an AGC structure is created unless this
 *            is AGC_NONE.
 * breport  - when non-zero, log the chosen configuration.
 * cepsize  - input cepstral vector length; 0 selects the default of 13.
 *
 * Returns the new block (refcount 1), or NULL when "s2_4x" or "s3_1x39"
 * style features are requested with cepsize != 13.  A malformed generic
 * type string is reported through E_FATAL.
 */
feat_t *
feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
          agc_type_t agc, int32 breport, int32 cepsize)
{
    feat_t *fcb;

    if (cepsize == 0)
        cepsize = 13;
    if (breport)
        E_INFO
            ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
             type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);

    fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
    fcb->refcount = 1;
    fcb->name = (char *) ckd_salloc(type);

    /*
     * The prefix comparisons below are order-sensitive: longer names such
     * as "1s_c_d_dd" must be tested before their prefixes ("1s_c_d",
     * "1s_c").
     */
    if (strcmp(type, "s2_4x") == 0) {
        /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s2_4x features require cepsize == 13\n");
            /* Release the name copy as well as the block itself. */
            ckd_free(fcb->name);
            ckd_free(fcb);
            return NULL;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 4;
        fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
        fcb->stream_len[0] = 12;
        fcb->stream_len[1] = 24;
        fcb->stream_len[2] = 3;
        fcb->stream_len[3] = 12;
        fcb->out_dim = 51;
        fcb->window_size = 4;
        fcb->compute_feat = feat_s2_4x_cep2feat;
    }
    else if ((strcmp(type, "s3_1x39") == 0)
             || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
        /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s3_1x39 features require cepsize == 13\n");
            /* Release the name copy as well as the block itself. */
            ckd_free(fcb->name);
            ckd_free(fcb);
            return NULL;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = 39;
        fcb->out_dim = 39;
        fcb->window_size = 3;
        fcb->compute_feat = feat_s3_1x39_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = cepsize * 3;
        fcb->out_dim = cepsize * 3;
        fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
        fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = cepsize * 4;
        fcb->out_dim = cepsize * 4;
        fcb->window_size = FEAT_DCEP_WIN * 2;
        fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
    }
    else if (strncmp(type, "cep_dcep", 8) == 0
             || strncmp(type, "1s_c_d", 6) == 0) {
        /* 1-stream cep/dcep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb) * 2;
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 2;
        fcb->compute_feat = feat_s3_cep_dcep;
    }
    else if (strncmp(type, "cep", 3) == 0
             || strncmp(type, "1s_c", 4) == 0) {
        /* 1-stream cep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb);
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 0;
        fcb->compute_feat = feat_s3_cep;
    }
    else if (strncmp(type, "1s_3c", 5) == 0
             || strncmp(type, "1s_4c", 5) == 0) {
        /* 1-stream cep with frames concatenated, so called cepwin features */
        if (strncmp(type, "1s_3c", 5) == 0)
            fcb->window_size = 3;
        else
            fcb->window_size = 4;

        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
        fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
        fcb->out_dim = fcb->stream_len[0];
        fcb->compute_feat = feat_copy;
    }
    else {
        int32 i, l, k;
        char *strp;
        char *mtype = ckd_salloc(type);
        char *wd = ckd_salloc(type);
        /*
         * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
         * comma separated list of feature stream widths; #items =
         * #streams).  An optional window size (frames will be
         * concatenated) is also allowed, which can be specified with
         * a colon after the list of feature streams.
         */
        l = strlen(mtype);
        k = 0;
        /* Turn commas into spaces for the word scanner and cut the
         * string at the colon, if any. */
        for (i = 1; i < l - 1; i++) {
            if (mtype[i] == ',') {
                mtype[i] = ' ';
                k++;
            }
            else if (mtype[i] == ':') {
                mtype[i] = '\0';
                fcb->window_size = atoi(mtype + i + 1);
                break;
            }
        }
        k++;                    /* Presumably there are (#commas+1) streams */
        fcb->n_stream = k;
        fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));

        /* Scan individual feature stream lengths */
        strp = mtype;
        i = 0;
        fcb->out_dim = 0;
        fcb->cepsize = 0;
#ifndef POCKETSPHINX_NET
        while (sscanf(strp, "%s%n", wd, &l) == 1)
#else
        while (net_sscanf_word(strp, wd, &l) == 1)
#endif
        {
            strp += l;
            if ((i >= fcb->n_stream) ||
#ifndef POCKETSPHINX_NET
                (sscanf(wd, "%d", &(fcb->stream_len[i])) != 1)
#else
                UInt32::TryParse(gcnew String(wd), fcb->stream_len[i])
#endif
                || (fcb->stream_len[i] <= 0))
                E_FATAL("Bad feature type argument\n");
            /* Input size before windowing */
            fcb->cepsize += fcb->stream_len[i];
            if (fcb->window_size > 0)
                fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
            /* Output size after windowing */
            fcb->out_dim += fcb->stream_len[i];
            i++;
        }
        if (i != fcb->n_stream)
            E_FATAL("Bad feature type argument\n");
        if (fcb->cepsize != cepsize)
            E_FATAL("Bad feature type argument\n");

        /* Input is already the feature stream */
        fcb->compute_feat = feat_copy;
        ckd_free(mtype);
        ckd_free(wd);
    }

    if (cmn != CMN_NONE)
        fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
    fcb->cmn = cmn;
    fcb->varnorm = varnorm;
    if (agc != AGC_NONE) {
        fcb->agc_struct = agc_init();
        /*
         * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
         * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
         * switches to EMAX
         */
        /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
        agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
    }
    fcb->agc = agc;
    /*
     * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
     */
    fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
                                            feat_cepsize(fcb),
                                            sizeof(mfcc_t));
    /* This one is actually just an array of pointers to "flatten out"
     * wraparounds. */
    fcb->tmpcepbuf = (mfcc_t**)ckd_calloc(2 * feat_window_size(fcb) + 1,
                                          sizeof(*fcb->tmpcepbuf));

    return fcb;
}
/*
 * Build the four-stream Sphinx-II style feature set:
 *   stream 0 - 12 cepstra (C0 skipped)
 *   stream 1 - 12 short-term deltas followed by 12 long-term deltas
 *   stream 2 - 3 power terms (C0, delta C0, double-delta C0)
 *   stream 3 - 12 double deltas
 *
 * fcb  - feature control block; cepsize 13, 4 streams of lengths
 *        12/24/3/12, window size 4 (all asserted).
 * mfc  - pointer to the current frame within an array of frame pointers;
 *        frames at offsets -4..+4 are read.
 * feat - output; feat[0]..feat[3] receive the four streams.
 */
static void
feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    mfcc_t *dst;
    mfcc_t *lead, *lag;
    mfcc_t *p3, *m1, *p1, *m3;
    mfcc_t outer, inner;
    int32 k, ncoef;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 4);
    assert(feat_stream_len(fcb, 0) == 12);
    assert(feat_stream_len(fcb, 1) == 24);
    assert(feat_stream_len(fcb, 2) == 3);
    assert(feat_stream_len(fcb, 3) == 12);
    assert(feat_window_size(fcb) == 4);

    /* Number of coefficients once C0 is dropped. */
    ncoef = feat_cepsize(fcb) - 1;

    /* Stream 0: current frame without C0. */
    memcpy(feat[0], mfc[0] + 1, ncoef * sizeof(mfcc_t));

    /* Stream 1, first half: short-term delta mfc[2] - mfc[-2]. */
    dst = feat[1];
    lead = mfc[2] + 1;
    lag = mfc[-2] + 1;
    for (k = 0; k < ncoef; ++k)
        dst[k] = lead[k] - lag[k];

    /* Stream 1, second half: long-term delta mfc[4] - mfc[-4]. */
    lead = mfc[4] + 1;
    lag = mfc[-4] + 1;
    for (k = 0; k < ncoef; ++k)
        dst[ncoef + k] = lead[k] - lag[k];

    /* Stream 3: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]), C0 skipped. */
    dst = feat[3];
    p3 = mfc[3] + 1;
    m1 = mfc[-1] + 1;
    p1 = mfc[1] + 1;
    m3 = mfc[-3] + 1;
    for (k = 0; k < ncoef; ++k) {
        outer = p3[k] - m1[k];
        inner = p1[k] - m3[k];
        dst[k] = outer - inner;
    }

    /* Stream 2: C0 and its differences, computed the same way as above. */
    dst = feat[2];
    dst[0] = mfc[0][0];
    dst[1] = mfc[2][0] - mfc[-2][0];
    outer = mfc[3][0] - mfc[-1][0];
    inner = mfc[1][0] - mfc[-3][0];
    dst[2] = outer - inner;
}
/*
 * Incrementally convert cepstral frames into feature vectors using the
 * circular buffer fcb->cepbuf (LIVEBUFBLOCKSIZE frames).
 *
 * fcb        - feature control block; bufpos is the write position and
 *              curpos the read position within cepbuf.
 * uttcep     - input frames; normalised in place by feat_cmn()/feat_agc().
 * inout_ncep - in: number of frames available in uttcep; out: number of
 *              frames actually consumed (may be reduced when the buffer
 *              would overflow).  May be NULL, which is treated as zero
 *              frames.
 * beginutt   - non-zero at the start of an utterance.
 * endutt     - non-zero at the end of an utterance.
 * ofeat      - output array; one entry is filled per feature vector
 *              produced.
 *
 * Returns the number of feature vectors written to ofeat (0 if there is
 * not yet enough context).  When both beginutt and endutt are set and
 * there is input, the whole call is delegated to
 * feat_s2mfc2feat_block_utt().
 */
int32
feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
                     int32 beginutt, int32 endutt, mfcc_t *** ofeat)
{
    int32 win, cepsize, nbufcep;
    int32 i, j, nfeatvec;
    int32 zero = 0;

    /* Avoid having to check this everywhere. */
    if (inout_ncep == 0) inout_ncep = &zero;

    /* Special case for entire utterances. */
    if (beginutt && endutt && *inout_ncep > 0)
        return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);

    /* Empty the input buffer on start of utterance. */
    if (beginutt)
        fcb->bufpos = fcb->curpos;

    /* Calculate how much data is in the buffer already. */
    nbufcep = fcb->bufpos - fcb->curpos;
    /* A negative difference means the write position has wrapped. */
    if (nbufcep < 0)
        nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
    /* Add any data that we have to replicate. */
    if (beginutt && *inout_ncep > 0)
        nbufcep += win;
    if (endutt)
        nbufcep += win;

    /* Only consume as much input as will fit in the buffer. */
    if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
        /* We also can't overwrite the trailing window, hence the
         * reason why win is subtracted here. */
        *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
        /* Cancel end of utterance processing. */
        /* NOTE(review): nbufcep still includes the `win` added above for
         * endutt even though the end-of-utterance padding is now skipped;
         * confirm the frame count computed below is as intended. */
        endutt = FALSE;
    }

    /* FIXME: Don't modify the input! */
    feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
    feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);

    /* Replicate first frame into the first win frames if we're at the
     * beginning of the utterance and there was some actual input to
     * deal with.  (FIXME: Not entirely sure why that condition) */
    if (beginutt && *inout_ncep > 0) {
        for (i = 0; i < win; i++) {
            memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
        /* Move the current pointer past this data. */
        fcb->curpos = fcb->bufpos;
        /* The replicated frames are context only, so stop counting them. */
        nbufcep -= win;
    }

    /* Copy in frame data to the circular buffer. */
    for (i = 0; i < *inout_ncep; ++i) {
        memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
               cepsize * sizeof(mfcc_t));
        fcb->bufpos %= LIVEBUFBLOCKSIZE;
        ++nbufcep;
    }

    /* Replicate last frame into the last win frames if we're at the
     * end of the utterance (even if there was no input, so we can
     * flush the output). */
    if (endutt) {
        int32 tpos; /* Index of last input frame. */
        if (fcb->bufpos == 0)
            tpos = LIVEBUFBLOCKSIZE - 1;
        else
            tpos = fcb->bufpos - 1;
        for (i = 0; i < win; ++i) {
            memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
    }

    /* We have to leave the trailing window of frames. */
    nfeatvec = nbufcep - win;
    if (nfeatvec <= 0)
        return 0; /* Do nothing. */

    for (i = 0; i < nfeatvec; ++i) {
        /* Handle wraparound cases. */
        if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
            /* Use tmpcepbuf for this case.  Actually, we just need the pointers. */
            for (j = -win; j <= win; ++j) {
                /* Adding LIVEBUFBLOCKSIZE keeps the operand of % non-negative. */
                int32 tmppos =
                    (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
                fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
            }
            /* tmpcepbuf + win is the current frame in the flattened window. */
            fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
        }
        else {
            fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
        }
        /* Move the read pointer forward. */
        ++fcb->curpos;
        fcb->curpos %= LIVEBUFBLOCKSIZE;
    }

    /* Optional post-processing, applied only when configured on the block. */
    if (fcb->lda)
        feat_lda_transform(fcb, ofeat, nfeatvec);

    if (fcb->subvecs)
        feat_subvec_project(fcb, ofeat, nfeatvec);

    return nfeatvec;
}
/*
 * Program entry point for the s3align tool.
 *
 * Arguments come either from the command line or from an argument file:
 * a single non-option argument names the file, and with no arguments the
 * default file "s3align.arg" is tried.  A single "help" argument prints
 * the argument definitions and exits with status 1.
 *
 * After checking that the required model, control and output arguments
 * are present, the function initialises the log base, feature module,
 * models, timers and aligner, processes the control file, prints total
 * timing statistics and exits with status 0.
 *
 * The return type is spelled out explicitly; the implicit-int form is not
 * valid in C99 and later.
 */
int
main (int32 argc, char *argv[])
{
    char *str;

#if 0
    ckd_debug(100000);
#endif

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    /* Digest command line argument definitions */
    cmd_ln_define (defn);

    if ((argc == 2) && (strcmp (argv[1], "help") == 0)) {
        cmd_ln_print_definitions();
        exit(1);
    }

    /* Look for default or specified arguments file */
    str = NULL;
    if ((argc == 2) && (argv[1][0] != '-'))
        str = argv[1];
    else if (argc == 1) {
        str = "s3align.arg";
        E_INFO("Looking for default argument file: %s\n", str);
    }
    if (str) {
        /* Build command line argument list from file */
        if ((argc = load_argfile (str, argv[0], &argv)) < 0) {
            fprintf (stderr, "Usage:\n");
            fprintf (stderr, "\t%s argument-list, or\n", argv[0]);
            fprintf (stderr, "\t%s [argument-file] (default file: s3align.arg)\n\n",
                     argv[0]);
            cmd_ln_print_definitions();
            exit(1);
        }
    }

    cmdline_parse (argc, argv);

    /* Required model and dictionary inputs. */
    if ((cmd_ln_access("-mdeffn") == NULL) ||
        (cmd_ln_access("-meanfn") == NULL) ||
        (cmd_ln_access("-varfn") == NULL) ||
        (cmd_ln_access("-mixwfn") == NULL) ||
        (cmd_ln_access("-tmatfn") == NULL) ||
        (cmd_ln_access("-dictfn") == NULL))
        E_FATAL("Missing -mdeffn, -meanfn, -varfn, -mixwfn, -tmatfn, or -dictfn argument\n");

    /* Required control file and input transcripts. */
    if ((cmd_ln_access("-ctlfn") == NULL) || (cmd_ln_access("-insentfn") == NULL))
        E_FATAL("Missing -ctlfn or -insentfn argument\n");

    /* At least one kind of output must be requested. */
    if ((cmd_ln_access ("-s2stsegdir") == NULL) &&
        (cmd_ln_access ("-stsegdir") == NULL) &&
        (cmd_ln_access ("-phsegdir") == NULL) &&
        (cmd_ln_access ("-wdsegdir") == NULL) &&
        (cmd_ln_access ("-outsentfn") == NULL))
        E_FATAL("Missing output file/directory argument(s)\n");

    tm_utt = timing_new ();

    /*
     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid
     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values
     * to be maintained in int32 variables without significant loss of precision.
     */
    if (cmd_ln_access("-logbase") == NULL)
        logs3_init (1.0001);
    else {
        float32 logbase;

        logbase = *((float32 *) cmd_ln_access("-logbase"));
        if (logbase <= 1.0)
            E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);
        if (logbase > 1.1)
            E_WARN("Logbase %e perhaps too large??\n", logbase);
        logs3_init ((float64) logbase);
    }

    /* Initialize feature stream type */
    feat_init ((char *) cmd_ln_access ("-feat"));
    /* BHIKSHA: PASS CEPSIZE TO FEAT_CEPSIZE, 6 Jan 98 */
    cepsize = *((int32 *) cmd_ln_access("-ceplen"));
    cepsize = feat_cepsize (cepsize);
    /* END CHANGES BY BHIKSHA */

    /* Read in input databases */
    models_init ();

    senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));

    tmr_utt = cyctimer_new ("U");
    tmr_gauden = cyctimer_new ("G");
    tmr_senone = cyctimer_new ("S");
    tmr_align = cyctimer_new ("A");

    /* Initialize align module */
    align_init ();
    printf ("\n");

    tot_nfr = 0;

    process_ctlfile ();

    /* Summary statistics; the xRT figures divide by tot_nfr * 0.01. */
    if (tot_nfr > 0) {
        printf ("\n");
        printf("TOTAL FRAMES:       %8d\n", tot_nfr);
        printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_cpu, tm_utt->t_tot_cpu/(tot_nfr*0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_elapsed, tm_utt->t_tot_elapsed/(tot_nfr*0.01));
    }

#if (! WIN32)
    system ("ps aguxwww | grep s3align");
#endif

    /* Hack!! To avoid hanging problem under Linux */
    if (logfp) {
        fclose (logfp);
        *stdout = orig_stdout;
        *stderr = orig_stderr;
    }

    exit(0);
}
acoustic_t *acoustic_init (feat_t *f, gauden_t *g, senone_t *s, float64 beam, int32 maxfr) { acoustic_t *am; int32 i; if (senone_n_mgau(s) != gauden_n_mgau(g)) { E_ERROR("#Parent mixture Gaussians mismatch: senone(%d), gauden(%d)\n", senone_n_mgau(s), gauden_n_mgau(g)); } if (feat_n_stream(f) != senone_n_stream(s)) { E_ERROR("#Feature-streams mismatch: feat(%d), senone(%d)\n", feat_n_stream(f), senone_n_stream(s)); } if (feat_n_stream(f) != gauden_n_stream(g)) { E_ERROR("#Feature-streams mismatch: feat(%d), gauden(%d)\n", feat_n_stream(f), gauden_n_stream(g)); return NULL; } for (i = 0; i < feat_n_stream(f); i++) { if (feat_stream_len(f, i) != gauden_stream_len(g, i)) { E_ERROR("Feature stream(%d) length mismatch: feat(%d), gauden(%d)\n", feat_stream_len(f, i), gauden_stream_len(g, i)); return NULL; } } if (beam > 1.0) { E_ERROR("mgaubeam > 1.0 (%e)\n", beam); return NULL; } am = (acoustic_t *) ckd_calloc (1, sizeof(acoustic_t)); am->fcb = f; am->gau = g; am->sen = s; am->mgaubeam = (beam == 0.0) ? LOGPROB_ZERO : logs3(beam); if (am->mgaubeam > 0) am->mgaubeam = 0; am->tot_mgau_eval = 0; am->tot_dist_valid = 0.0; am->dist_valid = (am->mgaubeam <= LOGPROB_ZERO) ? NULL : (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32)); if (f->compute_feat) { /* Input is MFC cepstra; feature vectors computed from that */ am->mfc = (float32 **) ckd_calloc_2d (maxfr, feat_cepsize(am->fcb), sizeof(float32)); am->feat = feat_array_alloc (f, 1); } else { /* Input is directly feature vectors */ am->mfc = NULL; am->feat = feat_array_alloc (f, maxfr); } am->dist = (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32)); am->gauden_active = bitvec_alloc (g->n_mgau); am->senscr = (int32 *) ckd_calloc (s->n_sen, sizeof(int32)); am->senscale = (int32 *) ckd_calloc (maxfr, sizeof(int32)); am->sen_active = bitvec_alloc (s->n_sen); return am; }
/*
 * Decode one utterance: read its cepstra, normalise them, run the
 * lextree Viterbi search frame by frame, print the hypothesis, write the
 * word lattice to "<uttid>.lat" and print timing/counter statistics.
 *
 * data    - the knowledge base (kb_t *) passed through as void *.
 * uttfile - utterance file name, combined with -cepdir/-cepext.
 * sf, ef  - start and end frame passed to s2mfc_read().
 * uttid   - utterance id used in messages and for the lattice file name.
 *
 * Returns nothing; on a read failure or a too-short utterance an error is
 * logged and the utterance is skipped.
 *
 * NOTE(review): the too-short early return happens after
 * ptmr_start(kb->tm) without a matching ptmr_stop() -- confirm the timer
 * API tolerates this.
 */
static void decode_utt (void *data, char *uttfile, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    acoustic_t *am;
    int32 featwin, nfr, min_utt_frames, n_vithist;
    char cepfile[4096], latfile[4096];
    vithist_t *finalhist;
    int32 i, f;
    glist_t hyplist;
    FILE *latfp;

    printf ("\n");
    fflush (stdout);
    E_INFO("Utterance %s\n", uttid);

    kb = (kb_t *)data;
    am = kb->am;
    featwin = feat_window_size(am->fcb);

    /* Build complete cepfile name and read cepstrum data; check for min length */
    ctl_infile (cepfile, cmd_ln_str("-cepdir"), cmd_ln_str("-cepext"), uttfile);

    if ((nfr = s2mfc_read (cepfile, sf, ef, featwin, am->mfc, S3_MAX_FRAMES)) < 0) {
        E_ERROR("%s: MFC read failed\n", uttid);
        return;
    }
    /* nfr counts featwin extra frames at each end; report it without them. */
    E_INFO("%s: %d frames\n", uttid, nfr-(featwin<<1));

    ptmr_reset (kb->tm);
    ptmr_reset (kb->tm_search);
    ptmr_start (kb->tm);

    min_utt_frames = (featwin<<1) + 1;
    if (nfr < min_utt_frames) {
        /* Argument list matches the two conversions in the format. */
        E_ERROR("%s: Utterance shorter than %d frames; ignored\n",
                uttid, min_utt_frames);
        return;
    }

    /* CMN/AGC */
    if (strcmp (cmd_ln_str("-cmn"), "current") == 0)
        cmn (am->mfc, nfr, feat_cepsize(am->fcb));
    if (strcmp (cmd_ln_str("-agc"), "max") == 0)
        agc_max (am->mfc, nfr);

    /* Process utterance */
    lextree_vit_start (kb, uttid);
    /* i indexes the padded cepstrum array, f the frames actually decoded. */
    for (i = featwin, f = 0; i < nfr-featwin; i++, f++) {
        am->senscale[f] = acoustic_eval (am, i);

        ptmr_start (kb->tm_search);

        lextree_vit_frame (kb, f, uttid);
        printf (" %d,%d,%d", f, glist_count (kb->vithist[f]),
                glist_count (kb->lextree_active));
        fflush (stdout);

        ptmr_stop (kb->tm_search);
    }
    printf ("\n");

    finalhist = lextree_vit_end (kb, f, uttid);

    hyplist = vithist_backtrace (finalhist, kb->am->senscale);
    hyp_log (stdout, hyplist, _dict_wordstr, (void *)kb->dict);
    hyp_myfree (hyplist);
    printf ("\n");

    /* Log the entire Viterbi word lattice */
    /* Bounded formatting: an over-long uttid is truncated rather than
     * overflowing latfile. */
    snprintf (latfile, sizeof(latfile), "%s.lat", uttid);
    if ((latfp = fopen(latfile, "w")) == NULL) {
        E_ERROR("fopen(%s,w) failed; using stdout\n", latfile);
        latfp = stdout;
    }
    n_vithist = vithist_log (latfp, kb->vithist, f, _dict_wordstr, (void *)kb->dict);
    if (latfp != stdout)
        fclose (latfp);
    else {
        printf ("\n");
        fflush (stdout);
    }

    ptmr_stop (kb->tm);

    if (f > 0) {
        printf("TMR(%s): %5d frames; %.1fs CPU, %.2f xRT; %.1fs CPU(search), %.2f xRT; %.1fs Elapsed, %.2f xRT\n",
               uttid, f,
               kb->tm->t_cpu, kb->tm->t_cpu * 100.0 / f,
               kb->tm_search->t_cpu, kb->tm_search->t_cpu * 100.0 / f,
               kb->tm->t_elapsed, kb->tm->t_elapsed * 100.0 / f);
        printf("CTR(%s): %5d frames; %d Sen (%.1f/fr); %d HMM (%.1f/fr); %d Words (%.1f/fr)\n",
               uttid, f,
               kb->n_sen_eval, ((float64)kb->n_sen_eval) / f,
               kb->n_hmm_eval, ((float64)kb->n_hmm_eval) / f,
               n_vithist, ((float64) n_vithist) / f);
    }

    /* Cleanup */
    glist_free (kb->lextree_active);
    kb->lextree_active = NULL;
    for (; f >= -1; --f) { /* I.e., including dummy START_WORD node at frame -1 */
        glist_myfree (kb->vithist[f], sizeof(vithist_t));
        kb->vithist[f] = NULL;
    }

    lm_cache_reset (kb->lm);
}