/*
 * Single-stream feature: cepstra followed by delta cepstra.
 *
 * Layout written to feat[0] (2 * cepsize values):
 *   [0 .. cepsize)             copy of the current frame, mfc[0]
 *   [cepsize .. 2 * cepsize)   mfc[2] - mfc[-2]
 *
 * The input must be addressable from mfc[-2] through mfc[2].
 */
static void
feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    int32 ncep;
    int32 k;
    mfcc_t *out;
    mfcc_t *ahead, *behind;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
    assert(feat_window_size(fcb) == 2);

    ncep = feat_cepsize(fcb);
    out = feat[0];

    /* Static cepstra: the current frame, unchanged. */
    memcpy(out, mfc[0], ncep * sizeof(mfcc_t));

    /* Delta cepstra: frame two ahead minus frame two behind. */
    out += ncep;
    ahead = mfc[2];
    behind = mfc[-2];
    for (k = 0; k < ncep; ++k)
        out[k] = ahead[k] - behind[k];
}
/*
 * Single-stream feature made of four cepsize-long segments, written
 * back to back into feat[0]:
 *
 *   CEP    mfc[0]
 *   DCEP   mfc[w]  - mfc[-w],                         w = FEAT_DCEP_WIN
 *   LDCEP  mfc[2w] - mfc[-2w]
 *   D2CEP  (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1])
 *
 * The input must be addressable over +/- (FEAT_DCEP_WIN * 2) frames
 * around mfc[0].
 */
static void
feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    int32 ncep;
    int32 k;
    mfcc_t *out;
    mfcc_t *ahead, *behind;
    mfcc_t *ahead_hi, *ahead_lo, *behind_hi, *behind_lo;
    mfcc_t late, early;

    assert(fcb);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
    assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);

    ncep = feat_cepsize(fcb);
    out = feat[0];

    /* CEP: the current frame, unchanged. */
    memcpy(out, mfc[0], ncep * sizeof(mfcc_t));

    /* DCEP: short-span difference. */
    out += ncep;
    ahead = mfc[FEAT_DCEP_WIN];
    behind = mfc[-FEAT_DCEP_WIN];
    for (k = 0; k < ncep; ++k)
        out[k] = ahead[k] - behind[k];

    /* LDCEP: same difference over twice the span. */
    out += ncep;
    ahead = mfc[FEAT_DCEP_WIN * 2];
    behind = mfc[-FEAT_DCEP_WIN * 2];
    for (k = 0; k < ncep; ++k)
        out[k] = ahead[k] - behind[k];

    /* D2CEP: difference of the deltas centred one frame later and
     * one frame earlier. */
    out += ncep;
    ahead_hi = mfc[FEAT_DCEP_WIN + 1];
    behind_hi = mfc[-FEAT_DCEP_WIN + 1];
    ahead_lo = mfc[FEAT_DCEP_WIN - 1];
    behind_lo = mfc[-FEAT_DCEP_WIN - 1];
    for (k = 0; k < ncep; ++k) {
        late = ahead_hi[k] - behind_hi[k];
        early = ahead_lo[k] - behind_lo[k];
        out[k] = late - early;
    }
}
/*
 * Single 39-element stream built from 13-element cepstra.
 *
 * Layout written to feat[0] (with cepsize == 13, as asserted):
 *   [0 .. 12)    C1..C12 of the current frame (C0 skipped)
 *   [12 .. 24)   delta of C1..C12:   mfc[2] - mfc[-2]
 *   [24]         C0
 *   [25]         delta C0:           mfc[2][0] - mfc[-2][0]
 *   [26]         2nd-order delta C0
 *   [27 .. 39)   2nd-order delta of C1..C12:
 *                (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3])
 *
 * The input must be addressable from mfc[-3] through mfc[3].
 */
static void
feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    int32 nrest;                /* coefficient count without C0 */
    int32 k;
    mfcc_t *out;
    mfcc_t *ahead, *behind;
    mfcc_t *plus3, *minus1, *plus1, *minus3;
    mfcc_t late, early;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 1);
    assert(feat_stream_len(fcb, 0) == 39);
    assert(feat_window_size(fcb) == 3);

    nrest = feat_cepsize(fcb) - 1;
    out = feat[0];

    /* CEP without C0: start copying from element 1. */
    memcpy(out, mfc[0] + 1, nrest * sizeof(mfcc_t));

    /* DCEP without C0. */
    out += nrest;
    ahead = mfc[2] + 1;
    behind = mfc[-2] + 1;
    for (k = 0; k < nrest; ++k)
        out[k] = ahead[k] - behind[k];

    /* Power terms: C0, its delta and its second-order delta. */
    out += nrest;
    out[0] = mfc[0][0];
    out[1] = mfc[2][0] - mfc[-2][0];
    late = mfc[3][0] - mfc[-1][0];
    early = mfc[1][0] - mfc[-3][0];
    out[2] = late - early;

    /* D2CEP without C0. */
    out += 3;
    plus3 = mfc[3] + 1;
    minus1 = mfc[-1] + 1;
    plus1 = mfc[1] + 1;
    minus3 = mfc[-3] + 1;
    for (k = 0; k < nrest; ++k) {
        late = plus3[k] - minus1[k];
        early = plus1[k] - minus3[k];
        out[k] = late - early;
    }
}
/*
 * Concatenate the input frames of the whole window into the output
 * streams, without computing any differences.
 *
 * For a window size of win, the frames mfc[-win] .. mfc[win] are visited
 * in order.  Each stream j receives, for every frame, a slice of
 * feat_stream_len(fcb, j) / (2 * win + 1) values; the slices of
 * successive streams are taken from successive positions within the
 * input frame, and the slices of successive frames are laid out back to
 * back within feat[j].
 */
static void
feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    int32 win;
    int32 slot;                 /* 0-based position of the frame in the window */
    int32 s;

    win = feat_window_size(fcb);

    for (slot = 0; slot <= 2 * win; ++slot) {
        mfcc_t *frame = mfc[slot - win];
        uint32 src_off = 0;

        for (s = 0; s < feat_n_stream(fcb); ++s) {
            uint32 per_frame;

            /* The stream length covers the whole window; divide it
             * back down to the share of a single frame. */
            per_frame = feat_stream_len(fcb, s) / (2 * win + 1);

            memcpy(feat[s] + (slot * per_frame),
                   frame + src_off,
                   per_frame * sizeof(mfcc_t));

            src_off += per_frame;
        }
    }
}
/*
 * Four-stream feature built from 13-element cepstra.
 *
 * Streams written (with cepsize == 13, as asserted):
 *   feat[0]  12 values: C1..C12 of the current frame (C0 skipped)
 *   feat[1]  24 values: short-term delta (mfc[2] - mfc[-2]) of C1..C12,
 *                       then long-term delta (mfc[4] - mfc[-4]) of C1..C12
 *   feat[2]   3 values: C0, delta C0, 2nd-order delta C0
 *   feat[3]  12 values: 2nd-order delta of C1..C12:
 *                       (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3])
 *
 * The input must be addressable from mfc[-4] through mfc[4].
 */
static void
feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
{
    int32 nrest;                /* coefficient count without C0 */
    int32 k;
    mfcc_t *out;
    mfcc_t *ahead, *behind;
    mfcc_t *plus3, *minus1, *plus1, *minus3;
    mfcc_t late, early;

    assert(fcb);
    assert(feat_cepsize(fcb) == 13);
    assert(feat_n_stream(fcb) == 4);
    assert(feat_stream_len(fcb, 0) == 12);
    assert(feat_stream_len(fcb, 1) == 24);
    assert(feat_stream_len(fcb, 2) == 3);
    assert(feat_stream_len(fcb, 3) == 12);
    assert(feat_window_size(fcb) == 4);

    nrest = feat_cepsize(fcb) - 1;

    /* Stream 0 -- CEP without C0: start copying from element 1. */
    memcpy(feat[0], mfc[0] + 1, nrest * sizeof(mfcc_t));

    /* Stream 1, first half -- short-term DCEP without C0. */
    out = feat[1];
    ahead = mfc[2] + 1;
    behind = mfc[-2] + 1;
    for (k = 0; k < nrest; ++k)
        out[k] = ahead[k] - behind[k];

    /* Stream 1, second half -- long-term DCEP without C0, appended
     * directly after the short-term values. */
    ahead = mfc[4] + 1;
    behind = mfc[-4] + 1;
    for (k = 0; k < nrest; ++k)
        out[nrest + k] = ahead[k] - behind[k];

    /* Stream 2 -- POW: C0, DC0, D2C0, using the same differences as
     * the other coefficients. */
    out = feat[2];
    out[0] = mfc[0][0];
    out[1] = mfc[2][0] - mfc[-2][0];
    late = mfc[3][0] - mfc[-1][0];
    early = mfc[1][0] - mfc[-3][0];
    out[2] = late - early;

    /* Stream 3 -- D2CEP without C0. */
    out = feat[3];
    plus3 = mfc[3] + 1;
    minus1 = mfc[-1] + 1;
    plus1 = mfc[1] + 1;
    minus3 = mfc[-3] + 1;
    for (k = 0; k < nrest; ++k) {
        late = plus3[k] - minus1[k];
        early = plus1[k] - minus3[k];
        out[k] = late - early;
    }
}
/*
 * Allocate a kbcore_t and load into it every model for which a file or
 * type name was supplied, then cross-check the loaded models.
 *
 * Any argument that names a model (feattype, mdeffile, dictfile, lmfile,
 * fillpenfile, meanfile, subvqfile, tmatfile) may be NULL, in which case
 * the corresponding kbcore_t field stays NULL.
 *
 *   logbase                      passed to logs3_init()
 *   feattype, cmn, varnorm, agc  passed to feat_init(); the result must
 *                                have exactly one feature stream
 *   mdeffile                     passed to mdef_init()
 *   dictfile, fdictfile, compsep passed to dict_init(); compsep must be
 *                                NULL, empty, or a one-character string
 *   lmfile, langwt, inspen, uw   passed to lm_read()
 *   fillpenfile, silprob,
 *   fillprob                     passed to fillpen_init(), which is also
 *                                run (with a NULL fillpenfile) whenever
 *                                both an LM and a dictionary were loaded
 *   meanfile, varfile, varfloor,
 *   mixwfile, mixwfloor          passed to mgau_init(); varfile and
 *                                mixwfile are required if meanfile is set
 *   subvqfile                    passed to subvq_init(); only looked at
 *                                when meanfile is set
 *   tmatfile, tmatfloor          passed to tmat_init()
 *
 * Returns the populated kbcore_t.  Every failure is reported through
 * E_FATAL; the code after each E_FATAL assumes it does not return
 * (NOTE(review): confirm against the E_FATAL definition).
 */
kbcore_t *kbcore_init (float64 logbase,
                       char *feattype,
                       char *cmn,
                       char *varnorm,
                       char *agc,
                       char *mdeffile,
                       char *dictfile,
                       char *fdictfile,
                       char *compsep,
                       char *lmfile,
                       char *fillpenfile,
                       float64 silprob,
                       float64 fillprob,
                       float64 langwt,
                       float64 inspen,
                       float64 uw,
                       char *meanfile,
                       char *varfile,
                       float64 varfloor,
                       char *mixwfile,
                       float64 mixwfloor,
                       char *subvqfile,
                       char *tmatfile,
                       float64 tmatfloor)
{
    kbcore_t *kb;

    E_INFO("Initializing core models:\n");

    kb = (kbcore_t *) ckd_calloc (1, sizeof(kbcore_t));
    kb->fcb = NULL;
    kb->mdef = NULL;
    kb->dict = NULL;
    kb->dict2pid = NULL;
    kb->lm = NULL;
    kb->fillpen = NULL;
    kb->dict2lmwid = NULL;
    kb->mgau = NULL;
    kb->svq = NULL;
    kb->tmat = NULL;

    logs3_init (logbase);

    if (feattype) {
        if ((kb->fcb = feat_init (feattype, cmn, varnorm, agc)) == NULL)
            E_FATAL("feat_init(%s) failed\n", feattype);
        if (feat_n_stream(kb->fcb) != 1)
            E_FATAL("#Feature streams(%d) != 1\n", feat_n_stream(kb->fcb));
    }

    if (mdeffile) {
        if ((kb->mdef = mdef_init (mdeffile)) == NULL)
            E_FATAL("mdef_init(%s) failed\n", mdeffile);
    }

    if (dictfile) {
        if (! compsep)
            compsep = "";
        else if ((compsep[0] != '\0') && (compsep[1] != '\0')) {
            E_FATAL("Compound word separator(%s) must be empty or single character string\n",
                    compsep);
        }
        if ((kb->dict = dict_init (kb->mdef, dictfile, fdictfile, compsep[0])) == NULL)
            E_FATAL("dict_init(%s,%s,%s) failed\n", dictfile,
                    fdictfile ? fdictfile : "", compsep);
    }

    if (lmfile) {
        if ((kb->lm = lm_read (lmfile, langwt, inspen, uw)) == NULL)
            E_FATAL("lm_read(%s, %e, %e, %e) failed\n", lmfile, langwt, inspen, uw);
    }

    if (fillpenfile || (lmfile && kb->dict)) {
        /* Reaching here without a dictionary implies fillpenfile != NULL. */
        if (! kb->dict)         /* Sic */
            E_FATAL("No dictionary for associating filler penalty file(%s)\n", fillpenfile);

        /* fillpenfile is NULL when this branch was entered only because
         * an LM and a dictionary are present, so it must not be handed
         * to %s unguarded. */
        if ((kb->fillpen = fillpen_init (kb->dict, fillpenfile, silprob, fillprob,
                                         langwt, inspen)) == NULL)
            E_FATAL("fillpen_init(%s) failed\n", fillpenfile ? fillpenfile : "");
    }

    if (meanfile) {
        if ((! varfile) || (! mixwfile))
            E_FATAL("Varfile or mixwfile not specified along with meanfile(%s)\n", meanfile);
        kb->mgau = mgau_init (meanfile, varfile, varfloor, mixwfile, mixwfloor, TRUE);
        if (kb->mgau == NULL)
            E_FATAL("mgau_init(%s, %s, %e) failed\n", meanfile, varfile, varfloor);

        if (subvqfile) {
            if ((kb->svq = subvq_init (subvqfile, varfloor, -1, kb->mgau)) == NULL)
                E_FATAL("subvq_init (%s, %e, -1) failed\n", subvqfile, varfloor);
        }
    }

    if (tmatfile) {
        if ((kb->tmat = tmat_init (tmatfile, tmatfloor)) == NULL)
            E_FATAL("tmat_init (%s, %e) failed\n", tmatfile, tmatfloor);
    }

    if (kb->dict && kb->lm) {   /* Initialize dict2lmwid */
        if ((kb->dict2lmwid = wid_dict_lm_map (kb->dict, kb->lm)) == NULL)
            E_FATAL("Dict/LM word-id mapping failed\n");
    }

    if (kb->mdef && kb->dict) { /* Initialize dict2pid */
        kb->dict2pid = dict2pid_build (kb->mdef, kb->dict);
    }

    /* ***************** Verifications ***************** */
    E_INFO("Verifying models consistency:\n");

    if (kb->fcb && kb->mgau) {
        /* Verify feature streams against gauden codebooks */
        if (feat_stream_len(kb->fcb, 0) != mgau_veclen(kb->mgau))
            E_FATAL("Feature streamlen(%d) != mgau streamlen(%d)\n",
                    feat_stream_len(kb->fcb, 0), mgau_veclen(kb->mgau));
    }

    if (kb->mdef && kb->mgau) {
        /* Verify senone parameters against model definition parameters */
        if (kb->mdef->n_sen != mgau_n_mgau(kb->mgau))
            E_FATAL("Mdef #senones(%d) != mgau #senones(%d)\n",
                    kb->mdef->n_sen, mgau_n_mgau(kb->mgau));
    }

    if (kb->mdef && kb->tmat) {
        /* Verify transition matrices parameters against model definition parameters */
        if (kb->mdef->n_tmat != kb->tmat->n_tmat)
            E_FATAL("Mdef #tmat(%d) != tmatfile(%d)\n",
                    kb->mdef->n_tmat, kb->tmat->n_tmat);
        if (kb->mdef->n_emit_state != kb->tmat->n_state)
            E_FATAL("Mdef #states(%d) != tmat #states(%d)\n",
                    kb->mdef->n_emit_state, kb->tmat->n_state);
    }

    return kb;
}
acoustic_t *acoustic_init (feat_t *f, gauden_t *g, senone_t *s, float64 beam, int32 maxfr) { acoustic_t *am; int32 i; if (senone_n_mgau(s) != gauden_n_mgau(g)) { E_ERROR("#Parent mixture Gaussians mismatch: senone(%d), gauden(%d)\n", senone_n_mgau(s), gauden_n_mgau(g)); } if (feat_n_stream(f) != senone_n_stream(s)) { E_ERROR("#Feature-streams mismatch: feat(%d), senone(%d)\n", feat_n_stream(f), senone_n_stream(s)); } if (feat_n_stream(f) != gauden_n_stream(g)) { E_ERROR("#Feature-streams mismatch: feat(%d), gauden(%d)\n", feat_n_stream(f), gauden_n_stream(g)); return NULL; } for (i = 0; i < feat_n_stream(f); i++) { if (feat_stream_len(f, i) != gauden_stream_len(g, i)) { E_ERROR("Feature stream(%d) length mismatch: feat(%d), gauden(%d)\n", feat_stream_len(f, i), gauden_stream_len(g, i)); return NULL; } } if (beam > 1.0) { E_ERROR("mgaubeam > 1.0 (%e)\n", beam); return NULL; } am = (acoustic_t *) ckd_calloc (1, sizeof(acoustic_t)); am->fcb = f; am->gau = g; am->sen = s; am->mgaubeam = (beam == 0.0) ? LOGPROB_ZERO : logs3(beam); if (am->mgaubeam > 0) am->mgaubeam = 0; am->tot_mgau_eval = 0; am->tot_dist_valid = 0.0; am->dist_valid = (am->mgaubeam <= LOGPROB_ZERO) ? NULL : (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32)); if (f->compute_feat) { /* Input is MFC cepstra; feature vectors computed from that */ am->mfc = (float32 **) ckd_calloc_2d (maxfr, feat_cepsize(am->fcb), sizeof(float32)); am->feat = feat_array_alloc (f, 1); } else { /* Input is directly feature vectors */ am->mfc = NULL; am->feat = feat_array_alloc (f, maxfr); } am->dist = (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32)); am->gauden_active = bitvec_alloc (g->n_mgau); am->senscr = (int32 *) ckd_calloc (s->n_sen, sizeof(int32)); am->senscale = (int32 *) ckd_calloc (maxfr, sizeof(int32)); am->sen_active = bitvec_alloc (s->n_sen); return am; }