/**
 * Create the feature computation module for an acoustic model and apply
 * the optional feature-related settings found in its configuration.
 *
 * The module is built from "-feat", "-cmn", "-varnorm", "-agc" and
 * "-ceplen".  Afterwards "-lda"/"-ldadim", "-svspec", "-agcthresh" and
 * "-cmninit" are applied when present.
 *
 * @param acmod Acoustic model; its config is read and its fcb is set.
 * @return 0 on success, -1 if the feature module could not be created or
 *         if loading the LDA transform or the subvector spec failed.
 */
static int
acmod_init_feat(acmod_t *acmod)
{
    feat_t *fcb;
    const char *lda_path;
    const char *svspec;

    acmod->fcb =
        feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                  cmn_type_from_str(cmd_ln_str_r(acmod->config, "-cmn")),
                  cmd_ln_boolean_r(acmod->config, "-varnorm"),
                  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
    fcb = acmod->fcb;
    if (fcb == NULL)
        return -1;

    /* Optional linear feature transformation. */
    lda_path = cmd_ln_str_r(acmod->config, "-lda");
    if (lda_path) {
        E_INFO("Reading linear feature transformation from %s\n", lda_path);
        if (feat_read_lda(fcb, lda_path,
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    /* Optional subvector specification. */
    svspec = cmd_ln_str_r(acmod->config, "-svspec");
    if (svspec) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n", svspec);
        subvecs = parse_subvecs(svspec);
        if (subvecs == NULL)
            return -1;
        if (feat_set_subvecs(fcb, subvecs) < 0)
            return -1;
    }

    /* The AGC threshold is only applied when AGC is not "none". */
    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
        agc_set_threshold(fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    /* Seed the CMN mean vector from a comma-separated list of values. */
    if (fcb->cmn_struct && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        mfcc_t *mean = fcb->cmn_struct->cmn_mean;
        int32 veclen = fcb->cmn_struct->veclen;
        char *buf, *cursor, *comma;
        int32 idx;

        /* Work on a private copy because separators are overwritten. */
        buf = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
        cursor = buf;
        for (idx = 0; idx < veclen; ++idx) {
            comma = strchr(cursor, ',');
            if (comma == NULL)
                break;
            *comma = '\0';
            mean[idx] = FLOAT2MFCC(atof_c(cursor));
            cursor = comma + 1;
        }
        /* Trailing value that is not followed by a comma. */
        if (idx < veclen && *cursor != '\0')
            mean[idx] = FLOAT2MFCC(atof_c(cursor));
        ckd_free(buf);
    }

    return 0;
}
/**
 * Recompute the CMN mean from the accumulated sums and, once the frame
 * count has reached CMN_WIN_HWM, scale the sums down so that they
 * correspond to CMN_WIN frames.  The mean is logged before and after.
 *
 * @param cmn CMN state; cmn_mean, sum and nframe are updated in place.
 */
static void
cmn_prior_shiftwin(cmn_t *cmn)
{
    mfcc_t scale;
    int32 k;

    E_INFO("cmn_prior_update: from < ");
    k = 0;
    while (k < cmn->veclen) {
        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[k]));
        k++;
    }
    E_INFOCONT(">\n");

    /* scale == 1/nframe; the mean itself is computed by division. */
    scale = FLOAT2MFCC(1.0) / cmn->nframe;
    k = 0;
    while (k < cmn->veclen) {
        cmn->cmn_mean[k] = cmn->sum[k] / cmn->nframe;
        k++;
    }

    /* Make the accumulation decay exponentially. */
    if (cmn->nframe >= CMN_WIN_HWM) {
        scale = CMN_WIN * scale;
        k = 0;
        while (k < cmn->veclen) {
            cmn->sum[k] = MFCCMUL(cmn->sum[k], scale);
            k++;
        }
        cmn->nframe = CMN_WIN;
    }

    E_INFO("cmn_prior_update: to   < ");
    k = 0;
    while (k < cmn->veclen) {
        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[k]));
        k++;
    }
    E_INFOCONT(">\n");
}
agc_t *agc_init(void) { agc_t *agc; agc = ckd_calloc(1, sizeof(*agc)); agc->noise_thresh = FLOAT2MFCC(2.0); return agc; }
/**
 * Read the nodes of a kd-tree from fp, keeping only the top maxdepth
 * levels.
 *
 * @param fp       Stream positioned at the first node record.
 * @param tree     Tree whose nodes array is allocated and filled in.
 * @param maxdepth Number of levels to keep; 0, or anything above
 *                 tree->n_level, means all levels.
 * @param maxbbi   Passed to read_bbi_list() for the nodes that become
 *                 leaves of the kept tree.
 * @return 0 on success (including when the node stream ends early),
 *         -1 if a node field or BBI list could not be read.
 */
static int32
read_kd_nodes(FILE * fp, kd_tree_t * tree, uint32 maxdepth, int32 maxbbi)
{
    uint32 n_seen, n_kept, n_in, n_out;
    int32 in_level, out_level;

    /* Balanced binary trees, so we have 2^nlevels-1 nodes. */
    if (maxdepth == 0 || maxdepth > tree->n_level)
        maxdepth = tree->n_level;
    n_in = (1 << tree->n_level) - 1;
    n_out = (1 << maxdepth) - 1;
    tree->nodes = ckd_calloc(n_out, sizeof(kd_tree_node_t));

    /* Nodes are read in depth-first ordering. */
    n_kept = 0;
    for (n_seen = 0; n_seen < n_in; ++n_seen) {
        float32 plane;
        int32 comp;

        /* A missing NODE marker ends the reading loop without error. */
        if (read_tree_int(fp, "NODE", &in_level, FALSE) < 0)
            break;
        if (read_tree_int(fp, "split_comp", &comp, FALSE) < 0)
            return -1;
        if (read_tree_float(fp, "split_plane", &plane, FALSE) < 0)
            return -1;

        out_level = in_level - (tree->n_level - maxdepth);
        if (out_level <= 0) {
            /* Below maxdepth: the BBI list has to be read anyway. */
            if (read_bbi_list(fp, NULL, 0) < 0)
                return -1;
            continue;
        }

        /* Only create a node if we are above maxdepth. */
        assert(n_kept < n_out);
        tree->nodes[n_kept].split_comp = comp;
        tree->nodes[n_kept].split_plane = FLOAT2MFCC(plane);

        if (out_level == 1) {
            /* We only need the BBI list for leafnodes now. */
            if (read_bbi_list(fp, tree->nodes + n_kept, maxbbi) < 0)
                return -1;
        }
        else {
            if (read_bbi_list(fp, NULL, 0) < 0)
                return -1;
            /* They are also full trees, hence: */
            tree->nodes[n_kept].left = n_kept + 1;
            tree->nodes[n_kept].right = n_kept + (1 << (out_level - 1));
        }
        ++n_kept;
    }
    E_INFO("Read %d nodes\n", n_kept);

    return 0;
}
/**
 * Convert a block of float32 to mfcc_t (can be done in-place).
 *
 * The first row pointer of each array is treated as one contiguous
 * block of nframes * FEATURE_DIMENSION values.
 *
 * @return Number of values converted (or, without FIXED_POINT, the
 *         number that would have been when input and output are the
 *         same array and nothing needs doing).
 **/
int32
fe_float_to_mfcc(fe_t * FE, float32 ** input, mfcc_t ** output, int32 nframes)
{
    int32 pos;

#ifndef FIXED_POINT
    /* In-place request: nothing to convert in this build. */
    if ((void *) input == (void *) output)
        return nframes * FE->FEATURE_DIMENSION;
#endif

    pos = 0;
    while (pos < nframes * FE->FEATURE_DIMENSION) {
        output[0][pos] = FLOAT2MFCC(input[0][pos]);
        ++pos;
    }

    return pos;
}
/**
 * Convert a block of float32 to mfcc_t (can be done in-place).
 *
 * The first row pointer of each array is treated as one contiguous
 * block of nframes * feature_dimension values.
 *
 * @return Number of values converted (or, without FIXED_POINT, the
 *         number that would have been when input and output are the
 *         same array and nothing needs doing).
 **/
int32
fe_float_to_mfcc(fe_t * fe, float32 ** input, mfcc_t ** output, int32 nframes)
{
    int32 pos;

#ifndef FIXED_POINT
    /* In-place request: nothing to convert in this build. */
    if ((void *) input == (void *) output)
        return nframes * fe->feature_dimension;
#endif

    pos = 0;
    while (pos < nframes * fe->feature_dimension) {
        output[0][pos] = FLOAT2MFCC(input[0][pos]);
        ++pos;
    }

    return pos;
}
/**
 * Allocate CMN state for vectors of the given length.
 *
 * The mean, variance and sum vectors start out zeroed, except that
 * mean[0] is set to 12.0.
 *
 * @param veclen Length of the cepstral vectors.
 * @return Newly allocated CMN state.
 */
cmn_t *
cmn_init(int32 veclen)
{
    cmn_t *self = (cmn_t *) ckd_calloc(1, sizeof(*self));

    self->veclen = veclen;
    self->cmn_mean = (mfcc_t *) ckd_calloc(veclen, sizeof(*self->cmn_mean));
    self->cmn_var = (mfcc_t *) ckd_calloc(veclen, sizeof(*self->cmn_var));
    self->sum = (mfcc_t *) ckd_calloc(veclen, sizeof(*self->sum));
    self->nframe = 0;

    /* A front-end dependent magic number */
    self->cmn_mean[0] = FLOAT2MFCC(12.0);

    E_INFO("mean[0]= %.2f, mean[1..%d]= 0.0\n",
           MFCC2FLOAT(self->cmn_mean[0]), veclen - 1);

    return self;
}
/*
 * Precompute the parts of the Gaussian density that do not depend on
 * the observation:
 *   - the log-determinant term, accumulated per density into g->det,
 *   - 1/(2*var), which replaces each variance in place.
 * Variances are read as float32 and floored at varfloor first.  Under
 * FIXED_POINT the means are also converted from float32 to mfcc_t in
 * place.
 * NOTE: the density computation is performed in the log domain.
 *
 * Returns 0.
 */
static int32
gauden_dist_precompute(gauden_t * g, logmath_t *lmath, float32 varfloor)
{
    int32 mgau, feat, den, dim;
    int32 n_floored = 0;

    /* Allocate space for determinants */
    g->det = (mfcc_t ***) ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density,
                                        sizeof(***g->det));

    for (mgau = 0; mgau < g->n_mgau; mgau++) {
        for (feat = 0; feat < g->n_feat; feat++) {
            int32 veclen = g->featlen[feat];

            /* Determinants for all variance vectors in g->[mgau][feat] */
            for (den = 0; den < g->n_density; den++) {
                mfcc_t *det = &g->det[mgau][feat][den];
                mfcc_t *var = g->var[mgau][feat][den];
                mfcc_t *mean = g->mean[mgau][feat][den];

                *det = 0;
                for (dim = 0; dim < veclen; dim++, var++, mean++) {
                    /* The slot still holds the raw float32 variance. */
                    float32 fvar = *(float32 *) var;

#ifdef FIXED_POINT
                    *mean = FLOAT2MFCC(*(float32 *) mean);
#endif
                    if (fvar < varfloor) {
                        fvar = varfloor;
                        ++n_floored;
                    }
                    *det += (mfcc_t) logmath_log(lmath,
                                                 1.0 / sqrt(fvar * 2.0 * M_PI));
                    /* Precompute this part of the exponential */
                    *var = (mfcc_t) logmath_ln_to_log(lmath,
                                                      (1.0 / (fvar * 2.0)));
                }
            }
        }
    }

    E_INFO("%d variance values floored\n", n_floored);

    return 0;
}
/**
 * Update the estimated maximum for the next utterance.
 *
 * If any frames were observed, the observed maximum is added to the
 * history and the estimate becomes the history average.  When the
 * history reaches 8 utterances it is halved (sum / 2, count = 4) so
 * that older utterances decay.  The per-utterance accumulators are then
 * reset.
 *
 * @param agc AGC state, updated in place.
 */
void
agc_emax_update(agc_t *agc)
{
    if (agc->obs_frame) {       /* Update only if some data observed */
        agc->obs_max_sum += agc->obs_max;
        agc->obs_utt++;

        /* Re-estimate max over past history; decay the history */
        agc->max = agc->obs_max_sum / agc->obs_utt;
        if (agc->obs_utt == 8) {
            agc->obs_max_sum /= 2;
            agc->obs_utt = 4;
        }
    }
    /* Convert explicitly: "%.2f" needs a floating-point argument, which
     * a raw mfcc_t is not in FIXED_POINT builds. */
    E_INFO("AGCEMax: obs= %.2f, new= %.2f\n",
           MFCC2FLOAT(agc->obs_max), MFCC2FLOAT(agc->max));

    /* Reset the accumulators for the next utterance. */
    agc->obs_frame = 0;
    agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
}
/**
 * Precompute the observation-independent parts of the Gaussians for
 * every feature stream.
 *
 * Means are converted from float32 to mfcc_t in place.  Each variance
 * is read as float32, floored at vFloor and replaced in place by the
 * log-domain value of 1/(2*var).  The per-density sum of the
 * log-domain normalisation terms is written to dets.
 *
 * @return 0.
 */
static int32
s3_precomp(s2_semi_mgau_t *s, logmath_t *lmath, float32 vFloor)
{
    int stream;

    for (stream = 0; stream < s->n_feat; ++stream) {
        /* Means and variances are walked as flat arrays that run
         * through all densities of the stream. */
        mfcc_t *mean = s->means[stream];
        mfcc_t *var = s->vars[stream];
        mfcc_t *det = s->dets[stream];
        int32 dim = s->veclen[stream];
        int32 cw, k;

        for (cw = 0; cw < s->n_density; ++cw) {
            mfcc_t logdet = 0;

            for (k = 0; k < dim; ++k, ++mean, ++var) {
                float64 fvar;

                *mean = FLOAT2MFCC(*(float32 *) mean);

                /* Always do these pre-calculations in floating point */
                fvar = *(float32 *) var;
                if (fvar < vFloor)
                    fvar = vFloor;
                logdet += (mfcc_t) logmath_log(lmath,
                                               1 / sqrt(fvar * 2.0 * M_PI));
                *var = (mfcc_t) logmath_ln_to_log(lmath, 1.0 / (2.0 * fvar));
            }
            det[cw] = logdet;
        }
    }
    return 0;
}
#ifdef HAVE_CONFIG_H #include <config.h> #endif #include <stdio.h> #include <string.h> #include <math.h> #include "feat.h" #include "ckd_alloc.h" #include "test_macros.h" const mfcc_t data[6][13] = { { FLOAT2MFCC(15.114), FLOAT2MFCC(-1.424), FLOAT2MFCC(-0.953), FLOAT2MFCC(0.186), FLOAT2MFCC(-0.656), FLOAT2MFCC(-0.226), FLOAT2MFCC(-0.105), FLOAT2MFCC(-0.412), FLOAT2MFCC(-0.024), FLOAT2MFCC(-0.091), FLOAT2MFCC(-0.124), FLOAT2MFCC(-0.158), FLOAT2MFCC(-0.197)}, { FLOAT2MFCC(14.729), FLOAT2MFCC(-1.313), FLOAT2MFCC(-0.892), FLOAT2MFCC(0.140), FLOAT2MFCC(-0.676), FLOAT2MFCC(-0.089), FLOAT2MFCC(-0.313), FLOAT2MFCC(-0.422), FLOAT2MFCC(-0.058), FLOAT2MFCC(-0.101), FLOAT2MFCC(-0.100), FLOAT2MFCC(-0.128), FLOAT2MFCC(-0.123)}, { FLOAT2MFCC(14.502), FLOAT2MFCC(-1.351), FLOAT2MFCC(-1.028), FLOAT2MFCC(-0.189), FLOAT2MFCC(-0.718), FLOAT2MFCC(-0.139), FLOAT2MFCC(-0.121), FLOAT2MFCC(-0.365), FLOAT2MFCC(-0.139), FLOAT2MFCC(-0.154), FLOAT2MFCC(0.041), FLOAT2MFCC(0.009), FLOAT2MFCC(-0.073)}, { FLOAT2MFCC(14.557), FLOAT2MFCC(-1.676), FLOAT2MFCC(-0.864), FLOAT2MFCC(0.118), FLOAT2MFCC(-0.445), FLOAT2MFCC(-0.168), FLOAT2MFCC(-0.069), FLOAT2MFCC(-0.503), FLOAT2MFCC(-0.013), FLOAT2MFCC(0.007), FLOAT2MFCC(-0.056), FLOAT2MFCC(-0.075), FLOAT2MFCC(-0.237)}, { FLOAT2MFCC(14.665), FLOAT2MFCC(-1.498), FLOAT2MFCC(-0.582), FLOAT2MFCC(0.209), FLOAT2MFCC(-0.487), FLOAT2MFCC(-0.247),
#include <pocketsphinx.h> #include <stdio.h> #include <string.h> #include "pocketsphinx_internal.h" #include "test_macros.h" static const mfcc_t prior[13] = { FLOAT2MFCC(33.89), FLOAT2MFCC(-1.13), FLOAT2MFCC(0.83), FLOAT2MFCC(0.49), FLOAT2MFCC(-0.65), FLOAT2MFCC(0.12), FLOAT2MFCC(-0.03), FLOAT2MFCC(0.28), FLOAT2MFCC(0.41), FLOAT2MFCC(0.59), FLOAT2MFCC(0.11), FLOAT2MFCC(-0.20), FLOAT2MFCC(0.17) }; int ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected) { ps_decoder_t *ps; mfcc_t **cepbuf; FILE *rawfh; int16 *buf;
/**
 * Get the AGC noise threshold as a floating-point value.
 *
 * The threshold is stored with FLOAT2MFCC() by agc_init() and
 * agc_set_threshold(), so it has to be converted back with
 * MFCC2FLOAT() here.
 *
 * @param agc AGC state to query.
 * @return The noise threshold.
 */
float32
agc_get_threshold(agc_t *agc)
{
    return MFCC2FLOAT(agc->noise_thresh);
}
/**
 * Set the AGC noise threshold.
 *
 * @param agc       AGC state to modify.
 * @param threshold New threshold; stored after conversion with
 *                  FLOAT2MFCC().
 */
void
agc_set_threshold(agc_t *agc, float32 threshold)
{
    agc->noise_thresh = FLOAT2MFCC(threshold);
}
/**
 * Set the estimated maximum used by the AGC and log the new value.
 *
 * @param agc AGC state to modify.
 * @param m   New maximum; stored after conversion with FLOAT2MFCC().
 */
void
agc_emax_set(agc_t *agc, float32 m)
{
    E_INFO("AGCEMax: max= %.2f\n", m);
    agc->max = FLOAT2MFCC(m);
}
static int initialize(int argc, char *argv[]) { const char *fdictfn; const char *dictfn; const char *ts2cbfn; uint32 n_ts; uint32 n_cb; /* define, parse and (partially) validate the command line */ parse_cmd_ln(argc, argv); feat = feat_init(cmd_ln_str("-feat"), cmn_type_from_str(cmd_ln_str("-cmn")), cmd_ln_boolean("-varnorm"), agc_type_from_str(cmd_ln_str("-agc")), 1, cmd_ln_int32("-ceplen")); if (cmd_ln_str("-lda")) { E_INFO("Reading linear feature transformation from %s\n", cmd_ln_str("-lda")); if (feat_read_lda(feat, cmd_ln_str("-lda"), cmd_ln_int32("-ldadim")) < 0) return -1; } if (cmd_ln_str("-svspec")) { int32 **subvecs; E_INFO("Using subvector specification %s\n", cmd_ln_str("-svspec")); if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL) return -1; if ((feat_set_subvecs(feat, subvecs)) < 0) return -1; } if (cmd_ln_exists("-agcthresh") && 0 != strcmp(cmd_ln_str("-agc"), "none")) { agc_set_threshold(feat->agc_struct, cmd_ln_float32("-agcthresh")); } if (feat->cmn_struct && cmd_ln_exists("-cmninit")) { char *c, *cc, *vallist; int32 nvals; vallist = ckd_salloc(cmd_ln_str("-cmninit")); c = vallist; nvals = 0; while (nvals < feat->cmn_struct->veclen && (cc = strchr(c, ',')) != NULL) { *cc = '\0'; feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c)); c = cc + 1; ++nvals; } if (nvals < feat->cmn_struct->veclen && *c != '\0') { feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c)); } ckd_free(vallist); } if (cmd_ln_str("-segdir")) corpus_set_seg_dir(cmd_ln_str("-segdir")); if (cmd_ln_str("-segext")) corpus_set_seg_ext(cmd_ln_str("-segext")); corpus_set_mfcc_dir(cmd_ln_str("-cepdir")); corpus_set_mfcc_ext(cmd_ln_str("-cepext")); if (cmd_ln_str("-lsnfn")) corpus_set_lsn_filename(cmd_ln_str("-lsnfn")); corpus_set_ctl_filename(cmd_ln_str("-ctlfn")); if (cmd_ln_int32("-nskip") && cmd_ln_int32("-runlen")) { corpus_set_interval(cmd_ln_int32("-nskip"), cmd_ln_int32("-runlen")); } else if (cmd_ln_int32("-part") && cmd_ln_int32("-npart")) { 
corpus_set_partition(cmd_ln_int32("-part"), cmd_ln_int32("-npart")); } if (corpus_init() != S3_SUCCESS) { return S3_ERROR; } if (cmd_ln_str("-moddeffn")) { E_INFO("Reading %s\n", cmd_ln_str("-moddeffn")); /* Read in the model definitions. Defines the set of CI phones and context dependent phones. Defines the transition matrix tying and state level tying. */ if (model_def_read(&mdef, cmd_ln_str("-moddeffn")) != S3_SUCCESS) { return S3_ERROR; } ts2cbfn = cmd_ln_str("-ts2cbfn"); if (strcmp(SEMI_LABEL, ts2cbfn) == 0) { mdef->cb = semi_ts2cb(mdef->n_tied_state); n_ts = mdef->n_tied_state; n_cb = 1; } else if (strcmp(CONT_LABEL, ts2cbfn) == 0) { mdef->cb = cont_ts2cb(mdef->n_tied_state); n_ts = mdef->n_tied_state; n_cb = mdef->n_tied_state; } else if (strcmp(PTM_LABEL, ts2cbfn) == 0) { mdef->cb = ptm_ts2cb(mdef); n_ts = mdef->n_tied_state; n_cb = mdef->acmod_set->n_ci; } else if (s3ts2cb_read(ts2cbfn, &mdef->cb, &n_ts, &n_cb) != S3_SUCCESS) { return S3_ERROR; } dictfn = cmd_ln_str("-dictfn"); if (dictfn == NULL) { E_FATAL("You must specify a content dictionary using -dictfn\n"); } E_INFO("Reading %s\n", dictfn); lex = lexicon_read(NULL, /* no lexicon to start */ dictfn, mdef->acmod_set); if (lex == NULL) return S3_ERROR; fdictfn = cmd_ln_str("-fdictfn"); if (fdictfn) { E_INFO("Reading %s\n", fdictfn); (void)lexicon_read(lex, /* add filler words content lexicon */ fdictfn, mdef->acmod_set); } } return S3_SUCCESS; }
/** * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data). * If out_mfc is 0, no actual reading will be done, and the number of * frames (plus padding) that would be read is returned. * * It's important that normalization is done before padding because * frames outside the data we are interested in shouldn't be taken * into normalization stats. * * @return # frames read (plus padding) if successful, -1 if * error (e.g., mfc array too small). */ static int32 feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win, int32 sf, int32 ef, mfcc_t ***out_mfc, int32 maxfr, int32 cepsize) { FILE *fp; int32 n_float32; float32 *float_feat; struct stat statbuf; int32 i, n, byterev; int32 start_pad, end_pad; mfcc_t **mfc; /* Initialize the output pointer to 0, so that any attempts to free() it if we fail before allocating it will not segfault! */ if (out_mfc) *out_mfc = 0; E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef); if (ef >= 0 && ef <= sf) { E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf); return -1; } /* Find filesize; HACK!! 
To get around intermittent NFS failures, use stat_retry */ if ((stat_retry(file, &statbuf) < 0) || ((fp = fopen(file, "rb")) == 0)) { #ifndef POCKETSPHINX_NET E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno)); #endif return -1; } /* Read #floats in header */ if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) { E_ERROR("%s: fread(#floats) failed\n", file); fclose(fp); return -1; } /* Check if n_float32 matches file size */ byterev = 0; if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */ n = n_float32; SWAP_INT32(&n); if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */ E_ERROR ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n", file, n_float32, n_float32, statbuf.st_size, statbuf.st_size); fclose(fp); return -1; } n_float32 = n; byterev = 1; } if (n_float32 <= 0) { E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32); fclose(fp); return -1; } /* Convert n to #frames of input */ n = n_float32 / cepsize; if (n * cepsize != n_float32) { E_ERROR("Header size field: %d; not multiple of %d\n", n_float32, cepsize); fclose(fp); return -1; } /* Check start and end frames */ if (sf > 0) { if (sf >= n) { E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file, sf, n); fclose(fp); return -1; } } if (ef < 0) ef = n-1; else if (ef >= n) { E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n", file, ef, n); ef = n-1; } /* Add window to start and end frames */ sf -= win; ef += win; if (sf < 0) { start_pad = -sf; sf = 0; } else start_pad = 0; if (ef >= n) { end_pad = ef - n + 1; ef = n - 1; } else end_pad = 0; /* Limit n if indicated by [sf..ef] */ if ((ef - sf + 1) < n) n = (ef - sf + 1); if (maxfr > 0 && n + start_pad + end_pad > maxfr) { E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n", file, maxfr, n + start_pad + end_pad); fclose(fp); 
return -1; } /* If no output buffer was supplied, then skip the actual data reading. */ if (out_mfc != 0) { /* Position at desired start frame and read actual MFC data */ mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t)); if (sf > 0) fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR); n_float32 = n * cepsize; #ifdef FIXED_POINT float_feat = ckd_calloc(n_float32, sizeof(float32)); #else float_feat = mfc[start_pad]; #endif if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) { E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize); ckd_free_2d(mfc); fclose(fp); return -1; } if (byterev) { for (i = 0; i < n_float32; i++) { SWAP_FLOAT32(&float_feat[i]); } } #ifdef FIXED_POINT for (i = 0; i < n_float32; ++i) { mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]); } ckd_free(float_feat); #endif /* Normalize */ feat_cmn(fcb, mfc + start_pad, n, 1, 1); feat_agc(fcb, mfc + start_pad, n, 1, 1); /* Replicate start and end frames if necessary. */ for (i = 0; i < start_pad; ++i) memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t)); for (i = 0; i < end_pad; ++i) memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1], cepsize * sizeof(mfcc_t)); *out_mfc = mfc; } fclose(fp); return n + start_pad + end_pad; }
/**
 * Batch cepstral mean (and optionally variance) normalisation.
 *
 * The per-dimension mean over all frames is stored in cmn->cmn_mean
 * and subtracted from every frame.  With varnorm set, each dimension is
 * additionally multiplied by sqrt(n_frame / sum of squared deviations),
 * which is stored in cmn->cmn_var.
 *
 * @param cmn     CMN state; cmn_mean (and cmn_var) are overwritten.
 * @param mfc     Frames to normalise in place; must not be NULL.
 * @param varnorm Non-zero to also normalise the variance.
 * @param n_frame Number of frames; nothing is done when <= 0.
 */
void
cmn(cmn_t *cmn, mfcc_t ** mfc, int32 varnorm, int32 n_frame)
{
    mfcc_t *frame;
    mfcc_t dev;
    int32 d, fr;

    oe_assert(mfc != NULL);

    if (n_frame <= 0)
        return;

    /* If cmn->cmn_mean wasn't NULL, we need to zero the contents */
    memset(cmn->cmn_mean, 0, cmn->veclen * sizeof(mfcc_t));

    /* Find mean cep vector for this utterance */
    for (fr = 0; fr < n_frame; fr++) {
        frame = mfc[fr];
        for (d = 0; d < cmn->veclen; d++)
            cmn->cmn_mean[d] += frame[d];
    }
    for (d = 0; d < cmn->veclen; d++)
        cmn->cmn_mean[d] /= n_frame;

    E_INFO("CMN: ");
    for (d = 0; d < cmn->veclen; d++)
        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[d]));
    E_INFOCONT("\n");

    if (!varnorm) {
        /* Subtract mean from each cep vector */
        for (fr = 0; fr < n_frame; fr++) {
            frame = mfc[fr];
            for (d = 0; d < cmn->veclen; d++)
                frame[d] -= cmn->cmn_mean[d];
        }
        return;
    }

    /* Scale cep vectors to have unit variance along each dimension,
       and subtract means */
    /* If cmn->cmn_var wasn't NULL, we need to zero the contents */
    memset(cmn->cmn_var, 0, cmn->veclen * sizeof(mfcc_t));

    for (fr = 0; fr < n_frame; fr++) {
        frame = mfc[fr];
        for (d = 0; d < cmn->veclen; d++) {
            dev = frame[d] - cmn->cmn_mean[d];
            cmn->cmn_var[d] += MFCCMUL(dev, dev);
        }
    }

    /* Inverse Std. Dev */
    for (d = 0; d < cmn->veclen; d++)
        cmn->cmn_var[d] = FLOAT2MFCC(sqrt((float64)n_frame / MFCC2FLOAT(cmn->cmn_var[d])));

    for (fr = 0; fr < n_frame; fr++) {
        frame = mfc[fr];
        for (d = 0; d < cmn->veclen; d++)
            frame[d] = MFCCMUL((frame[d] - cmn->cmn_mean[d]), cmn->cmn_var[d]);
    }
}
#include <stdio.h> #include <string.h> #include <pocketsphinx.h> #include <sphinxbase/logmath.h> #include "acmod.h" #include "test_macros.h" static const mfcc_t cmninit[13] = { FLOAT2MFCC(41.00), FLOAT2MFCC(-5.29), FLOAT2MFCC(-0.12), FLOAT2MFCC(5.09), FLOAT2MFCC(2.48), FLOAT2MFCC(-4.07), FLOAT2MFCC(-1.37), FLOAT2MFCC(-1.78), FLOAT2MFCC(-5.08), FLOAT2MFCC(-2.05), FLOAT2MFCC(-6.45), FLOAT2MFCC(-1.42), FLOAT2MFCC(1.17) }; int main(int argc, char *argv[]) { acmod_t *acmod; logmath_t *lmath; cmd_ln_t *config;
/**
 * Load a linear feature transformation (LDA) matrix from a file and
 * attach it to a single-stream feature module.
 *
 * Any transform already attached is released first.  Under FIXED_POINT
 * the loaded float values are converted to mfcc_t in place.
 *
 * @param feat    Feature module; lda, n_lda and out_dim are updated.
 * @param ldafile Path of the transform file.
 * @param dim     Output dimensionality to use; replaced by the number
 *                of rows in the file when <= 0 or larger than that.
 * @return 0 on success, -1 on error.  A column count that does not
 *         match the feature stream length is reported with E_FATAL.
 */
int32
feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
{
    FILE *fh;
    int32 byteswap, chksum_present;
    uint32 chksum, i, m, n;
    char **argname, **argval;

    assert(feat);
    if (feat->n_stream != 1) {
        E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n",
                feat->n_stream);
        return -1;
    }

    if ((fh = fopen(ldafile, "rb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open transform file '%s' for reading", ldafile);
        return -1;
    }

    if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) {
        E_ERROR("Failed to read header from transform file '%s'\n", ldafile);
        fclose(fh);
        return -1;
    }

    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
                E_WARN("%s: Version mismatch: %s, expecting %s\n",
                       ldafile, argval[i], MATRIX_FILE_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            /* NOTE(review): recorded, but nothing in this function
             * verifies the checksum afterwards. */
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    (void) chksum_present;

    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    if (feat->lda) {
        ckd_free_3d((void ***) feat->lda);
        /* Do not leave a dangling pointer behind if the read below
         * fails and we return early. */
        feat->lda = NULL;
    }

    {
        /* Use a temporary variable to avoid strict-aliasing problems. */
        void ***outlda;

        if (bio_fread_3d(&outlda, sizeof(float32),
                         &feat->n_lda, &m, &n,
                         fh, byteswap, &chksum) < 0) {
            E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile);
            fclose(fh);
            return -1;
        }
        feat->lda = (void *) outlda;
    }
    fclose(fh);

#ifdef FIXED_POINT
    /* FIXME: This is a fragile hack that depends on mfcc_t and
     * float32 being the same size (which they are, but...) */
    for (i = 0; i < feat->n_lda * m * n; ++i) {
        feat->lda[0][0][i] = FLOAT2MFCC(((float *) feat->lda[0][0])[i]);
    }
#endif

    /* Note that SphinxTrain stores the eigenvectors as row vectors. */
    if (n != feat->stream_len[0])
        E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n",
                n, feat->stream_len[0]);

    /* Override dim from file if it is 0 or greater than m. */
    if (dim > m || dim <= 0) {
        dim = m;
    }
    feat->out_dim = dim;

    return 0;
}
/**
 * Parse the command line and set up the feature module, the model
 * definitions and the lexicon.
 *
 * @param argc      Argument count, passed to parse_cmd_ln().
 * @param argv      Argument vector, passed to parse_cmd_ln().
 * @param out_lex   Receives the lexicon, or NULL if no "-dictfn" given.
 * @param out_omdef Receives the output model definition, or NULL if no
 *                  "-omoddeffn" was given.
 * @param out_dmdef Receives the dump model definition, or NULL if no
 *                  "-dmoddeffn" was given.
 * @param out_feat  Receives the feature module.
 * @return S3_SUCCESS on success; -1 or S3_ERROR on failure.  Output
 *         parameters not yet reached at the point of failure are left
 *         untouched.
 */
int
main_initialize(int argc,
                char *argv[],
                lexicon_t **out_lex,
                model_def_t **out_omdef,
                model_def_t **out_dmdef,
                feat_t** out_feat)
{
    model_def_t *dmdef = NULL;
    model_def_t *omdef = NULL;
    lexicon_t *lex = NULL;
    feat_t *feat;
    const char *fn;
    uint32 n_ts;
    uint32 n_cb;
    const char *ts2cbfn;

    parse_cmd_ln(argc, argv);

    feat =
        feat_init(cmd_ln_str("-feat"),
                  cmn_type_from_str(cmd_ln_str("-cmn")),
                  cmd_ln_boolean("-varnorm"),
                  agc_type_from_str(cmd_ln_str("-agc")),
                  1, cmd_ln_int32("-ceplen"));
    /* Everything below dereferences feat, so a failed init must stop here. */
    if (feat == NULL)
        return -1;

    if (cmd_ln_str("-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str("-lda"));
        if (feat_read_lda(feat,
                          cmd_ln_str("-lda"),
                          cmd_ln_int32("-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str("-svspec")) {
        int32 **subvecs;

        E_INFO("Using subvector specification %s\n",
               cmd_ln_str("-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str("-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(feat, subvecs)) < 0)
            return -1;
    }

    /* The AGC threshold is only applied when AGC is not "none". */
    if (cmd_ln_exists("-agcthresh")
        && 0 != strcmp(cmd_ln_str("-agc"), "none")) {
        agc_set_threshold(feat->agc_struct,
                          cmd_ln_float32("-agcthresh"));
    }

    /* Seed the CMN mean vector from a comma-separated list of values. */
    if (feat->cmn_struct
        && cmd_ln_exists("-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        /* Work on a private copy because separators are overwritten. */
        vallist = ckd_salloc(cmd_ln_str("-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < feat->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
            c = cc + 1;
            ++nvals;
        }
        /* Trailing value that is not followed by a comma. */
        if (nvals < feat->cmn_struct->veclen && *c != '\0') {
            feat->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
        }
        ckd_free(vallist);
    }
    *out_feat = feat;

    if (cmd_ln_str("-omoddeffn")) {
        E_INFO("Reading output model definitions: %s\n", cmd_ln_str("-omoddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&omdef,
                           cmd_ln_str("-omoddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        if (cmd_ln_str("-dmoddeffn")) {
            E_INFO("Reading dump model definitions: %s\n", cmd_ln_str("-dmoddeffn"));

            if (model_def_read(&dmdef,
                               cmd_ln_str("-dmoddeffn")) != S3_SUCCESS) {
                return S3_ERROR;
            }
            setup_d2o_map(dmdef, omdef);
        }
        else {
            E_INFO("Assuming dump and output model definitions are identical\n");
        }

        ts2cbfn = cmd_ln_str("-ts2cbfn");
        if (ts2cbfn) {
            if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
                omdef->cb = semi_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = 1;
            }
            else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
                omdef->cb = cont_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->n_tied_state;
            }
            else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
                omdef->cb = ptm_ts2cb(omdef);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->acmod_set->n_ci;
            }
            else if (s3ts2cb_read(cmd_ln_str("-ts2cbfn"),
                                  &omdef->cb,
                                  &n_ts,
                                  &n_cb) != S3_SUCCESS) {
                return S3_ERROR;
            }

            if (omdef->n_tied_state != n_ts) {
                E_FATAL("Model definition file n_tied_state = %u, but %u mappings in ts2cb\n",
                        omdef->n_tied_state, n_ts);
            }
        }
    }
    else {
        E_INFO("No mdef files.  Assuming 1-class init\n");
    }

    *out_omdef = omdef;
    *out_dmdef = dmdef;

    /* Both lexicon reads need the phone set of the output model
     * definition; without one, omdef is NULL and must not be
     * dereferenced. */
    if (omdef == NULL
        && (cmd_ln_str("-dictfn") || cmd_ln_str("-fdictfn"))) {
        E_ERROR("Reading a lexicon requires a model definition (-omoddeffn)\n");
        return S3_ERROR;
    }

    fn = cmd_ln_str("-dictfn");
    if (fn) {
        E_INFO("Reading main lexicon: %s\n", fn);

        lex = lexicon_read(NULL, fn, omdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;
    }

    fn = cmd_ln_str("-fdictfn");
    if (fn) {
        E_INFO("Reading filler lexicon: %s\n", fn);

        (void) lexicon_read(lex, fn, omdef->acmod_set);
    }

    *out_lex = lex;

    stride = cmd_ln_int32("-stride");

    return S3_SUCCESS;
}
#include <stdio.h> #include <string.h> #include <pocketsphinx.h> #include <sphinxbase/logmath.h> #include "acmod.h" #include "test_macros.h" static const mfcc_t prior[13] = { FLOAT2MFCC(37.03), FLOAT2MFCC(-1.01), FLOAT2MFCC(0.53), FLOAT2MFCC(0.49), FLOAT2MFCC(-0.60), FLOAT2MFCC(0.14), FLOAT2MFCC(-0.05), FLOAT2MFCC(0.25), FLOAT2MFCC(0.37), FLOAT2MFCC(0.58), FLOAT2MFCC(0.13), FLOAT2MFCC(-0.16), FLOAT2MFCC(0.17) }; int main(int argc, char *argv[]) { acmod_t *acmod; logmath_t *lmath; cmd_ln_t *config;
/**
 * Track noise statistics for the current mel spectrum, decide whether
 * the frame is speech and, if noise removal is enabled, hand the
 * computed per-filter gains to fe_weight_smooth().
 *
 * For fixed point we are doing the computation in a fixlog domain,
 * so we have to add many processing cases.
 *
 * @param fe        Front end; fe->noise_stats is updated and fe->mfspec
 *                  is passed on to fe_weight_smooth().
 * @param in_speech Set to TRUE or FALSE.  Always TRUE when neither
 *                  noise removal nor silence removal is enabled.
 */
void
fe_track_snr(fe_t * fe, int32 *in_speech)
{
    noise_stats_t *stats;
    powspec_t *spectrum;
    powspec_t *sig;
    powspec_t *weights;
    powspec_t max_snr, cur_snr;
    int32 b, n_bands;

    if (!fe->remove_noise && !fe->remove_silence) {
        *in_speech = TRUE;
        return;
    }

    stats = fe->noise_stats;
    spectrum = fe->mfspec;
    n_bands = stats->num_filters;

    sig = (powspec_t *) ckd_calloc(n_bands, sizeof(powspec_t));

    /* First frame: seed the statistics from the current spectrum. */
    if (stats->undefined) {
        for (b = 0; b < n_bands; b++) {
            stats->power[b] = spectrum[b];
            stats->noise[b] = spectrum[b];
#ifndef FIXED_POINT
            stats->floor[b] = spectrum[b] / stats->max_gain;
            stats->peak[b] = 0.0;
#else
            stats->floor[b] = spectrum[b] - stats->max_gain;
            stats->peak[b] = MIN_FIXLOG;
#endif
        }
        stats->undefined = FALSE;
    }

    /* Calculate smoothed power */
    for (b = 0; b < n_bands; b++) {
#ifndef FIXED_POINT
        stats->power[b] =
            stats->lambda_power * stats->power[b] +
            stats->comp_lambda_power * spectrum[b];
#else
        stats->power[b] =
            fe_log_add(stats->lambda_power + stats->power[b],
                       stats->comp_lambda_power + spectrum[b]);
#endif
    }

    /* Noise estimation and vad decision */
    fe_lower_envelope(stats, stats->power, stats->noise, n_bands);

    max_snr = FLOAT2MFCC(-10.0);
    for (b = 0; b < n_bands; b++) {
#ifndef FIXED_POINT
        sig[b] = stats->power[b] - stats->noise[b];
        if (sig[b] < 0)
            sig[b] = 0;
        cur_snr = log(stats->power[b] / stats->noise[b]);
#else
        sig[b] = fe_log_sub(stats->power[b], stats->noise[b]);
        cur_snr = MFCC2FLOAT(stats->power[b] - stats->noise[b]);
#endif
        if (cur_snr > max_snr)
            max_snr = cur_snr;
    }

    /* Only silence removal can mark a frame as non-speech. */
    *in_speech = (fe->remove_silence && (max_snr < fe->vad_threshold))
        ? FALSE : TRUE;

    fe_lower_envelope(stats, sig, stats->floor, n_bands);

    fe_temp_masking(stats, sig, stats->peak, n_bands);

    if (!fe->remove_noise) {
        /* no need for further calculations if noise cancellation disabled */
        ckd_free(sig);
        return;
    }

    for (b = 0; b < n_bands; b++) {
        if (sig[b] < stats->floor[b])
            sig[b] = stats->floor[b];
    }

    /* Per-filter gain, limited to [inv_max_gain, max_gain]. */
    weights = (powspec_t *) ckd_calloc(n_bands, sizeof(powspec_t));
    for (b = 0; b < n_bands; b++) {
#ifndef FIXED_POINT
        if (sig[b] < stats->max_gain * stats->power[b])
            weights[b] = sig[b] / stats->power[b];
        else
            weights[b] = stats->max_gain;
#else
        weights[b] = sig[b] - stats->power[b];
        if (weights[b] > stats->max_gain)
            weights[b] = stats->max_gain;
#endif
        if (weights[b] < stats->inv_max_gain)
            weights[b] = stats->inv_max_gain;
    }

    /* Weight smoothing and time frequency normalization */
    fe_weight_smooth(stats, spectrum, weights, n_bands);

    ckd_free(weights);
    ckd_free(sig);
}