/**
 * Store one frame of features in the acoustic model's feature buffer.
 *
 * @param acmod Acoustic model that owns the feature buffer.
 * @param feat  One frame of features, indexed by stream; every stream of
 *              the frame is copied into the buffer.
 * @return 1 when the frame has been stored, 0 when the buffer is full and
 *         acmod->grow_feat is false.
 */
int
acmod_process_feat(acmod_t *acmod, mfcc_t **feat)
{
    int stream, slot;

    /* A full buffer is doubled when growth is allowed; otherwise the
     * frame is refused. */
    if (acmod->n_feat_frame == acmod->n_feat_alloc) {
        if (!acmod->grow_feat)
            return 0;
        acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
    }

    if (!acmod->grow_feat) {
        /* Fixed-size buffer: the write position wraps around. */
        slot = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
    }
    else {
        /* Growable buffer: never wrap, keep doubling until the write
         * position (plus one) fits inside the allocation. */
        slot = acmod->feat_outidx + acmod->n_feat_frame;
        while (slot + 1 >= acmod->n_feat_alloc) {
            acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
        }
    }

    /* Copy each stream of the incoming frame into the chosen slot. */
    for (stream = 0; stream < feat_dimension1(acmod->fcb); ++stream) {
        memcpy(acmod->feat_buf[slot][stream],
               feat[stream],
               feat_dimension2(acmod->fcb, stream) * sizeof(**feat));
    }

    ++acmod->n_feat_frame;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);

    return 1;
}
/**
 * Print a block of feature frames as text.
 *
 * Each frame is written as a header line holding the frame index, followed
 * by one tab-indented line per stream listing that stream's coefficients
 * (converted with MFCC2FLOAT).  The output stream is flushed at the end.
 *
 * @param fcb  Feature descriptor giving the stream count and the length
 *             of each stream.
 * @param feat Features indexed as feat[frame][stream][coefficient].
 * @param nfr  Number of frames to print; zero or negative prints nothing.
 * @param fp   Destination stream.
 */
void
feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
{
    /* Signed indices: with unsigned counters the comparison against the
     * signed nfr would convert a negative nfr to a huge unsigned bound and
     * run the loop far past the end of feat.  They also match the %d
     * conversions used below. */
    int32 i, j, k;

    for (i = 0; i < nfr; i++) {
        fprintf(fp, "%8d:\n", i);

        for (j = 0; j < (int32) feat_dimension1(fcb); j++) {
            fprintf(fp, "\t%2d:", j);

            for (k = 0; k < (int32) feat_dimension2(fcb, j); k++) {
                fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
            }
            fprintf(fp, "\n");
        }
    }

    fflush(fp);
}
/**
 * Write feature frames to a stream in text form.
 *
 * For every frame a header with the frame index is emitted, then one entry
 * per stream with that stream's coefficients converted through MFCC2FLOAT.
 * When POCKETSPHINX_NET is defined the output goes through net_fprintf()
 * with "{0:...}" style format strings; otherwise plain fprintf() is used.
 * The stream is flushed before returning.
 *
 * @param fcb  Feature descriptor supplying the number of streams and the
 *             length of each stream.
 * @param feat Features indexed as feat[frame][stream][coefficient].
 * @param nfr  Number of frames to print.
 * @param fp   Destination stream.
 */
void
feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
{
    int32 frm, strm, coef;

    for (frm = 0; frm < nfr; frm++) {
        /* Frame header. */
#ifdef POCKETSPHINX_NET
        net_fprintf(fp, "{0:8}:\n", frm);
#else
        fprintf(fp, "%8d:\n", frm);
#endif

        for (strm = 0; strm < feat_dimension1(fcb); strm++) {
            /* Stream header.
             * NOTE(review): the .NET format string carries a trailing
             * newline that the native one does not -- confirm intended. */
#ifdef POCKETSPHINX_NET
            net_fprintf(fp, "\t{0:2}:\n", strm);
#else
            fprintf(fp, "\t%2d:", strm);
#endif

            for (coef = 0; coef < (int32)feat_dimension2(fcb, strm); coef++) {
#ifdef POCKETSPHINX_NET
                /* Original author's note: this format string needs checking. */
                net_fprintf(fp, " {0:8.4}", MFCC2FLOAT(feat[frm][strm][coef]));
#else
                fprintf(fp, " %8.4f", MFCC2FLOAT(feat[frm][strm][coef]));
#endif
            }

            /* End of the coefficient list for this stream. */
#ifdef POCKETSPHINX_NET
            net_fprintf(fp, "\n");
#else
            fprintf(fp, "\n");
#endif
        }
    }

    fflush(fp);
}
/**
 * Aggregate feature frames per phone segment over every utterance in the
 * corpus.
 *
 * For each utterance this reads the word transcript and the state
 * segmentation, builds the phone list with mk_phone_list(), validates it
 * against the segmentation with ck_seg(), converts it with cvt2triphone(),
 * derives each phone's start frame and length with mk_seg(), computes
 * features from the utterance's MFCC data and passes every phone's frames
 * to segdmp_add_feat().
 *
 * Utterances that fail ck_seg(), cvt2triphone() or mk_seg(), or that have
 * fewer than 9 frames, are skipped.  All per-utterance buffers are released
 * on every path through a single cleanup point.
 *
 * @param lex       Lexicon used to expand words into phones.
 * @param acmod_set Acoustic model set used for validation and conversion.
 * @param fcb       Feature descriptor used to allocate and compute features.
 * @param type      Not used by this function.
 * @return 0 always; unrecoverable conditions are reported through E_FATAL.
 */
int
agg_phn_seg(lexicon_t *lex, acmod_set_t *acmod_set, feat_t *fcb, segdmp_type_t type)
{
    uint16 *seg;
    vector_t *mfcc;
    vector_t **feat;
    int32 n_frame;
    uint32 tick_cnt = 0;
    acmod_id_t *phone;
    uint32 *start;
    uint32 *len;
    uint32 n_phone;
    uint32 s;
    char *btw_mark;
    char *trans;
    char **word;
    uint32 n_word;
    int32 mfc_veclen = cmd_ln_int32("-ceplen");

    while (corpus_next_utt()) {
        if ((++tick_cnt % 500) == 0) {
            E_INFOCONT("[%u] ", tick_cnt);
        }

        if (corpus_get_sent(&trans) != S3_SUCCESS) {
            E_FATAL("Unable to read word transcript for %s\n",
                    corpus_utt_brief_name());
        }

        if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
            E_FATAL("Unable to read Viterbi state segmentation for %s\n",
                    corpus_utt_brief_name());
        }

        /* First call counts the words, second call fills the array. */
        n_word = str2words(trans, NULL, 0);
        word = ckd_calloc(n_word, sizeof(char*));
        str2words(trans, word, n_word);

        /* Cleared first so the shared cleanup below can tell whether the
         * marker array is still owned by this iteration. */
        btw_mark = NULL;
        phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
        start = ckd_calloc(n_phone, sizeof(uint32));
        len = ckd_calloc(n_phone, sizeof(uint32));

        /* check to see whether the word transcript and dictionary entries
           agree with the state segmentation */
        if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
            E_ERROR("ck_seg failed");
            goto next_utt;
        }

        if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
            E_ERROR("cvt2triphone failed");
            goto next_utt;
        }

        /* The markers are not used after the triphone conversion. */
        ckd_free(btw_mark);
        btw_mark = NULL;

        if (mk_seg(acmod_set, seg, n_frame, phone, start, len, n_phone) != S3_SUCCESS) {
            E_ERROR("mk_seg failed");
            goto next_utt;
        }

        if (corpus_provides_mfcc()) {
            if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
                E_FATAL("Can't read input features from %s\n", corpus_utt());
            }

            if (n_frame < 9) {
                E_WARN("utt %s too short\n", corpus_utt());
                if (mfcc) {
                    ckd_free(mfcc[0]);
                    ckd_free(mfcc);
                    mfcc = NULL;
                }
                /* Fall through to the cleanup so the transcript,
                 * segmentation and phone buffers are not leaked. */
                goto next_utt;
            }

            feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
            feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

            for (s = 0; s < n_phone; s++) {
                segdmp_add_feat(phone[s], &feat[start[s]], len[s]);
            }

            feat_array_free(feat);
            free(&mfcc[0][0]);
            ckd_free(mfcc);
        }
        else {
            E_FATAL("No data type specified\n");
        }

    next_utt:
        /* Single release point for everything owned by this utterance. */
        if (btw_mark) {
            ckd_free(btw_mark);
        }
        free(trans);    /* alloc'ed using strdup, not ckd_*() */
        free(seg);      /* alloc'ed using malloc in areadshort(), not ckd_*() */
        ckd_free(word);
        ckd_free(phone);
        ckd_free(start);
        ckd_free(len);
    }

    return 0;
}
/**
 * Build a multi-stream Gaussian mixture model for an acoustic model.
 *
 * Loads the codebooks named by "-mean" and "-var" (with "-varfloor"),
 * checks that their stream count and per-stream dimensions match the
 * acoustic model's feature descriptor, loads the senones named by "-mixw"
 * and "-senmgau" (with "-mixwfloor"), cross-checks senone and codebook
 * parameters, clamps "-topn" to the number of densities, and allocates the
 * per-codebook distance and activity arrays.
 *
 * @param acmod Acoustic model supplying the configuration and the feature
 *              descriptor.
 * @param lmath Log-math object passed to the codebook and senone loaders.
 * @param mdef  Model definition passed to the senone loader.
 * @return The new model viewed as a ps_mgau_t, or a null pointer if a
 *         loader returns a null pointer or the stream layout does not
 *         match.  Senone/codebook count mismatches are reported through
 *         E_FATAL.
 */
ps_mgau_t *
ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef)
{
    /* Codebooks */
    ms_mgau_model_t *msg;
    ps_mgau_t *mg;
    gauden_t *g;
    senone_t *s;
    cmd_ln_t *config;
    int i;

    static ps_mgaufuncs_t ms_mgau_funcs = {
        "ms",
        ms_cont_mgau_frame_eval, /* frame_eval */
        ms_mgau_mllr_transform,  /* transform */
        ms_mgau_free             /* free */
    };

    config = acmod->config;

    msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
    msg->config = config;
    msg->g = 0;
    msg->s = 0;

    g = msg->g = gauden_init(cmd_ln_str_r(config, "-mean"),
                             cmd_ln_str_r(config, "-var"),
                             cmd_ln_float32_r(config, "-varfloor"),
                             lmath);
    /* Do not dereference a missing codebook set.
     * NOTE(review): this relies on ms_mgau_free() tolerating a model whose
     * codebooks are absent -- confirm. */
    if (!g) {
        E_ERROR("Failed to read means and variances\n");
        goto error_out;
    }

    /* Verify n_feat and veclen, against acmod. */
    if (g->n_feat != feat_dimension1(acmod->fcb)) {
        E_ERROR("Number of streams does not match: %d != %d\n",
                g->n_feat, feat_dimension1(acmod->fcb));
        goto error_out;
    }
    for (i = 0; i < g->n_feat; ++i) {
        if (g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
            E_ERROR("Dimension of stream %d does not match: %d != %d\n",
                    i, g->featlen[i], feat_dimension2(acmod->fcb, i));
            goto error_out;
        }
    }

    s = msg->s = senone_init(msg->g,
                             cmd_ln_str_r(config, "-mixw"),
                             cmd_ln_str_r(config, "-senmgau"),
                             cmd_ln_float32_r(config, "-mixwfloor"),
                             lmath, mdef);
    /* The error path is already taken with no senones loaded (stream
     * mismatches above), so a failed load can reuse it. */
    if (!s) {
        E_ERROR("Failed to read mixture weights\n");
        goto error_out;
    }

    s->aw = cmd_ln_int32_r(config, "-aw");

    /* Verify senone parameters against gauden parameters */
    if (s->n_feat != g->n_feat)
        E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n",
                g->n_feat, s->n_feat);
    if (s->n_cw != g->n_density)
        E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
                g->n_density, s->n_cw);
    if ((int)s->n_gauden > g->n_mgau)
        E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);
    if ((int)s->n_gauden < g->n_mgau)
        E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);

    msg->topn = cmd_ln_int32_r(config, "-topn");
    E_INFO("The value of topn: %d\n", msg->topn);
    if (msg->topn == 0 || msg->topn > msg->g->n_density) {
        E_WARN
            ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
             msg->topn, msg->g->n_density);
        msg->topn = msg->g->n_density;
    }

    msg->dist = (gauden_dist_t ***)
        ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
                      sizeof(gauden_dist_t));
    msg->mgau_active = (uint8*)ckd_calloc(g->n_mgau, sizeof(int8));

    mg = (ps_mgau_t *)msg;
    mg->vt = &ms_mgau_funcs;
    return mg;

error_out:
    ms_mgau_free(ps_mgau_base(msg));
    return 0;
}
int main(int argc, char *argv[]) { lexicon_t *lex; model_def_t *omdef; model_def_t *dmdef; feat_t *feat; uint32 n_stream, blksize; uint32 *veclen; uint32 ts_off; uint32 ts_cnt; FILE *fp; if (main_initialize(argc, argv, &lex, &omdef, &dmdef, &feat) != S3_SUCCESS) { return -1; } n_stream = feat_dimension1(feat); veclen = feat_stream_lengths(feat); blksize = feat_dimension(feat); if (strcmp(cmd_ln_str("-gthobj"), "state") == 0) { ts_off = cmd_ln_int32("-tsoff"); if (cmd_ln_str("-tscnt") == NULL) { ts_cnt = omdef->n_tied_state - ts_off; } else { ts_cnt = cmd_ln_int32("-tscnt"); } if (ts_off + ts_cnt > omdef->n_tied_state) { E_FATAL("Too many tied states specified\n"); } n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), cmd_ln_str("-segidxfn"), cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), ts_off, ts_cnt, omdef->n_tied_state, (dmdef != NULL ? dmdef->n_tied_state : omdef->n_tied_state)) != S3_SUCCESS) { E_ERROR("Unable to train [%u %u]\n", ts_off, ts_off+ts_cnt-1); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? 
all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } if (cmd_ln_str("-tsrngfn") != NULL) { fp = fopen(cmd_ln_str("-tsrngfn"), "w"); if (fp == NULL) { E_FATAL_SYSTEM("Unable to open %s for reading", cmd_ln_str("-tsrngfn")); } fprintf(fp, "%d %d\n", ts_off, ts_cnt); } else if (ts_cnt != omdef->n_tied_state) { E_WARN("Subset of tied states specified, but no -tsrngfn arg"); } } else if (strcmp(cmd_ln_str("-gthobj"), "single") == 0) { n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), NULL, /* No index -> single class dump file */ cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), 0, 1, 1, 1) != S3_SUCCESS) { E_ERROR("Unable to train\n"); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } } return 0; }
/**
 * Create and populate a semi-continuous Gaussian mixture model.
 *
 * Sets up the 8-bit log-add table, takes the stream layout from the feature
 * descriptor, reads means ("-mean") and variances ("-var"), precomputes
 * determinants with the "-varfloor" floor, loads mixture weights from
 * "-sendump" when that option is set (otherwise from "-mixw" with
 * "-mixwfloor"), and prepares the top-N beams and history buffers.
 *
 * @param config Configuration the options are read from.
 * @param lmath  Log-math object; retained by the model.
 * @param fcb    Feature descriptor giving stream count and stream lengths.
 * @param mdef   Model definition passed to the sendump reader.
 * @return The initialised model, or NULL when the 8-bit log table cannot be
 *         built or is not one byte wide, or when reading the means or the
 *         variances fails (the partial model is freed in those cases).
 */
s2_semi_mgau_t *
s2_semi_mgau_init(cmd_ln_t *config, logmath_t *lmath, feat_t *fcb, mdef_t *mdef)
{
    s2_semi_mgau_t *s;
    char const *dump_file;
    float32 **raw;
    int f, h, cw;

    s = ckd_calloc(1, sizeof(*s));
    s->config = config;
    s->lmath = logmath_retain(lmath);

    /* Log-add table. */
    s->lmath_8b = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE);
    if (s->lmath_8b == NULL)
        goto fail;

    /* fast_logmath_add() only works when the table is 8 bits wide. */
    if (logmath_get_width(s->lmath_8b) != 1) {
        E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
                logmath_get_base(s->lmath_8b));
        goto fail;
    }

    /* Stream dimensions come from the feature descriptor. */
    s->n_feat = feat_dimension1(fcb);
    s->veclen = ckd_calloc(s->n_feat, sizeof(int32));
    for (f = 0; f < s->n_feat; ++f) {
        s->veclen[f] = feat_dimension2(fcb, f);
    }

    /* Means first, then variances. */
    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-mean"), &raw) < 0)
        goto fail;
    s->means = (mfcc_t **)raw;

    if (s3_read_mgau(s, cmd_ln_str_r(s->config, "-var"), &raw) < 0)
        goto fail;
    s->vars = (mfcc_t **)raw;

    /* Determinant storage, filled in by the precomputation step. */
    s->dets = (mfcc_t **)ckd_calloc_2d(s->n_feat, s->n_density,
                                       sizeof(**s->dets));
    s3_precomp(s, s->lmath, cmd_ln_float32_r(s->config, "-varfloor"));

    /* Mixture weights: a sendump file takes precedence over -mixw. */
    dump_file = cmd_ln_str_r(s->config, "-sendump");
    if (dump_file) {
        read_sendump(s, mdef, dump_file);
    }
    else {
        read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
                  cmd_ln_float32_r(s->config, "-mixwfloor"));
    }

    s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");

    /* Per-stream top-N beams. */
    s->topn_beam = ckd_calloc(s->n_feat, sizeof(*s->topn_beam));
    s->max_topn = cmd_ln_int32_r(s->config, "-topn");
    split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, s->n_feat);

    E_INFO("Maximum top-N: %d ", s->max_topn);
    E_INFOCONT("Top-N beams:");
    for (f = 0; f < s->n_feat; ++f) {
        E_INFOCONT(" %d", s->topn_beam[f]);
    }
    E_INFOCONT("\n");

    /* History of top-N scores from recent frames. */
    s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
    s->topn_hist = (vqFeature_t ***)
        ckd_calloc_3d(s->n_topn_hist, s->n_feat, s->max_topn,
                      sizeof(***s->topn_hist));
    s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, s->n_feat,
                                   sizeof(**s->topn_hist_n));

    /* Every history entry starts at the worst score, codewords in order. */
    for (h = 0; h < s->n_topn_hist; ++h) {
        for (f = 0; f < s->n_feat; ++f) {
            for (cw = 0; cw < s->max_topn; ++cw) {
                s->topn_hist[h][f][cw].score = WORST_DIST;
                s->topn_hist[h][f][cw].codeword = cw;
            }
        }
    }

    return s;

fail:
    s2_semi_mgau_free(s);
    return NULL;
}
/**
 * Dump every stride-th feature frame of the corpus to a file.
 *
 * A placeholder count is written at the position reported by ftell() right
 * after open_dmp(); then, for each utterance with at least 9 frames,
 * features are computed from its MFCC data and every frame whose running
 * index is a multiple of stride is appended.  Finally the placeholder is
 * overwritten with the number of frames actually written.  A failing frame
 * write is retried after a 3 second sleep; once more than 10 retries have
 * accumulated the program aborts through E_FATAL.
 *
 * The dump file and the buffers of the last utterance are released on all
 * return paths once the file has been opened.
 *
 * @param fcb    Feature descriptor (stream count, stream lengths, window).
 * @param type   Not used by this function.
 * @param fn     Name of the dump file.
 * @param stride Keep one frame out of every stride; must be non-zero.
 * @return S3_SUCCESS on success, S3_ERROR if stride is zero or a file
 *         operation fails.
 */
int
agg_all_seg(feat_t *fcb, segdmp_type_t type, const char *fn, uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 n_frame;
    uint32 n_out_frame;
    uint32 blksz = 0;
    vector_t **feat = NULL;
    uint32 i, j;
    uint32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries = 0;
    int status = S3_ERROR;

    /* j % stride below would divide by zero. */
    if (stride == 0) {
        E_ERROR("stride must be non-zero\n");
        return S3_ERROR;
    }

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    /* One output block holds all streams of a frame. */
    for (i = 0, blksz = 0; i < n_stream; i++)
        blksz += veclen[i];

    fp = open_dmp(fn);
    if (fp == NULL) {
        E_ERROR("Unable to open dmp file %s\n", fn);
        return S3_ERROR;
    }

    /* Remember where the frame count lives so it can be patched later. */
    start = ftell(fp);
    if (start < 0) {
        E_ERROR_SYSTEM("Unable to get position in dmp file");
        goto done;
    }

    /* Placeholder for the frame count. */
    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
        E_ERROR_SYSTEM("Unable to write to dmp file");
        goto done;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
         corpus_next_utt(); seq_no++) {
        /* Release the previous utterance's MFCC data. */
        if (mfcc) {
            free(mfcc[0]);
            ckd_free(mfcc);
            mfcc = NULL;
        }

        /* get the MFCC data for the utterance */
        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
            E_FATAL("Can't read input features from %s\n", corpus_utt());
        }

        if ((seq_no % 1000) == 0) {
            E_INFO("[%u]\n", seq_no);
        }

        /* Release the previous utterance's features. */
        if (feat) {
            feat_array_free(feat);
            feat = NULL;
        }

        if (n_frame < 9) {
            E_WARN("utt %s too short\n", corpus_utt());
            if (mfcc) {
                ckd_free(mfcc[0]);
                ckd_free(mfcc);
                mfcc = NULL;
            }
            continue;
        }

        feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

        for (t = 0; t < n_frame; t++, j++) {
            if ((j % stride) != 0)
                continue;

            while (s3write(&feat[t][0][0], sizeof(float32), blksz, fp, &ignore) != blksz) {
                E_ERROR_SYSTEM("Unable to write to dmp file");
                E_INFO("sleeping...\n");
                no_retries++;

                sleep(3);

                if (no_retries > 10) {
                    E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
                }
            }
            ++n_out_frame;
        }
    }

    if (fseek(fp, start, SEEK_SET) < 0) {
        E_ERROR_SYSTEM("Unable to seek to begin of dmp");
        goto done;
    }

    E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

    /* Replace the placeholder with the real frame count. */
    if (s3write((void *)&n_out_frame, sizeof(uint32), 1, fp, &ignore) != 1) {
        E_ERROR_SYSTEM("Unable to write to dmp file");
        goto done;
    }

    status = S3_SUCCESS;

done:
    /* Buffers of the last utterance are still held when the loop ends. */
    if (mfcc) {
        free(mfcc[0]);
        ckd_free(mfcc);
    }
    if (feat) {
        feat_array_free(feat);
    }

    /* Closing flushes buffered data; a failure here means the dump is
     * incomplete even if every write call succeeded. */
    if (fclose(fp) != 0 && status == S3_SUCCESS) {
        E_ERROR_SYSTEM("Unable to close dmp file");
        status = S3_ERROR;
    }

    return status;
}