int main(int argc, char *argv[]) { lexicon_t *lex; model_def_t *omdef; model_def_t *dmdef; feat_t *feat; uint32 n_stream, blksize; uint32 *veclen; uint32 ts_off; uint32 ts_cnt; FILE *fp; if (main_initialize(argc, argv, &lex, &omdef, &dmdef, &feat) != S3_SUCCESS) { return -1; } n_stream = feat_dimension1(feat); veclen = feat_stream_lengths(feat); blksize = feat_dimension(feat); if (strcmp(cmd_ln_str("-gthobj"), "state") == 0) { ts_off = cmd_ln_int32("-tsoff"); if (cmd_ln_str("-tscnt") == NULL) { ts_cnt = omdef->n_tied_state - ts_off; } else { ts_cnt = cmd_ln_int32("-tscnt"); } if (ts_off + ts_cnt > omdef->n_tied_state) { E_FATAL("Too many tied states specified\n"); } n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), cmd_ln_str("-segidxfn"), cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), ts_off, ts_cnt, omdef->n_tied_state, (dmdef != NULL ? dmdef->n_tied_state : omdef->n_tied_state)) != S3_SUCCESS) { E_ERROR("Unable to train [%u %u]\n", ts_off, ts_off+ts_cnt-1); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? 
all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } if (cmd_ln_str("-tsrngfn") != NULL) { fp = fopen(cmd_ln_str("-tsrngfn"), "w"); if (fp == NULL) { E_FATAL_SYSTEM("Unable to open %s for reading", cmd_ln_str("-tsrngfn")); } fprintf(fp, "%d %d\n", ts_off, ts_cnt); } else if (ts_cnt != omdef->n_tied_state) { E_WARN("Subset of tied states specified, but no -tsrngfn arg"); } } else if (strcmp(cmd_ln_str("-gthobj"), "single") == 0) { n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), NULL, /* No index -> single class dump file */ cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), 0, 1, 1, 1) != S3_SUCCESS) { E_ERROR("Unable to train\n"); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } } return 0; }
/*
 * Walk the corpus and hand the feature frames of every phone segment to
 * segdmp_add_feat().
 *
 * For each utterance the word transcript and the state segmentation are
 * read, the transcript is expanded to a phone list, checked against the
 * segmentation (ck_seg), converted to triphones (cvt2triphone) and turned
 * into per-phone start/length arrays (mk_seg).  Features are then computed
 * from the utterance's cepstra and added segment by segment.
 *
 * An utterance that fails one of the three checks, or that has fewer than
 * 9 frames, is logged and skipped.  Unreadable transcripts, segmentations
 * or features are reported through E_FATAL.
 *
 * lex        lexicon passed to mk_phone_list()
 * acmod_set  acoustic model set passed to the segmentation helpers
 * fcb        feature control block used to compute the features
 * type       not used by this function
 *
 * Always returns 0.
 */
int
agg_phn_seg(lexicon_t *lex,
            acmod_set_t *acmod_set,
            feat_t *fcb,
            segdmp_type_t type)
{
    uint16 *seg;
    vector_t *mfcc;
    vector_t **feat;
    int32 n_frame;
    uint32 tick_cnt = 0;
    acmod_id_t *phone;
    uint32 *start;
    uint32 *len;
    uint32 n_phone;
    uint32 s;
    char *btw_mark;
    char *trans;
    char **word;
    uint32 n_word;
    int32 mfc_veclen = cmd_ln_int32("-ceplen");

    while (corpus_next_utt()) {
        /* Known value for the shared cleanup below; mk_phone_list() is
         * expected to fill it in. */
        btw_mark = NULL;

        if ((++tick_cnt % 500) == 0) {
            E_INFOCONT("[%u] ", tick_cnt);
        }

        if (corpus_get_sent(&trans) != S3_SUCCESS) {
            E_FATAL("Unable to read word transcript for %s\n",
                    corpus_utt_brief_name());
        }

        if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
            E_FATAL("Unable to read Viterbi state segmentation for %s\n",
                    corpus_utt_brief_name());
        }

        /* First call counts the words, second call fills the array. */
        n_word = str2words(trans, NULL, 0);
        word = ckd_calloc(n_word, sizeof(char*));
        str2words(trans, word, n_word);

        phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
        start = ckd_calloc(n_phone, sizeof(uint32));
        len = ckd_calloc(n_phone, sizeof(uint32));

        /* check to see whether the word transcript and dictionary entries
           agree with the state segmentation */
        if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
            E_ERROR("ck_seg failed");
            goto next_utt;
        }

        if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
            E_ERROR("cvt2triphone failed");
            goto next_utt;
        }

        if (mk_seg(acmod_set, seg, n_frame, phone, start, len, n_phone) != S3_SUCCESS) {
            E_ERROR("mk_seg failed");
            goto next_utt;
        }

        if (corpus_provides_mfcc()) {
            if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
                E_FATAL("Can't read input features from %s\n", corpus_utt());
            }

            if (n_frame < 9) {
                E_WARN("utt %s too short\n", corpus_utt());
                if (mfcc) {
                    ckd_free(mfcc[0]);
                    ckd_free(mfcc);
                    mfcc = NULL;
                }
                goto next_utt;
            }

            feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
            feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

            for (s = 0; s < n_phone; s++) {
                segdmp_add_feat(phone[s], &feat[start[s]], len[s]);
            }

            feat_array_free(feat);
            free(&mfcc[0][0]);
            ckd_free(mfcc);
        }
        else {
            E_FATAL("No data type specified\n");
        }

    next_utt:
        /*
         * Single release point for everything acquired for this utterance,
         * so that skipped utterances no longer leak the start/len arrays,
         * the between-word markers, or (for short utterances) the
         * transcript, segmentation, word and phone buffers.
         */
        ckd_free(btw_mark);
        free(trans);    /* alloc'ed using strdup, not ckd_*() */
        free(seg);      /* alloc'ed using malloc in areadshort(), not ckd_*() */
        ckd_free(word);
        ckd_free(phone);
        ckd_free(start);
        ckd_free(len);
    }

    return 0;
}
/*
 * Dump every stride-th feature frame of the corpus into the file fn.
 *
 * The output starts with a uint32 frame count followed by one block of
 * blksz float32 values per written frame, where blksz is the sum of the
 * stream lengths reported for fcb.  The count is not known up front, so a
 * placeholder is written first and overwritten once all utterances have
 * been processed.
 *
 * Utterances with fewer than 9 frames are skipped with a warning.  A
 * failed frame write is retried after a 3 second pause; once the retry
 * counter (shared by all frames of the run) exceeds 10 the program aborts
 * through E_FATAL.
 *
 * fcb     feature control block used to compute the features
 * type    not used by this function
 * fn      name of the dump file, handed to open_dmp()
 * stride  write one frame out of every stride frames; must be non-zero
 *
 * Returns S3_SUCCESS, or S3_ERROR when stride is zero or the dump file
 * cannot be opened, positioned, written or closed.
 */
int
agg_all_seg(feat_t *fcb,
            segdmp_type_t type,
            const char *fn,
            uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    int32 n_frame;      /* signed, like the counter agg_phn_seg() passes to
                           the same feature routines */
    uint32 n_out_frame = 0;
    uint32 blksz = 0;
    vector_t **feat = NULL;
    uint32 i, j;
    int32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries = 0;
    int rc = S3_ERROR;

    /* The frame selection below computes j % stride. */
    if (stride == 0) {
        E_ERROR("Frame stride must be greater than zero\n");
        return S3_ERROR;
    }

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    for (i = 0, blksz = 0; i < n_stream; i++) {
        blksz += veclen[i];
    }

    fp = open_dmp(fn);
    if (fp == NULL) {
        E_ERROR("Unable to open dmp file %s\n", fn);
        return S3_ERROR;
    }

    start = ftell(fp);
    if (start < 0) {
        E_ERROR_SYSTEM("Unable to get position in dmp file");
        goto done;
    }

    /* Placeholder for the frame count; the value written here does not
     * matter because it is overwritten after the loop. */
    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
        E_ERROR_SYSTEM("Unable to write to dmp file");
        goto done;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
         corpus_next_utt();
         seq_no++) {

        /* Release the previous utterance's cepstra before reading more. */
        if (mfcc) {
            free(mfcc[0]);
            ckd_free(mfcc);
            mfcc = NULL;
        }

        /* get the MFCC data for the utterance */
        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
            E_FATAL("Can't read input features from %s\n", corpus_utt());
        }

        if ((seq_no % 1000) == 0) {
            E_INFO("[%u]\n", seq_no);
        }

        if (feat) {
            feat_array_free(feat);
            feat = NULL;
        }

        if (n_frame < 9) {
            E_WARN("utt %s too short\n", corpus_utt());
            if (mfcc) {
                ckd_free(mfcc[0]);
                ckd_free(mfcc);
                mfcc = NULL;
            }
            continue;
        }

        feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

        /* j counts frames across the whole corpus, so the stride carries
         * over utterance boundaries. */
        for (t = 0; t < n_frame; t++, j++) {
            if ((j % stride) != 0) {
                continue;
            }

            while (s3write(&feat[t][0][0],
                           sizeof(float32),
                           blksz,
                           fp,
                           &ignore) != blksz) {
                E_ERROR_SYSTEM("Unable to write to dmp file");
                E_INFO("sleeping...\n");
                no_retries++;

                sleep(3);

                if (no_retries > 10) {
                    E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
                }
            }
            ++n_out_frame;
        }
    }

    /* Go back and replace the placeholder with the real frame count. */
    if (fseek(fp, start, SEEK_SET) < 0) {
        E_ERROR_SYSTEM("Unable to seek to begin of dmp");
        goto done;
    }

    E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

    if (s3write((void *)&n_out_frame, sizeof(uint32), 1, fp, &ignore) != 1) {
        E_ERROR_SYSTEM("Unable to write to dmp file");
        goto done;
    }

    rc = S3_SUCCESS;

done:
    /* Buffers of the last utterance are still held when the loop ends. */
    if (feat) {
        feat_array_free(feat);
    }
    if (mfcc) {
        free(mfcc[0]);
        ckd_free(mfcc);
    }

    /*
     * NOTE(review): assumes open_dmp() hands ownership of the stream to
     * the caller - confirm against its definition.  Closing flushes the
     * dump, so a failure here means the file may be incomplete.
     */
    if (fclose(fp) != 0) {
        E_ERROR_SYSTEM("Unable to close dmp file");
        rc = S3_ERROR;
    }

    return rc;
}