/**
 * Turn one complete batch of cepstral frames into dynamic features.
 *
 * The cepstra are first copied to the MFCC log file when one is open.
 * The feature buffer is then reallocated if it is too small for the
 * batch (resetting the frame count and output index), and the dynamic
 * features are computed into it.
 *
 * @param acmod           Acoustic model whose feature buffer is filled.
 * @param inout_cep       In: start of the cepstra.  Out: advanced by the
 *                        frame count left in *inout_n_frames by the
 *                        feature computation.
 * @param inout_n_frames  In: number of cepstral frames available.
 *                        Out: always 0.
 * @return Number of feature frames reported by feat_s2mfc2feat_live().
 *
 * A request for more than MAX_N_FRAMES frames is fatal.
 */
static int
acmod_process_full_cep(acmod_t *acmod,
                       mfcc_t ***inout_cep,
                       int *inout_n_frames)
{
    int32 n_computed;
    int n_wanted;

    /* Mirror the input to the MFCC log, if logging is enabled. */
    if (acmod->mfcfh)
        acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);

    /* Enlarge the feature buffer when the batch does not fit. */
    n_wanted = *inout_n_frames;
    if (n_wanted > acmod->n_feat_alloc) {
        if (n_wanted > MAX_N_FRAMES)
            E_FATAL("Batch processing can not process more than %d frames "
                    "at once, requested %d\n", MAX_N_FRAMES, n_wanted);

        feat_array_free(acmod->feat_buf);
        acmod->feat_buf = feat_array_alloc(acmod->fcb, n_wanted);
        acmod->n_feat_alloc = n_wanted;
        /* The old contents are gone, so restart the buffer bookkeeping. */
        acmod->n_feat_frame = 0;
        acmod->feat_outidx = 0;
    }

    /* Compute dynamic features for the whole batch in a single call. */
    n_computed = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
                                      inout_n_frames, TRUE, TRUE,
                                      acmod->feat_buf);
    acmod->n_feat_frame = n_computed;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);

    /* Step past the frames accounted for and report none remaining. */
    *inout_cep += *inout_n_frames;
    *inout_n_frames = 0;

    return n_computed;
}
/**
 * Compute dynamic features for a full batch of cepstra in one call.
 *
 * Logs the cepstra when an MFCC log file is open, makes sure the
 * feature buffer can hold the whole batch (reallocating it and resetting
 * the frame count and output index if not), then runs the feature
 * computation.
 *
 * @param acmod           Acoustic model owning the feature buffer.
 * @param inout_cep       In: start of the cepstra.  Out: advanced by the
 *                        frame count left in *inout_n_frames by the
 *                        feature computation.
 * @param inout_n_frames  In: number of cepstral frames.  Out: always 0.
 * @return Number of feature frames reported by feat_s2mfc2feat_live().
 */
static int
acmod_process_full_cep(acmod_t *acmod,
                       mfcc_t ***inout_cep,
                       int *inout_n_frames)
{
    int32 frames_out;

    /* Optional MFCC logging. */
    if (acmod->mfcfh)
        acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);

    /* Grow the feature buffer if this batch is larger than it. */
    if (*inout_n_frames > acmod->n_feat_alloc) {
        int batch_size = *inout_n_frames;

        feat_array_free(acmod->feat_buf);
        acmod->feat_buf = feat_array_alloc(acmod->fcb, batch_size);
        acmod->n_feat_alloc = batch_size;
        /* Fresh buffer: nothing buffered, nothing consumed. */
        acmod->n_feat_frame = 0;
        acmod->feat_outidx = 0;
    }

    /* Build the dynamic features. */
    frames_out = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
                                      inout_n_frames, TRUE, TRUE,
                                      acmod->feat_buf);
    acmod->n_feat_frame = frames_out;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);

    /* Advance the caller's cepstrum pointer; no input is left over. */
    *inout_cep += *inout_n_frames;
    *inout_n_frames = 0;

    return frames_out;
}
/**
 * Drop one reference to an acoustic model, destroying it on the last one.
 *
 * @param acmod  Acoustic model to release; NULL is accepted.
 * @return The remaining reference count while it is still positive,
 *         otherwise 0 (also returned for a NULL argument).
 */
int
acmod_free(acmod_t *acmod)
{
    if (!acmod)
        return 0;

    /* Other holders remain: just report how many. */
    acmod->refcount -= 1;
    if (acmod->refcount > 0)
        return acmod->refcount;

    /* Senone scoring arrays. */
    ckd_free(acmod->senone_scores);
    ckd_free(acmod->senone_active_vec);
    ckd_free(acmod->senone_active);

    /* Model components are optional, so release only those present. */
    if (acmod->mdef != NULL)
        bin_mdef_free(acmod->mdef);
    if (acmod->tmat != NULL)
        tmat_free(acmod->tmat);
    if (acmod->mgau != NULL)
        ps_mgau_free(acmod->mgau);

    /* Feature buffers, log-math object and configuration. */
    featbuf_free(acmod->fb);
    feat_array_free(acmod->feat_buf);
    logmath_free(acmod->lmath);
    cmd_ln_free_r(acmod->config);

    /* Finally the structure itself. */
    ckd_free(acmod);
    return 0;
}
/**
 * Replace the feature buffer with one holding @a nfr frames.
 *
 * A new feature array is allocated; when the model currently holds
 * feature frames or is in growing mode, the existing feature data is
 * copied across.  The frame-position array is resized to match and the
 * recorded allocation size is updated.
 *
 * The copy is limited to the smaller of the old and the new frame
 * counts, so a request smaller than the current allocation can no longer
 * write past the end of the new buffer (only the first @a nfr frames
 * are kept in that case).
 *
 * NOTE(review): n_feat_frame / feat_outidx are not adjusted here; callers
 * presumably only ever grow the buffer - confirm before relying on
 * shrinking.
 *
 * @param acmod  Acoustic model whose feature buffer is resized.
 * @param nfr    New capacity of the feature buffer, in frames.
 */
void
acmod_grow_feat_buf(acmod_t *acmod, int nfr)
{
    mfcc_t ***new_feat_buf;

    new_feat_buf = feat_array_alloc(acmod->fcb, nfr);

    if (acmod->n_feat_frame || acmod->grow_feat) {
        /* Never copy more frames than the destination can hold. */
        int n_copy = (acmod->n_feat_alloc < nfr) ? acmod->n_feat_alloc : nfr;

        if (n_copy > 0) {
            /* Widen before multiplying so the byte count cannot wrap in int. */
            size_t n_bytes = (size_t)n_copy
                * (size_t)feat_dimension(acmod->fcb)
                * sizeof(***acmod->feat_buf);

            memcpy(new_feat_buf[0][0], acmod->feat_buf[0][0], n_bytes);
        }
    }

    feat_array_free(acmod->feat_buf);
    acmod->framepos = ckd_realloc(acmod->framepos,
                                  nfr * sizeof(*acmod->framepos));
    acmod->feat_buf = new_feat_buf;
    acmod->n_feat_alloc = nfr;
}
/**
 * Destroy an acoustic model and everything it owns.
 *
 * Releases the feature and front-end objects, the configuration, the
 * cepstral and feature buffers, closes any open log files, frees the
 * scoring arrays and raw data, releases whichever model components are
 * present, and finally frees the structure itself.
 *
 * @param acmod  Acoustic model to destroy; NULL is accepted and ignored.
 */
void
acmod_free(acmod_t *acmod)
{
    if (!acmod)
        return;

    /* Feature computation, front end and configuration. */
    feat_free(acmod->fcb);
    fe_free(acmod->fe);
    cmd_ln_free_r(acmod->config);

    /* Cepstral and feature buffers exist only if they were allocated. */
    if (acmod->mfc_buf != NULL)
        ckd_free_2d((void **)acmod->mfc_buf);
    if (acmod->feat_buf != NULL)
        feat_array_free(acmod->feat_buf);

    /* Close whichever log files are open. */
    if (acmod->mfcfh != NULL)
        fclose(acmod->mfcfh);
    if (acmod->rawfh != NULL)
        fclose(acmod->rawfh);
    if (acmod->senfh != NULL)
        fclose(acmod->senfh);

    /* Plain heap arrays. */
    ckd_free(acmod->framepos);
    ckd_free(acmod->senone_scores);
    ckd_free(acmod->senone_active_vec);
    ckd_free(acmod->senone_active);
    ckd_free(acmod->rawdata);

    /* Optional model components. */
    if (acmod->mdef != NULL)
        bin_mdef_free(acmod->mdef);
    if (acmod->tmat != NULL)
        tmat_free(acmod->tmat);
    if (acmod->mgau != NULL)
        ps_mgau_free(acmod->mgau);
    if (acmod->mllr != NULL)
        ps_mllr_free(acmod->mllr);

    ckd_free(acmod);
}
/**
 * Aggregate feature frames per phone segment over the whole corpus.
 *
 * For every utterance the word transcript and the state segmentation are
 * read, the transcript is expanded into a phone list, the phone list is
 * checked against the segmentation, converted to triphones and turned
 * into per-phone (start, length) frame ranges.  The utterance's cepstra
 * are then converted to dynamic features and each phone's frame range is
 * handed to segdmp_add_feat().
 *
 * Utterances that fail ck_seg(), cvt2triphone() or mk_seg(), or that have
 * fewer than 9 frames, are skipped after logging.  Every skip path goes
 * through the same per-utterance cleanup, so no utterance buffers are
 * leaked when an utterance is skipped.
 *
 * @param lex        Lexicon used to expand words into phones.
 * @param acmod_set  Acoustic model set used for checking and conversion.
 * @param fcb        Feature computation object.
 * @param type       Not used by this function.
 * @return Always 0; unrecoverable conditions terminate through E_FATAL.
 */
int
agg_phn_seg(lexicon_t *lex,
            acmod_set_t *acmod_set,
            feat_t *fcb,
            segdmp_type_t type)
{
    int32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 tick_cnt = 0;

    while (corpus_next_utt()) {
        /* Per-utterance buffers start out NULL so the shared cleanup at
         * the bottom of the loop is valid from any exit point. */
        char *trans = NULL;
        uint16 *seg = NULL;
        char **word = NULL;
        char *btw_mark = NULL;
        acmod_id_t *phone = NULL;
        uint32 *start = NULL;
        uint32 *len = NULL;
        vector_t *mfcc = NULL;
        vector_t **feat;
        int32 n_frame;
        uint32 n_word;
        uint32 n_phone;
        uint32 s;

        if ((++tick_cnt % 500) == 0) {
            E_INFOCONT("[%u] ", tick_cnt);
        }

        if (corpus_get_sent(&trans) != S3_SUCCESS) {
            E_FATAL("Unable to read word transcript for %s\n",
                    corpus_utt_brief_name());
        }

        if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
            E_FATAL("Unable to read Viterbi state segmentation for %s\n",
                    corpus_utt_brief_name());
        }

        /* First pass counts the words, second pass fills the array. */
        n_word = str2words(trans, NULL, 0);
        word = ckd_calloc(n_word, sizeof(char*));
        str2words(trans, word, n_word);

        phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
        start = ckd_calloc(n_phone, sizeof(uint32));
        len = ckd_calloc(n_phone, sizeof(uint32));

        /* Check that the word transcript and dictionary entries agree
         * with the state segmentation. */
        if (ck_seg(acmod_set, phone, n_phone, seg, n_frame,
                   corpus_utt()) != S3_SUCCESS) {
            E_ERROR("ck_seg failed");
            goto next_utt;
        }

        if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
            E_ERROR("cvt2triphone failed");
            goto next_utt;
        }

        /* The between-word markers are not needed past this point. */
        ckd_free(btw_mark);
        btw_mark = NULL;

        if (mk_seg(acmod_set, seg, n_frame, phone, start, len,
                   n_phone) != S3_SUCCESS) {
            E_ERROR("mk_seg failed");
            goto next_utt;
        }

        if (!corpus_provides_mfcc()) {
            E_FATAL("No data type specified\n");
        }

        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
            E_FATAL("Can't read input features from %s\n", corpus_utt());
        }

        if (n_frame < 9) {
            E_WARN("utt %s too short\n", corpus_utt());
            if (mfcc) {
                ckd_free(mfcc[0]);
                ckd_free(mfcc);
                mfcc = NULL;
            }
            goto next_utt;
        }

        feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

        /* Hand each phone its own run of feature frames. */
        for (s = 0; s < n_phone; s++) {
            segdmp_add_feat(phone[s], &feat[start[s]], len[s]);
        }

        feat_array_free(feat);
        free(&mfcc[0][0]);
        ckd_free(mfcc);

    next_utt:
        free(trans);    /* alloc'ed using strdup, not ckd_*() */
        free(seg);      /* alloc'ed using malloc in areadshort(), not ckd_*() */
        ckd_free(word);
        ckd_free(phone);
        ckd_free(start);
        ckd_free(len);
        if (btw_mark)
            ckd_free(btw_mark);
    }

    return 0;
}
/**
 * Entry point of the alignment tool.
 *
 * Parses the command line, opens the input transcript file and the
 * optional output transcript / control files, records the requested
 * output directories, loads the models, initializes the aligner, applies
 * an MLLR transform when one is given, and then runs utt_align over
 * every utterance of the control file through ctl_process().  Timing
 * totals are printed when at least one frame was processed, and all
 * resources acquired here are released before returning.
 *
 * Fatal conditions (input or output file cannot be opened, no output
 * argument given, no -ctl argument) terminate through E_FATAL.
 *
 * @param argc  Number of command-line arguments.
 * @param argv  Command-line arguments.
 * @return 0 on normal completion.
 */
int
main(int32 argc, char *argv[])
{
    char sent[16384];
    cmd_ln_t *config;

    print_appl_info(argv[0]);
    cmd_ln_appl_enter(argc, argv, "default.arg", defn);

    unlimit();

    config = cmd_ln_get();

    ctloffset = cmd_ln_int32_r(config, "-ctloffset");
    sentfile = cmd_ln_str_r(config, "-insent");

    if ((sentfp = fopen(sentfile, "r")) == NULL)
        E_FATAL_SYSTEM("Failed to open file %s for reading", sentfile);

    /* Note various output directories */
    if (cmd_ln_str_r(config, "-s2stsegdir") != NULL)
        s2stsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-s2stsegdir"));
    if (cmd_ln_str_r(config, "-stsegdir") != NULL)
        stsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-stsegdir"));
    if (cmd_ln_str_r(config, "-phsegdir") != NULL)
        phsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-phsegdir"));
    if (cmd_ln_str_r(config, "-phlabdir") != NULL)
        phlabdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-phlabdir"));
    if (cmd_ln_str_r(config, "-wdsegdir") != NULL)
        wdsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-wdsegdir"));

    /* HACK: the input transcript is skipped ahead by -ctloffset lines
     * here without checking that the control file can be read as well.
     * This comes from resources being specified through several separate
     * files.  process_ctl was removed because it duplicated ctl_process;
     * the per-utterance callback takes care of matching the utterance
     * file names, so an inconsistency between the two files is not a
     * major concern. */
    while (ctloffset > 0) {
        if (fgets(sent, sizeof(sent), sentfp) == NULL) {
            E_ERROR("EOF(%s)\n", sentfile);
            break;
        }
        --ctloffset;
    }

    if ((outsentfile = cmd_ln_str_r(config, "-outsent")) != NULL) {
        if ((outsentfp = fopen(outsentfile, "w")) == NULL)
            E_FATAL_SYSTEM("Failed to open file %s for writing", outsentfile);
    }

    if ((outctlfile = cmd_ln_str_r(config, "-outctl")) != NULL) {
        if ((outctlfp = fopen(outctlfile, "w")) == NULL)
            E_FATAL_SYSTEM("Failed top open file %s for writing", outctlfile);
    }

    /* At least one kind of output must have been requested. */
    if ((cmd_ln_str_r(config, "-s2stsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-stsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-phlabdir") == NULL) &&
        (cmd_ln_str_r(config, "-phsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-wdsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-outsent") == NULL))
        E_FATAL("Missing output file/directory argument(s)\n");

    /* Read in input databases */
    models_init(config);

    if (!feat)
        feat = feat_array_alloc(kbcore_fcb(kbc), S3_MAX_FRAMES);

    timers[tmr_utt].name = "U";
    timers[tmr_gauden].name = "G";
    timers[tmr_senone].name = "S";
    timers[tmr_align].name = "A";

    /* Initialize align module */
    align_init(kbc->mdef, kbc->tmat, dict, config, kbc->logmath);
    printf("\n");

    /* Apply an MLLR transform to whichever Gaussian model type is loaded. */
    if (cmd_ln_str_r(config, "-mllr") != NULL) {
        if (kbc->mgau)
            adapt_set_mllr(adapt_am, kbc->mgau,
                           cmd_ln_str_r(config, "-mllr"), NULL,
                           kbc->mdef, config);
        else if (kbc->ms_mgau)
            model_set_mllr(kbc->ms_mgau,
                           cmd_ln_str_r(config, "-mllr"), NULL,
                           kbcore_fcb(kbc), kbc->mdef, config);
        else
            E_WARN("Can't use MLLR matrices with .s2semi. yet\n");
    }

    tot_nfr = 0;

    if (cmd_ln_str_r(config, "-ctl")) {
        /* When -ctl is specified, -ctl_mllr is passed along so the
         * MLLR transform matching each utterance can be looked up. */
        ctl_process(cmd_ln_str_r(config, "-ctl"),
                    NULL,
                    cmd_ln_str_r(config, "-ctl_mllr"),
                    cmd_ln_int32_r(config, "-ctloffset"),
                    cmd_ln_int32_r(config, "-ctlcount"),
                    utt_align, config);
    }
    else {
        E_FATAL(" -ctl are not specified.\n");
    }

    if (tot_nfr > 0) {
        printf("\n");
        printf("TOTAL FRAMES: %8d\n", tot_nfr);
        printf("TOTAL CPU TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt.t_tot_cpu, tm_utt.t_tot_cpu / (tot_nfr * 0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt.t_tot_elapsed,
               tm_utt.t_tot_elapsed / (tot_nfr * 0.01));
    }

    if (outsentfp)
        fclose(outsentfp);
    if (outctlfp)
        fclose(outctlfp);
    if (sentfp)
        fclose(sentfp);

    /* Release every output-directory string duplicated above. */
    ckd_free(s2stsegdir);
    ckd_free(stsegdir);
    ckd_free(phsegdir);
    ckd_free(phlabdir);
    ckd_free(wdsegdir);

    feat_array_free(feat);
    align_free();
    models_free();

#if (! WIN32)
    /* NOTE(review): looks like leftover diagnostics that shell out to
     * list this process - confirm whether it is still wanted. */
    system("ps aguxwww | grep s3align");
#endif

    cmd_ln_free_r(config);
    return 0;
}
/**
 * Dump every stride-th feature frame of the corpus to a dump file.
 *
 * A uint32 placeholder is written at the start of the file, then for
 * each utterance the cepstra are read, converted to dynamic features and
 * every frame whose running index is a multiple of @a stride is
 * appended as one block of float32 values (the block size is the sum of
 * the stream lengths).  Finally the placeholder is overwritten with the
 * number of frames actually written.
 *
 * Utterances with fewer than 9 frames are skipped with a warning.  A
 * failed frame write is retried after a 3 second sleep; the process is
 * terminated through E_FATAL once more than 10 failures have accumulated.
 *
 * The cepstrum and feature buffers of the last processed utterance are
 * released before returning, on the success path as well as on the
 * seek / header-write error paths.
 *
 * NOTE(review): stride == 0 would make the modulo in the frame loop
 * undefined; callers presumably pass a value >= 1 - confirm.
 * NOTE(review): the stream returned by open_dmp() is not closed here;
 * verify who owns it.
 *
 * @param fcb     Feature computation object.
 * @param type    Not used by this function.
 * @param fn      Name of the dump file passed to open_dmp().
 * @param stride  Keep one frame out of every @a stride frames.
 * @return S3_SUCCESS, or S3_ERROR when the header cannot be written or
 *         the file cannot be repositioned.
 */
int
agg_all_seg(feat_t *fcb,
            segdmp_type_t type,
            const char *fn,
            uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 n_frame;
    uint32 n_out_frame;
    uint32 blksz = 0;
    vector_t **feat = NULL;
    uint32 i, j;
    uint32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries = 0;
    int ret = S3_SUCCESS;

    /* One output block holds all feature streams of a frame back to back. */
    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);
    for (i = 0, blksz = 0; i < n_stream; i++)
        blksz += veclen[i];

    fp = open_dmp(fn);
    start = ftell(fp);

    /* Placeholder for the frame count; rewritten once the total is known. */
    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
        E_ERROR_SYSTEM("Unable to write to dmp file");
        return S3_ERROR;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
         corpus_next_utt(); seq_no++) {
        /* Drop the previous utterance's cepstra before reading new ones. */
        if (mfcc) {
            free(mfcc[0]);
            ckd_free(mfcc);
            mfcc = NULL;
        }

        /* get the MFCC data for the utterance */
        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
            E_FATAL("Can't read input features from %s\n", corpus_utt());
        }

        if ((seq_no % 1000) == 0) {
            E_INFO("[%u]\n", seq_no);
        }

        if (feat) {
            feat_array_free(feat);
            feat = NULL;
        }

        if (n_frame < 9) {
            E_WARN("utt %s too short\n", corpus_utt());
            if (mfcc) {
                ckd_free(mfcc[0]);
                ckd_free(mfcc);
                mfcc = NULL;
            }
            continue;
        }

        feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

        /* j counts frames across utterances so the stride spans the corpus. */
        for (t = 0; t < n_frame; t++, j++) {
            if ((j % stride) != 0)
                continue;

            while (s3write(&feat[t][0][0], sizeof(float32), blksz,
                           fp, &ignore) != blksz) {
                E_ERROR_SYSTEM("Unable to write to dmp file");
                E_INFO("sleeping...\n");
                no_retries++;

                sleep(3);

                if (no_retries > 10) {
                    E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
                }
            }
            ++n_out_frame;
        }
    }

    /* Release the buffers left over from the last utterance. */
    if (mfcc) {
        free(mfcc[0]);
        ckd_free(mfcc);
        mfcc = NULL;
    }
    if (feat) {
        feat_array_free(feat);
        feat = NULL;
    }

    /* Go back and replace the placeholder with the real frame count. */
    if (fseek(fp, start, SEEK_SET) < 0) {
        E_ERROR_SYSTEM("Unable to seek to begin of dmp");
        ret = S3_ERROR;
    }
    else {
        E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

        if (s3write((void *)&n_out_frame, sizeof(uint32), 1,
                    fp, &ignore) != 1) {
            E_ERROR_SYSTEM("Unable to write to dmp file");
            ret = S3_ERROR;
        }
    }

    return ret;
}