/*
 * Select the sub-interval of the corpus that will be processed.
 *
 *   n_skip   number of utterances to skip from the current position
 *   run_len  number of utterances to process after the skip; the value
 *            UNTIL_EOF leaves the current run limit (n_run) untouched
 *
 * The requested values are recorded in sv_n_skip / sv_run_len.  When
 * n_skip is non-zero, `begin` is reset and then counts the utterances
 * actually skipped, which may be fewer than requested if
 * corpus_next_utt() reports the end of the corpus first.  The processed
 * counter n_proc is always reset to zero.
 *
 * Always returns S3_SUCCESS.
 */
int
corpus_set_interval(uint32 n_skip,
                    uint32 run_len)
{
    sv_n_skip = n_skip;
    sv_run_len = run_len;

    if (n_skip) {
        /* n_skip is unsigned, so print it with %u rather than %d */
        E_INFO("skipping %u utts.\n", n_skip);

        /* Advance one utterance per iteration; stop early at end of corpus.
         * The short-circuit keeps corpus_next_utt() from being called once
         * the requested number has been skipped. */
        for (begin = 0; (n_skip > 0) && corpus_next_utt(); --n_skip, begin++)
            ;

        E_INFO("Last utt skipped: %s\n", corpus_utt());
    }

    if (run_len != UNTIL_EOF)
        n_run = run_len;

    n_proc = 0;

    return S3_SUCCESS;
}
/*
 * Walk the corpus and hand the feature frames of every phone segment to
 * segdmp_add_feat().
 *
 * For each utterance returned by corpus_next_utt():
 *   1. read the word transcript and the state segmentation,
 *   2. split the transcript into words and build the phone list with
 *      mk_phone_list(),
 *   3. validate it against the segmentation (ck_seg), convert it with
 *      cvt2triphone(), and compute per-phone start/len with mk_seg(),
 *   4. read the input feature vectors, run feat_s2mfc2feat_live(), and
 *      call segdmp_add_feat(phone[s], &feat[start[s]], len[s]) for
 *      every phone s.
 *
 * Utterances for which ck_seg/cvt2triphone/mk_seg fail, or whose frame
 * count is below 9, are skipped.  Failure to read the transcript, the
 * segmentation or the features is reported through E_FATAL.
 *
 * Every skip path goes through a single cleanup label so that all
 * per-utterance buffers are released; previously the early `continue`s
 * leaked start/len/btw_mark (and, for short utterances, everything
 * except the feature buffer).
 *
 * Returns 0.
 */
int
agg_phn_seg(lexicon_t *lex,
            acmod_set_t *acmod_set,
            feat_t *fcb,
            segdmp_type_t type)
{
    int32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 tick_cnt = 0;
    uint32 n_stream;
    uint32 *veclen;

    /* Queried as before; neither value, nor `type`, is used below. */
    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);
    (void)n_stream;
    (void)veclen;
    (void)type;

    while (corpus_next_utt()) {
        /* Per-utterance resources; NULL means "nothing to release". */
        uint16 *seg = NULL;
        char *trans = NULL;
        char **word = NULL;
        acmod_id_t *phone = NULL;
        char *btw_mark = NULL;
        uint32 *start = NULL;
        uint32 *len = NULL;
        vector_t *mfcc = NULL;
        vector_t **feat;
        int32 n_frame;
        uint32 n_word;
        uint32 n_phone;
        uint32 s;

        if ((++tick_cnt % 500) == 0) {
            E_INFOCONT("[%u] ", tick_cnt);
        }

        if (corpus_get_sent(&trans) != S3_SUCCESS) {
            E_FATAL("Unable to read word transcript for %s\n",
                    corpus_utt_brief_name());
        }

        if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
            E_FATAL("Unable to read Viterbi state segmentation for %s\n",
                    corpus_utt_brief_name());
        }

        /* First call counts the words, second call fills the array. */
        n_word = str2words(trans, NULL, 0);
        word = ckd_calloc(n_word, sizeof(char*));
        str2words(trans, word, n_word);

        phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
        start = ckd_calloc(n_phone, sizeof(uint32));
        len = ckd_calloc(n_phone, sizeof(uint32));

        /* check to see whether the word transcript and dictionary entries
           agree with the state segmentation */
        if (ck_seg(acmod_set, phone, n_phone, seg, n_frame,
                   corpus_utt()) != S3_SUCCESS) {
            E_ERROR("ck_seg failed");
            goto next_utt;
        }

        if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
            E_ERROR("cvt2triphone failed");
            goto next_utt;
        }

        /* btw_mark is not needed past the triphone conversion */
        ckd_free(btw_mark);
        btw_mark = NULL;

        if (mk_seg(acmod_set, seg, n_frame, phone, start, len,
                   n_phone) != S3_SUCCESS) {
            E_ERROR("mk_seg failed");
            goto next_utt;
        }

        if (corpus_provides_mfcc()) {
            /* note: n_frame is overwritten with the feature frame count */
            if (corpus_get_generic_featurevec(&mfcc, &n_frame,
                                              mfc_veclen) < 0) {
                E_FATAL("Can't read input features from %s\n", corpus_utt());
            }

            if (n_frame < 9) {
                E_WARN("utt %s too short\n", corpus_utt());
                if (mfcc) {
                    ckd_free(mfcc[0]);
                    ckd_free(mfcc);
                    mfcc = NULL;
                }
                goto next_utt;
            }

            feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
            feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

            for (s = 0; s < n_phone; s++) {
                segdmp_add_feat(phone[s], &feat[start[s]], len[s]);
            }

            feat_array_free(feat);
            /* NOTE(review): this path releases the frame data with free()
             * while the short-utterance path above uses ckd_free(); both
             * are kept as they were -- confirm which matches the
             * allocator used by corpus_get_generic_featurevec(). */
            free(&mfcc[0][0]);
            ckd_free(mfcc);
        }
        else {
            E_FATAL("No data type specified\n");
        }

    next_utt:
        free(trans);    /* alloc'ed using strdup, not ckd_*() */
        free(seg);      /* alloc'ed using malloc in areadshort(), not ckd_*() */
        if (btw_mark)
            ckd_free(btw_mark);
        ckd_free(word);
        ckd_free(phone);
        ckd_free(start);
        ckd_free(len);
    }

    return 0;
}
/*
 * Count, for every acoustic model, how many segments occur in the corpus
 * and how many frames each of those segments spans.
 *
 *   mdef             model definition; mdef->acmod_set supplies the
 *                    number of models and their names
 *   lex              lexicon passed through to get_next_phnseq()
 *   out_n_seg        receives an array of n_acmod segment counts
 *   out_n_frame_per  receives an array of n_acmod pointers; entry i points
 *                    to n_seg[i] segment lengths, and is left NULL when
 *                    model i has no segments
 *
 * Both output arrays are allocated here with ckd_calloc() and handed to
 * the caller.  While scanning the corpus the segment lengths are kept in
 * one linked list per model (heads in `hd`, tails in `tl`); the lists are
 * converted to arrays and freed before returning.
 *
 * Returns S3_SUCCESS.
 *
 * NOTE(review): the results of corpus_get_seg(), ck_seg() and mk_seg() are
 * not checked here, unlike in agg_phn_seg(); that is left unchanged --
 * confirm whether failing utterances should be skipped.
 */
int
cnt_phn_seg(model_def_t *mdef,
            lexicon_t *lex,
            uint32 **out_n_seg,
            uint32 ***out_n_frame_per)
{
    uint32 seq_no = 0;
    uint16 *seg;
    uint32 n_frame;
    uint32 i, j;
    uint32 n_acmod;
    uint32 *phone;
    uint32 n_phone;
    uint32 *n_seg;
    uint32 **n_frame_per;
    uint32 *start;
    uint32 *len;
    seg_len_t *cur;
    seg_len_t *tmp;
    seg_len_t *phn_tl;

    n_acmod = acmod_set_n_acmod(mdef->acmod_set);

    E_INFO("Counting # occ. for %u models\n", n_acmod);

    n_seg = ckd_calloc(n_acmod, sizeof(uint32));
    hd = ckd_calloc(n_acmod, sizeof(seg_len_t *));
    tl = ckd_calloc(n_acmod, sizeof(seg_len_t *));

    for (seq_no = corpus_get_begin(); corpus_next_utt(); seq_no++) {
        if (!(seq_no % 250)) {
            fprintf(stderr, " cnt[%u]", seq_no);
            fflush(stderr);
        }

        corpus_get_seg(&seg, &n_frame);

        phone = get_next_phnseq(mdef, lex, &n_phone);

        ck_seg(mdef->acmod_set, phone, n_phone, seg, n_frame, corpus_utt());

        start = ckd_calloc(n_phone, sizeof(uint32));
        len = ckd_calloc(n_phone, sizeof(uint32));

        mk_seg(mdef->acmod_set, seg, n_frame, phone, start, len, n_phone);

        /* Only the segment lengths are needed from here on. */
        ckd_free(start);
        ckd_free(seg);

        for (i = 0; i < n_phone; i++) {
            /* append the length of segment i to the list of phone[i] */
            phn_tl = tl[phone[i]];

            cur = (seg_len_t *)ckd_calloc(1, sizeof(seg_len_t));
            cur->len = len[i];

            if (phn_tl == NULL) {
                hd[phone[i]] = tl[phone[i]] = cur;
            }
            else {
                phn_tl->nxt = cur;
                tl[phone[i]] = cur;
            }
        }

        /* phone[] is indexed by the loop above, so it must not be released
         * any earlier than this. */
        ckd_free(phone);
        ckd_free(len);
    }

    n_frame_per = (uint32 **)ckd_calloc(n_acmod, sizeof(uint32 *));

    for (i = 0; i < n_acmod; i++) {
        if (hd[i] == NULL) {
            n_seg[i] = 0;
        }
        else {
            /* pass 1: count the list entries */
            for (cur = hd[i], j = 0; cur != NULL; j++, cur = cur->nxt)
                ;
            n_seg[i] = j;

            n_frame_per[i] = (uint32 *)ckd_calloc(n_seg[i], sizeof(uint32));

            /* pass 2: copy the lengths into the flat array */
            for (cur = hd[i], j = 0; cur != NULL; j++, cur = cur->nxt)
                n_frame_per[i][j] = cur->len;

            /* pass 3: release the list nodes */
            for (cur = hd[i]; cur != NULL; cur = tmp) {
                tmp = cur->nxt;
                ckd_free(cur);
            }

            E_INFO("phn= %s n_seg= %u\n",
                   acmod_set_id2name(mdef->acmod_set, i),
                   n_seg[i]);
        }
    }

    ckd_free(hd);
    ckd_free(tl);

    *out_n_seg = n_seg;
    *out_n_frame_per = n_frame_per;

    return S3_SUCCESS;
}
/*
 * Dump every stride-th feature frame of the corpus to the file `fn`.
 *
 *   fcb     feature computation block used to allocate and compute the
 *           per-utterance feature arrays
 *   type    not used by this function
 *   fn      name of the dump file, opened through open_dmp()
 *   stride  frame decimation factor; a frame is written when the running
 *           frame index (counted across all utterances) is a multiple of
 *           stride.  A stride of 0 is rejected.
 *
 * File layout produced here: one uint32 written first as a placeholder,
 * followed by blksz float32 values per emitted frame, where blksz is the
 * sum of the stream lengths reported by feat_stream_lengths().  After the
 * corpus has been consumed the placeholder is overwritten with the number
 * of frames actually written.  The frame data is written starting at
 * &feat[t][0][0] -- presumably all streams of a frame are contiguous in
 * memory; confirm against feat_array_alloc().
 *
 * Utterances with fewer than 9 frames are skipped with a warning.  A
 * failed frame write is retried after a 3 second sleep; once more than 10
 * write failures have accumulated over the whole run, E_FATAL is raised.
 *
 * Returns S3_SUCCESS, or S3_ERROR when stride is 0, the dump file cannot
 * be opened, or the header cannot be written / rewritten.
 *
 * NOTE(review): fp is not closed here, as in the original code; whether
 * open_dmp() retains ownership of the stream is not visible from this
 * function -- confirm.
 * NOTE(review): a retry after a short write appends the whole block again
 * without rewinding, which looks like it could leave a partial block in
 * the file -- confirm against s3write().
 */
int
agg_all_seg(feat_t *fcb,
            segdmp_type_t type,
            const char *fn,
            uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 n_frame;
    uint32 n_out_frame;
    uint32 blksz = 0;
    vector_t **feat = NULL;
    uint32 i, j;
    uint32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries = 0;

    (void)type;

    /* (j % stride) below would divide by zero */
    if (stride == 0) {
        E_WARN("stride must be non-zero; nothing written to %s\n", fn);
        return S3_ERROR;
    }

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    for (i = 0, blksz = 0; i < n_stream; i++)
        blksz += veclen[i];

    fp = open_dmp(fn);
    if (fp == NULL) {
        E_ERROR_SYSTEM("Unable to open dmp file");
        return S3_ERROR;
    }

    start = ftell(fp);

    /* Placeholder for the frame count; its value is irrelevant because it
     * is overwritten once the real count is known. */
    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
        E_ERROR_SYSTEM("Unable to write to dmp file");
        return S3_ERROR;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
         corpus_next_utt();
         seq_no++) {

        /* release the previous utterance's input vectors */
        if (mfcc) {
            free(mfcc[0]);
            ckd_free(mfcc);
            mfcc = NULL;
        }

        /* get the MFCC data for the utterance */
        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
            E_FATAL("Can't read input features from %s\n", corpus_utt());
        }

        if ((seq_no % 1000) == 0) {
            E_INFO("[%u]\n", seq_no);
        }

        /* release the previous utterance's feature array */
        if (feat) {
            feat_array_free(feat);
            feat = NULL;
        }

        if (n_frame < 9) {
            E_WARN("utt %s too short\n", corpus_utt());
            if (mfcc) {
                ckd_free(mfcc[0]);
                ckd_free(mfcc);
                mfcc = NULL;
            }
            continue;
        }

        feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

        /* j keeps counting across utterances, so the stride is applied to
         * the corpus-wide frame index, not per utterance */
        for (t = 0; t < n_frame; t++, j++) {
            if ((j % stride) == 0) {
                while (s3write(&feat[t][0][0], sizeof(float32), blksz,
                               fp, &ignore) != blksz) {
                    E_ERROR_SYSTEM("Unable to write to dmp file");
                    E_INFO("sleeping...\n");
                    no_retries++;

                    sleep(3);

                    if (no_retries > 10) {
                        E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
                    }
                }
                ++n_out_frame;
            }
        }
    }

    /* The loop only frees buffers at the start of the next iteration, so
     * the last utterance's buffers are still live here. */
    if (feat) {
        feat_array_free(feat);
        feat = NULL;
    }
    if (mfcc) {
        free(mfcc[0]);
        ckd_free(mfcc);
        mfcc = NULL;
    }

    if (fseek(fp, start, SEEK_SET) < 0) {
        E_ERROR_SYSTEM("Unable to seek to begin of dmp");
        return S3_ERROR;
    }

    E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

    if (s3write((void *)&n_out_frame, sizeof(uint32), 1, fp, &ignore) != 1) {
        E_ERROR_SYSTEM("Unable to write to dmp file");
        return S3_ERROR;
    }

    return S3_SUCCESS;
}