/*
 * Build the phone id sequence for a word transcript.
 *
 * The transcript is turned into a word list by mk_wordlist(), the words
 * into a phone id list by mk_phone_list() (using lex), and the resulting
 * list is then handed to cvt2triphone() together with the between-word
 * markers (presumably rewriting the ids in place as context-dependent
 * phones -- confirm against cvt2triphone()).
 *
 * out_phone   - receives the phone id array produced by mk_phone_list().
 *               It is not freed here, so ownership passes to the caller
 *               (next_utt_states() releases the same kind of array with
 *               ckd_free()).  Set to NULL on failure.
 * out_n_phone - receives the number of entries in *out_phone; 0 on failure.
 * trans       - word transcript of the utterance.
 * acmod_set   - acoustic model set passed to cvt2triphone().
 * lex         - lexicon passed to mk_phone_list().
 *
 * Returns S3_SUCCESS when a phone sequence was produced and S3_ERROR when
 * mk_phone_list() could not produce one.  The return value of
 * cvt2triphone() is not checked, as before.
 */
int
mk_phone_seq(acmod_id_t **out_phone,
             uint32 *out_n_phone,
             char *trans,
             acmod_set_t *acmod_set,
             lexicon_t *lex)
{
    char **word;
    uint32 n_word;
    uint32 n_phone;
    acmod_id_t *phone;
    char *btw_mark;

    word = mk_wordlist(trans, &n_word);
    phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
    if (phone == NULL) {
        E_WARN("Unable to produce phonetic transcription for the utterance '%s'.\n", corpus_utt());

        /*
         * Without a phone list there is nothing to convert.  n_phone and
         * btw_mark cannot be trusted here, so they are neither used nor
         * freed (same convention as the failure path of next_utt_states()).
         */
        ckd_free(word);
        *out_phone = NULL;
        *out_n_phone = 0;

        return S3_ERROR;
    }

    cvt2triphone(acmod_set, phone, btw_mark, n_phone);

    /* Only the phone array outlives this call. */
    ckd_free(btw_mark);
    ckd_free(word);

    *out_phone = phone;
    *out_n_phone = n_phone;

    return S3_SUCCESS;
}
/*
 * Produce the state sequence for one utterance transcript.
 *
 * The transcript is expanded to a word list (mk_wordlist) and then to a
 * phone id list (mk_phone_list, using lex).  That list is passed through
 * cvt2triphone() with the inventory's acoustic model set, silence_str is
 * mapped to a model id with acmod_set_name2id(), and state_seq_make()
 * builds the result from those pieces plus inv, mdef and sil_del.
 *
 * n_state     - passed to state_seq_make(); presumably receives the number
 *               of states in the returned sequence.
 * lex         - lexicon used by mk_phone_list().
 * inv         - model inventory; its acmod_set member is used throughout.
 * mdef        - model definition forwarded to state_seq_make().
 * trans       - word transcript of the utterance.
 * sil_del     - forwarded unchanged to state_seq_make().
 * silence_str - name looked up in the acoustic model set to obtain the
 *               silence id handed to state_seq_make().
 *
 * Returns the value of state_seq_make(), or NULL when no phone list could
 * be produced for the transcript.  All intermediate buffers are released
 * before returning.
 */
state_t *
next_utt_states(uint32 *n_state,
                lexicon_t *lex,
                model_inventory_t *inv,
                model_def_t *mdef,
                char *trans,
                int32 sil_del,
                char* silence_str)
{
    uint32 word_cnt;
    uint32 phone_cnt;
    char *boundary;
    char **words = mk_wordlist(trans, &word_cnt);
    acmod_id_t *phones = mk_phone_list(&boundary, &phone_cnt, words, word_cnt, lex);
    acmod_set_t *models;
    acmod_id_t sil_id;
    state_t *result;

    if (!phones) {
        E_WARN("Unable to produce CI phones for utt\n");
        ckd_free(words);
        return NULL;
    }

    models = inv->acmod_set;

    /* With NEXT_UTT_STATES_VERBOSE the list is dumped before and after conversion. */
#ifdef NEXT_UTT_STATES_VERBOSE
    print_phone_list(phones, phone_cnt, boundary, models);
#endif

    cvt2triphone(models, phones, boundary, phone_cnt);

#ifdef NEXT_UTT_STATES_VERBOSE
    print_phone_list(phones, phone_cnt, boundary, models);
#endif

    sil_id = acmod_set_name2id(models, silence_str);

    result = state_seq_make(n_state, phones, phone_cnt, inv, mdef, sil_del, sil_id);

#ifdef NEXT_UTT_STATES_VERBOSE
    state_seq_print(result, *n_state, mdef);
#endif

    /* Only the state sequence is handed back; everything else is scratch. */
    ckd_free(words);
    ckd_free(boundary);
    ckd_free(phones);

    return result;
}
int agg_phn_seg(lexicon_t *lex, acmod_set_t *acmod_set, feat_t *fcb, segdmp_type_t type) { uint16 *seg; vector_t *mfcc; vector_t **feat; uint32 n_frame; uint32 tick_cnt; acmod_id_t *phone; uint32 *start; uint32 *len; uint32 n_phone; uint32 s; char *btw_mark; char *trans; char **word; uint32 n_word; int32 mfc_veclen = cmd_ln_int32("-ceplen"); uint32 n_stream; const uint32 *veclen; tick_cnt = 0; n_stream = feat_dimension1(fcb); veclen = feat_stream_lengths(fcb); while (corpus_next_utt()) { if ((++tick_cnt % 500) == 0) { printf("[%u] ", tick_cnt); fflush(stdout); } if (corpus_get_sent(&trans) != S3_SUCCESS) { E_FATAL("Unable to read word transcript for %s\n", corpus_utt_brief_name()); } if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) { E_FATAL("Unable to read Viterbi state segmentation for %s\n", corpus_utt_brief_name()); } word = mk_wordlist(trans, &n_word); phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex); start = ckd_calloc(n_phone, sizeof(uint32)); len = ckd_calloc(n_phone, sizeof(uint32)); /* check to see whether the word transcript and dictionary entries agree with the state segmentation */ if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) { free(trans); /* alloc'ed using strdup, not ckd_*() */ free(seg); /* alloc'ed using malloc in areadshort(), not ckd_*() */ ckd_free(word); ckd_free(phone); E_ERROR("ck_seg failed"); continue; } if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) { free(trans); /* alloc'ed using strdup, not ckd_*() */ free(seg); /* alloc'ed using malloc in areadshort(), not ckd_*() */ ckd_free(word); ckd_free(phone); E_ERROR("cvt2triphone failed"); continue; } ckd_free(btw_mark); if (mk_seg(acmod_set, seg, n_frame, phone, start, len, n_phone) != S3_SUCCESS) { free(trans); free(seg); ckd_free(word); ckd_free(phone); E_ERROR("mk_seg failed"); continue; } if (corpus_provides_mfcc()) { if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) { E_FATAL("Can't read input 
features from %s\n", corpus_utt()); } if (n_frame < 9) { E_WARN("utt %s too short\n", corpus_utt()); if (mfcc) { ckd_free(mfcc[0]); ckd_free(mfcc); mfcc = NULL; } continue; } feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb)); feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat); for (s = 0; s < n_phone; s++) { segdmp_add_feat(phone[s], &feat[start[s]], len[s]); } feat_array_free(feat); free(&mfcc[0][0]); ckd_free(mfcc); } else { E_FATAL("No data type specified\n"); } free(trans); /* alloc'ed using strdup, not ckd_*() */ free(seg); /* alloc'ed using malloc in areadshort(), not ckd_*() */ ckd_free(word); ckd_free(phone); ckd_free(start); ckd_free(len); } return 0; }