/*
 * Release the per-word LM-state trees recorded in vh and clear the word list.
 *
 * For every word id stored in vh->lwidlist, each child node of
 * vh->lms2vh_root[wid] is freed, followed by the children list itself and the
 * root node; the root slot is then set to NULL.  Finally the word-id list is
 * freed and vh->lwidlist is set to NULL.
 */
static void
vithist_lmstate_reset(vithist_t * vh)
{
    gnode_t *wid_node;
    gnode_t *child_node;
    vh_lms2vh_t *root;
    int32 wid;

    wid_node = vh->lwidlist;
    while (wid_node) {
        wid = (int32) gnode_int32(wid_node);
        root = vh->lms2vh_root[wid];

        /* Free each child node first, then the list that linked them. */
        child_node = root->children;
        while (child_node) {
            vh_lms2vh_t *child = (vh_lms2vh_t *) gnode_ptr(child_node);

            ckd_free((void *) child);
            child_node = gnode_next(child_node);
        }
        glist_free(root->children);

        ckd_free((void *) root);
        vh->lms2vh_root[wid] = NULL;

        wid_node = gnode_next(wid_node);
    }

    glist_free(vh->lwidlist);
    vh->lwidlist = NULL;
}
/*
 * Write a textual dump of the LM-state table of vh to fp.
 *
 * The output starts with "LMSTATE" and ends with "END_LMSTATE".  For every
 * word id in vh->lwidlist, each entry of vh->lmstate_root[wid] is printed as
 * "<word>.<ciphone> -> <vhid>" and its subtree is dumped through
 * vithist_lmstate_subtree_dump with level argument 1.  The stream is flushed
 * before returning.
 *
 * vh   - history structure holding lwidlist and lmstate_root.
 * kbc  - source of the model definition and language model used for names.
 * fp   - output stream.
 */
static void
vithist_lmstate_dump (vithist_t *vh, kbcore_t *kbc, FILE *fp)
{
    mdef_t *mdef = kbcore_mdef (kbc);
    lm_t *lm = kbcore_lm (kbc);
    gnode_t *wid_node;
    gnode_t *entry_node;

    fprintf (fp, "LMSTATE\n");

    wid_node = vh->lwidlist;
    while (wid_node) {
        int32 wid = (int32) gnode_int32 (wid_node);
        glist_t entries = vh->lmstate_root[wid];

        /* A word id on the list must have a non-empty entry list. */
        assert (entries);

        entry_node = entries;
        while (entry_node) {
            vh_lmstate2vithist_t *entry =
                (vh_lmstate2vithist_t *) gnode_ptr (entry_node);

            fprintf (fp, "\t%s.%s -> %d\n",
                     lm_wordstr(lm, wid),
                     mdef_ciphone_str (mdef, entry->state),
                     entry->vhid);
            vithist_lmstate_subtree_dump (vh, kbc, entry, 1, fp);

            entry_node = gnode_next(entry_node);
        }

        wid_node = gnode_next(wid_node);
    }

    fprintf (fp, "END_LMSTATE\n");
    fflush (fp);
}
/*
 * Find the best-scoring model unigram that matches the word at a given offset.
 *
 * Every word id in [0, total_unigrams) is split with dict_split_unigram; the
 * ones accepted by dict_starts_with(unigram.word, word_grapheme + word_offset)
 * are scored with ngram_ng_prob against the recent history, and the one with
 * the highest probability wins.
 *
 * model          - n-gram model to query.
 * word_grapheme  - the complete word being processed.
 * history_list   - list of previously chosen word ids; at most
 *                  ngram_model_get_size(model) entries are read from its head.
 * total_unigrams - number of word ids to scan.
 * word_offset    - offset into word_grapheme where matching starts.
 *
 * Returns the winning word id together with the lengths of its word and phone
 * parts.  When no unigram matches, all fields of the result are zero, so a
 * caller can detect failure by testing length_match == 0.
 */
struct winner_t
dict_get_winner_wid(ngram_model_t *model, const char * word_grapheme, glist_t history_list, const int32 total_unigrams, int word_offset)
{
    int32 current_prob = -2147483647;
    /* Zero-initialised so the "no match" result is well defined instead of
     * being whatever happened to be on the stack. */
    struct winner_t winner = {0};
    int32 i = 0, j = 0;
    int nused;
    int32 ngram_order = ngram_model_get_size(model);
    int32 *history = ckd_calloc((size_t)ngram_order+1, sizeof(int32));
    gnode_t *gn;
    const char *vocab;
    const char *sub = word_grapheme + word_offset;  /* loop-invariant suffix */
    int32 prob;
    unigram_t unigram;

    /*
     * Copy up to ngram_order word ids from the head of the history list.
     * NOTE(review): entries are stored from index ngram_order downwards while
     * `history` and `j` are handed to ngram_ng_prob unchanged -- confirm this
     * layout is what ngram_ng_prob expects.
     */
    for (gn = history_list; gn; gn = gnode_next(gn)) {
        history[ngram_order-j] = gnode_int32(gn);
        j++;
        if (j >= ngram_order)
            break;
    }

    for (i = 0; i < total_unigrams; i++) {
        vocab = ngram_word(model, i);
        unigram = dict_split_unigram(vocab);

        if (dict_starts_with(unigram.word, sub)) {
            prob = ngram_ng_prob(model, i, history, j, &nused);
            if (current_prob < prob) {
                current_prob = prob;
                winner.winner_wid = i;
                winner.length_match = strlen(unigram.word);
                winner.len_phoneme = strlen(unigram.phone);
            }
        }

        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    if (history)
        ckd_free(history);

    return winner;
}
/*
 * Convert a word to a phone string using a grapheme-to-phoneme n-gram model.
 *
 * Starting from the "<s>" word id, the word is consumed left to right: at each
 * offset dict_get_winner_wid selects a model unigram, whose id is appended to
 * the history and whose matched length advances the offset.  Afterwards the
 * phone part of every selected unigram (except those whose phone is "_") is
 * written to the result, each followed by a single space.
 *
 * word_grapheme   - word to convert (NUL-terminated).
 * ngram_g2p_model - model supplying the unigrams and probabilities.
 *
 * Returns a string allocated with ckd_calloc; this function does not free it,
 * so releasing it is left to the caller.  Returns NULL (after logging an
 * error) if some position of the word yields a zero-length match.
 */
char *
dict_g2p(char const *word_grapheme, ngram_model_t *ngram_g2p_model)
{
    char *final_phone = NULL;
    size_t increment;
    size_t offset = 0;
    size_t grapheme_len;
    size_t final_phoneme_len = 0;
    size_t n_segments = 0;
    glist_t history_list = NULL;
    gnode_t *gn;
    int first = 0;
    const int32 *total_unigrams;
    int32 wid_sentence;
    struct winner_t winner;
    const char *word;
    unigram_t unigram;

    total_unigrams = ngram_model_get_counts(ngram_g2p_model);

    /* The history always starts with the sentence-start word. */
    wid_sentence = ngram_wid(ngram_g2p_model, "<s>");
    history_list = glist_add_int32(history_list, wid_sentence);

    grapheme_len = strlen(word_grapheme);
    while (offset < grapheme_len) {
        winner = dict_get_winner_wid(ngram_g2p_model, word_grapheme, history_list, *total_unigrams, (int) offset);
        increment = winner.length_match;
        if (increment == 0) {
            E_ERROR("Error trying to find matching phoneme (%s) Exiting.. \n" , word_grapheme);
            /* Do not leak the history built so far. */
            glist_free(history_list);
            return NULL;
        }
        history_list = glist_add_int32(history_list, winner.winner_wid);
        offset += increment;
        final_phoneme_len += winner.len_phoneme;
        n_segments++;
    }

    history_list = glist_reverse(history_list);

    /*
     * Each segment contributes its phone text plus one separating space, and
     * the string needs a terminating NUL.  (Sizing the buffer as twice the
     * phone length is one byte short when every phone is a single character.)
     */
    final_phone = ckd_calloc(1, final_phoneme_len + n_segments + 1);

    for (gn = history_list; gn; gn = gnode_next(gn)) {
        /* The first entry is the "<s>" marker, not part of the word. */
        if (!first) {
            first = 1;
            continue;
        }
        word = ngram_word(ngram_g2p_model, gnode_int32(gn));
        if (!word)
            continue;

        unigram = dict_split_unigram(word);

        /* "_" marks a segment that produces no phone; skip it. */
        if (unigram.phone && strcmp(unigram.phone, "_") != 0) {
            strcat(final_phone, unigram.phone);
            strcat(final_phone, " ");
        }

        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    glist_free(history_list);

    return final_phone;
}
/* * Convert the glist of ssids to a composite sseq id. Return the composite ID. */ static s3ssid_t ssidlist2comsseq (glist_t g, mdef_t *mdef, dict2pid_t *dict2pid, hash_table_t *hs, /* For composite states */ hash_table_t *hp) /* For composite senone seq */ { int32 i, j, n, s, ssid; s3senid_t **sen; s3senid_t *comsenid; gnode_t *gn; n = glist_count (g); if (n <= 0) E_FATAL("Panic: length(ssidlist)= %d\n", n); /* Space for list of senones for each state, derived from the given glist */ sen = (s3senid_t **) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t *)); for (i = 0; i < mdef_n_emit_state (mdef); i++) { sen[i] = (s3senid_t *) ckd_calloc (n+1, sizeof(s3senid_t)); sen[i][0] = BAD_S3SENID; /* Sentinel */ } /* Space for composite senone ID for each state position */ comsenid = (s3senid_t *) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t)); for (gn = g; gn; gn = gnode_next(gn)) { ssid = gnode_int32 (gn); /* Expand ssid into individual states (senones); insert in sen[][] if not present */ for (i = 0; i < mdef_n_emit_state (mdef); i++) { s = mdef->sseq[ssid][i]; for (j = 0; (IS_S3SENID(sen[i][j])) && (sen[i][j] != s); j++); if (NOT_S3SENID(sen[i][j])) { sen[i][j] = s; sen[i][j+1] = BAD_S3SENID; } } } /* Convert senones list for each state position into composite state */ for (i = 0; i < mdef_n_emit_state (mdef); i++) { for (j = 0; IS_S3SENID(sen[i][j]); j++); assert (j > 0); j = hash_enter_bkey (hs, (char *)(sen[i]), j*sizeof(s3senid_t), dict2pid->n_comstate); if (j == dict2pid->n_comstate) dict2pid->n_comstate++; /* New composite state */ else ckd_free ((void *) sen[i]); comsenid[i] = j; } ckd_free (sen); /* Convert sequence of composite senids to composite sseq ID */ j = hash_enter_bkey (hp, (char *)comsenid, mdef->n_emit_state * sizeof(s3senid_t), dict2pid->n_comsseq); if (j == dict2pid->n_comsseq) { dict2pid->n_comsseq++; if (dict2pid->n_comsseq >= MAX_S3SENID) E_FATAL("#Composite sseq limit(%d) reached; increase MAX_S3SENID\n", dict2pid->n_comsseq); 
} else ckd_free ((void *) comsenid); return ((s3ssid_t)j); }