/**
 * Convert a grapheme string to a phoneme string by searching an n-gram
 * G2P model.
 *
 * One bounded heap of candidate tree elements is kept per grapheme position
 * (each limited to level_count_limit entries), plus a final single-entry
 * heap for the end-of-sentence step.  Every unigram of the model is tried at
 * every position; a unigram that covers fit_count graphemes starting at
 * position i extends the candidates ending at i - 1 into the heap at
 * i + fit_count - 1.
 *
 * @param model             n-gram model whose unigrams encode the
 *                          grapheme/phoneme pairs.
 * @param grapheme          NUL-terminated grapheme string to convert.
 * @param level_count_limit capacity of each per-position candidate heap.
 * @return the string produced by unwind_phoneme() for the best complete
 *         path, or NULL when grapheme is NULL or empty, or when no path
 *         reaches the end of the word.
 *         NOTE(review): ownership of the returned string is decided by
 *         unwind_phoneme(), which is not visible here -- confirm who frees.
 */
char *g2p(ngram_model_t *model, char *grapheme, uint32 level_count_limit)
{
    int32 i, j, n, wid, fit_count;
    array_heap_t **tree_table;
    const char *unigram_text;
    char *phoneme;
    int32 *history_buffer;
    int32 start_wid, end_wid;

    /* An empty word has no "last position": the end-of-sentence step below
     * would index tree_table[-1].  Report "no result" instead. */
    if (grapheme == NULL || grapheme[0] == '\0')
        return NULL;

    const uint32 total_unigrams = *ngram_model_get_counts(model);

    n = strlen(grapheme);

    /* tree_table[0 .. n-1]: candidates ending at each grapheme position;
     * tree_table[n]: the single best candidate after appending </s>. */
    tree_table = ckd_calloc(n + 1, sizeof(array_heap_t *));
    for (i = 0; i < n; i++) {
        tree_table[i] = array_heap_new(level_count_limit);
    }
    tree_table[n] = array_heap_new(1);

    history_buffer = ckd_calloc(n + 1, sizeof(int32));
    start_wid = ngram_wid(model, "<s>");
    end_wid = ngram_wid(model, "</s>");

    for (i = 0; i < n; i++) {
        for (wid = 0; wid < total_unigrams; wid++) {
            unigram_text = ngram_word(model, wid);
            fit_count = graphemes_fit_count(grapheme, i, unigram_text);
            if (fit_count != 0) {
                /* Position 0 has no predecessors, signalled by NULL.
                 * NOTE(review): relies on graphemes_fit_count() never
                 * reporting a match that runs past the end of grapheme. */
                try_add_tree_elements(model, wid,
                                      i == 0 ? NULL : tree_table[i - 1],
                                      tree_table[i + fit_count - 1],
                                      history_buffer, start_wid);
            }
        }
    }

    /* Close every complete path with the end-of-sentence word. */
    try_add_tree_elements(model, end_wid, tree_table[n - 1], tree_table[n],
                          history_buffer, start_wid);

    /* The element in the final heap is the </s> step itself; the phoneme
     * sequence is unwound starting from its parent. */
    phoneme = (tree_table[n]->size == 0)
        ? NULL
        : unwind_phoneme(model,
                         ((tree_element_t *) array_heap_element(tree_table[n], 0))->parent);

    /* Release every tree element, then the heaps and the scratch buffers. */
    for (i = 0; i <= n; i++) {
        for (j = 0; j < tree_table[i]->size; j++) {
            ckd_free(array_heap_element(tree_table[i], j));
        }
        array_heap_free(tree_table[i]);
    }
    ckd_free(tree_table);
    ckd_free(history_buffer);

    return phoneme;
}
/**
 * Greedy grapheme-to-phoneme conversion of a single word.
 *
 * Starting from the "<s>" history, the function repeatedly asks
 * dict_get_winner_wid() for the best unigram at the current offset, appends
 * its word id to the history and advances by the number of graphemes it
 * matched.  The chosen unigrams are then split into grapheme/phone parts and
 * the phones are joined, each followed by a single space.  Phones equal to
 * "_" are skipped.
 *
 * @param word_grapheme   NUL-terminated word to convert.
 * @param ngram_g2p_model n-gram model used to pick the unigrams.
 * @return a buffer allocated with ckd_calloc() holding the space-separated
 *         phones (the caller releases it with ckd_free()), or NULL when
 *         word_grapheme is NULL or no unigram matches at some offset.
 */
char *
dict_g2p(char const *word_grapheme, ngram_model_t *ngram_g2p_model)
{
    char *final_phone = NULL;
    size_t increment = 1;
    int word_offset = 0;
    size_t j;
    size_t grapheme_len = 0, final_phoneme_len = 0;
    size_t capacity = 0, used = 0, phone_len = 0;
    glist_t history_list = NULL;
    gnode_t *gn;
    int first = 0;
    const int32 *total_unigrams;
    struct winner_t winner;
    const char *word;
    unigram_t unigram;
    int32 wid_sentence;

    if (word_grapheme == NULL)
        return NULL;

    total_unigrams = ngram_model_get_counts(ngram_g2p_model);
    wid_sentence = ngram_wid(ngram_g2p_model, "<s>");

    /* start with sentence */
    history_list = glist_add_int32(history_list, wid_sentence);
    grapheme_len = strlen(word_grapheme);

    for (j = 0; j < grapheme_len; j += increment) {
        winner = dict_get_winner_wid(ngram_g2p_model, word_grapheme,
                                     history_list, *total_unigrams,
                                     word_offset);
        increment = winner.length_match;
        if (increment == 0) {
            E_ERROR("Error trying to find matching phoneme (%s) Exiting.. \n" , word_grapheme);
            /* Do not leak the history collected so far. */
            if (history_list)
                glist_free(history_list);
            return NULL;
        }
        history_list = glist_add_int32(history_list, winner.winner_wid);
        word_offset += winner.length_match;
        final_phoneme_len += winner.len_phoneme;
    }

    /* Entries were pushed at the head; restore chronological order. */
    history_list = glist_reverse(history_list);

    /* Initial estimate: every phone plus its trailing space, plus the NUL
     * terminator.  The buffer grows below if the estimate is too small. */
    capacity = final_phoneme_len * 2 + 1;
    final_phone = ckd_calloc(1, capacity);

    for (gn = history_list; gn; gn = gnode_next(gn)) {
        /* The first entry is the "<s>" marker, not a phone. */
        if (!first) {
            first = 1;
            continue;
        }

        word = ngram_word(ngram_g2p_model, gnode_int32(gn));
        if (!word)
            continue;

        unigram = dict_split_unigram(word);

        /* Append unless the phone is missing or is the "_" placeholder. */
        if (unigram.phone && strcmp(unigram.phone, "_") != 0) {
            phone_len = strlen(unigram.phone);

            /* Need room for the phone, one space and the terminator. */
            if (used + phone_len + 2 > capacity) {
                size_t grown_capacity = (used + phone_len + 2) * 2;
                char *grown = ckd_calloc(1, grown_capacity);

                memcpy(grown, final_phone, used);
                ckd_free(final_phone);
                final_phone = grown;
                capacity = grown_capacity;
            }

            memcpy(final_phone + used, unigram.phone, phone_len);
            used += phone_len;
            final_phone[used++] = ' ';
            final_phone[used] = '\0';
        }

        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    if (history_list)
        glist_free(history_list);

    return final_phone;
}