Example #1
0
/*
 * Converts a grapheme string to a phoneme string using an n-gram model.
 *
 * One array heap is created per grapheme position with
 * array_heap_new(level_count_limit), plus a final heap created with
 * array_heap_new(1). For each position, every unigram id below the model's
 * first count is tested with graphemes_fit_count(); each non-zero fit is
 * handed to try_add_tree_elements() together with the heap of the previous
 * position (NULL at position 0) and the heap at index i + fit_count - 1.
 * Finally "</s>" is offered to the last heap and, if that heap is non-empty,
 * the result is built by unwind_phoneme() from its first element's parent.
 *
 * model             n-gram model to query.
 * grapheme          NUL-terminated input; NULL or "" yields NULL.
 * level_count_limit argument passed to array_heap_new() for every
 *                   per-position heap.
 *
 * Returns the value of unwind_phoneme(), or NULL when the input is NULL or
 * empty or when nothing reached the final heap. All heaps, their elements
 * and the history buffer are released before returning.
 * NOTE(review): ownership of the returned string is decided by
 * unwind_phoneme(), which is not visible here - confirm who frees it.
 */
char *g2p(ngram_model_t *model, char *grapheme, uint32 level_count_limit) {
    int32 i, j, n, wid, fit_count, slot;
    array_heap_t **tree_table;
    const char *unigram_text;
    char *phoneme;
    int32 *history_buffer;
    int32 start_wid, end_wid;
    uint32 total_unigrams;

    /* With no graphemes there is no tree_table[n - 1] to chain "</s>" to;
     * bail out instead of indexing tree_table[-1]. */
    if (grapheme == NULL || grapheme[0] == '\0') {
        return NULL;
    }

    total_unigrams = *ngram_model_get_counts(model);

    n = (int32) strlen(grapheme);
    tree_table = ckd_calloc(n + 1, sizeof(array_heap_t *));
    for (i = 0; i < n; i++) {
        tree_table[i] = array_heap_new(level_count_limit);
    }
    tree_table[n] = array_heap_new(1);
    history_buffer = ckd_calloc(n + 1, sizeof(int32));
    start_wid = ngram_wid(model, "<s>");
    end_wid = ngram_wid(model, "</s>");

    for (i = 0; i < n; i++) {
        for (wid = 0; (uint32) wid < total_unigrams; wid++) {
            unigram_text = ngram_word(model, wid);
            fit_count = graphemes_fit_count(grapheme, i, unigram_text);
            if (fit_count == 0) {
                continue;
            }

            /* Only index heaps that were actually allocated (0..n). */
            slot = i + fit_count - 1;
            if (slot < 0 || slot > n) {
                continue;
            }

            try_add_tree_elements(model, wid, i == 0 ? NULL : tree_table[i - 1], tree_table[slot],
                    history_buffer, start_wid);
        }
    }

    /* n >= 1 here, so tree_table[n - 1] is a valid heap. */
    try_add_tree_elements(model, end_wid, tree_table[n - 1], tree_table[n], history_buffer, start_wid);

    /* The result must be unwound before the tree elements are freed below. */
    phoneme = (tree_table[n]->size == 0) ? NULL : unwind_phoneme(model,
            ((tree_element_t *) array_heap_element(tree_table[n], 0))->parent);

    for (i = 0; i <= n; i++) {
        for (j = 0; j < tree_table[i]->size; j++) {
            ckd_free(array_heap_element(tree_table[i], j));
        }
        array_heap_free(tree_table[i]);
    }

    ckd_free(tree_table);
    ckd_free(history_buffer);
    return phoneme;
}
Example #2
0
/*
 * Builds a space-separated phoneme string for word_grapheme using a
 * grapheme-to-phoneme n-gram model.
 *
 * Starting from a history holding only the "<s>" word id, the function
 * repeatedly calls dict_get_winner_wid() at the current offset, appends the
 * winning word id to the history and advances by winner.length_match until
 * the whole input is consumed. Each history entry after the first is then
 * looked up with ngram_word() and split with dict_split_unigram(); every
 * phone other than "_" is appended to the result followed by one space.
 *
 * word_grapheme   NUL-terminated input word; NULL yields NULL.
 * ngram_g2p_model model queried for word ids and unigram text.
 *
 * Returns a buffer allocated with ckd_calloc() and handed to the caller
 * (an empty string when nothing was emitted), or NULL when the input is
 * NULL or a position has no match (an error is logged in the latter case).
 */
char *
dict_g2p(char const *word_grapheme, ngram_model_t *ngram_g2p_model) 
{
    char *final_phone = NULL;
    size_t increment = 1;
    int word_offset = 0;
    size_t j;
    size_t grapheme_len = 0;
    size_t needed = 0, used = 0, phone_len;
    glist_t history_list = NULL;
    gnode_t *gn;
    gnode_t *first_entry;
    const int32 *total_unigrams;
    struct winner_t winner;
    const char *word;
    unigram_t unigram;
    int32 wid_sentence;

    if (word_grapheme == NULL)
        return NULL;

    total_unigrams = ngram_model_get_counts(ngram_g2p_model);
    wid_sentence = ngram_wid(ngram_g2p_model, "<s>"); /* start with sentence */
    history_list = glist_add_int32(history_list, wid_sentence);
    grapheme_len = strlen(word_grapheme);
    for (j = 0; j < grapheme_len; j += increment) {
        winner = dict_get_winner_wid(ngram_g2p_model, word_grapheme, history_list, *total_unigrams, word_offset);
        increment = winner.length_match;
        if (increment == 0) {
            E_ERROR("Error trying to find matching phoneme (%s) Exiting.. \n" , word_grapheme);
            /* Do not leak the history collected so far. */
            glist_free(history_list);
            return NULL;
        }
        history_list = glist_add_int32(history_list, winner.winner_wid);
        word_offset += winner.length_match;
    }

    history_list = glist_reverse(history_list);
    /* The first entry after reversing is the "<s>" marker added above. */
    first_entry = history_list ? gnode_next(history_list) : NULL;

    /*
     * Pass 1: measure exactly how many bytes the output needs, so the buffer
     * size does not depend on an estimate and always has room for the NUL.
     */
    for (gn = first_entry; gn; gn = gnode_next(gn)) {
        word = ngram_word(ngram_g2p_model, gnode_int32(gn));
        if (!word)
            continue;

        unigram = dict_split_unigram(word);
        if (unigram.phone && strcmp(unigram.phone, "_") != 0)
            needed += strlen(unigram.phone) + 1; /* phone + trailing space */

        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    /* Zero-filled, so the string stays NUL-terminated as it grows. */
    final_phone = ckd_calloc(1, needed + 1);

    /* Pass 2: append each emitted phone followed by a single space. */
    for (gn = first_entry; gn; gn = gnode_next(gn)) {
        word = ngram_word(ngram_g2p_model, gnode_int32(gn));
        if (!word)
            continue;

        unigram = dict_split_unigram(word);
        if (unigram.phone && strcmp(unigram.phone, "_") != 0) {
            phone_len = strlen(unigram.phone);
            /* Never write past what pass 1 measured. */
            if (used + phone_len + 1 <= needed) {
                memcpy(final_phone + used, unigram.phone, phone_len);
                used += phone_len;
                final_phone[used++] = ' ';
            }
        }

        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    if (history_list)
        glist_free(history_list);

    return final_phone;
}