/**
 * Scan all unigrams of the model and return the best-scoring one whose word
 * starts with the suffix of word_grapheme beginning at word_offset.
 *
 * The N-gram history for scoring is taken from history_list (most recent
 * first); at most ngram_model_get_size(model) entries are consumed.
 *
 * @param model          Language model to query.
 * @param word_grapheme  Grapheme string being matched.
 * @param history_list   List of previous word IDs (gnode int32 payloads).
 * @param total_unigrams Number of unigrams to scan in the model.
 * @param word_offset    Offset into word_grapheme where matching starts.
 * @return winner_t with the best word ID, matched-word length and phone-string
 *         length. If nothing matches, a zero-initialized winner_t is returned.
 */
struct winner_t
dict_get_winner_wid(ngram_model_t *model, const char *word_grapheme,
                    glist_t history_list, const int32 total_unigrams,
                    int word_offset)
{
    int32 current_prob = -2147483647;
    /* BUGFIX: winner was previously uninitialized; if no unigram matched
     * (or total_unigrams == 0) the function returned indeterminate values. */
    struct winner_t winner = {0};
    int32 i = 0, j = 0;
    int nused;
    int32 ngram_order = ngram_model_get_size(model);
    int32 *history = ckd_calloc((size_t)ngram_order + 1, sizeof(int32));
    gnode_t *gn;
    const char *vocab;
    const char *sub;
    int32 prob;
    unigram_t unigram;

    /* Copy up to ngram_order history entries, filling from the top of the
     * array downwards (history[ngram_order], history[ngram_order-1], ...).
     * NOTE(review): ngram_ng_prob below is handed the BASE of the array with
     * count j, i.e. history[0..j-1], which are the zero-filled slots unless
     * j == ngram_order + 1 — looks like an off-by-one in the intended layout;
     * verify against ngram_ng_prob's expected history ordering. */
    for (gn = history_list; gn; gn = gnode_next(gn)) {
        history[ngram_order - j] = gnode_int32(gn);
        j++;
        if (j >= ngram_order)
            break;
    }

    for (i = 0; i < total_unigrams; i++) {
        vocab = ngram_word(model, i);
        unigram = dict_split_unigram(vocab);
        sub = word_grapheme + word_offset;
        if (dict_starts_with(unigram.word, sub)) {
            prob = ngram_ng_prob(model, i, history, j, &nused);
            if (current_prob < prob) {
                current_prob = prob;
                winner.winner_wid = i;
                winner.length_match = strlen(unigram.word);
                winner.len_phoneme = strlen(unigram.phone);
            }
        }
        /* dict_split_unigram allocates these; free(NULL)-style guard kept
         * out — ckd_free on NULL is a no-op, but the fields may legitimately
         * be NULL when the split fails, so guard only for clarity removal. */
        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    ckd_free(history);
    return winner;
}
/**
 * Compute the (integer, log-domain) cross-entropy of a word sequence under
 * the given language model: CH = -1/N * sum log P(w|h).
 *
 * Words equal to "<s>" are skipped as context cues; OOV / unknown words are
 * skipped and counted. Scoring is done in reverse order so that the tail of
 * the reversed wids[] array serves directly as the history for each word.
 *
 * @param lm           Language model used for scoring.
 * @param words        Array of n word strings.
 * @param n            Number of words.
 * @param out_n_ccs    If non-NULL, receives the number of context cues seen.
 * @param out_n_oovs   If non-NULL, receives the number of OOVs seen.
 * @param out_lm_score If non-NULL, receives the total LM log score (sum of
 *                     log probs). Not written when no words were scored.
 * @return Cross-entropy (integer log domain), or 0 if no words were scored.
 */
static int
calc_entropy(ngram_model_t *lm, char **words, int32 n,
             int32 *out_n_ccs, int32 *out_n_oovs, int32 *out_lm_score)
{
    int32 *wids;
    int32 startwid;
    int32 i, ch, nccs, noovs, unk;

    if (n == 0)
        return 0;

    unk = ngram_unknown_wid(lm);

    /* Reverse this array into an array of word IDs. */
    wids = ckd_calloc(n, sizeof(*wids));
    for (i = 0; i < n; ++i)
        wids[n - i - 1] = ngram_wid(lm, words[i]);

    /* Skip <s> as it's a context cue (HACK, this should be configurable). */
    startwid = ngram_wid(lm, "<s>");

    /* Now evaluate the list of words in reverse using the
     * remainder of the array as the history. */
    ch = noovs = nccs = 0;
    for (i = 0; i < n; ++i) {
        int32 n_used;
        int32 prob;

        /* Skip <s> as it's a context cue (HACK, this should be configurable). */
        if (wids[i] == startwid) {
            ++nccs;
            continue;
        }
        /* Skip and count OOVs. */
        if (wids[i] == NGRAM_INVALID_WID || wids[i] == unk) {
            ++noovs;
            continue;
        }
        /* Sum up information for each N-gram */
        prob = ngram_ng_score(lm, wids[i], wids + i + 1, n - i - 1, &n_used);
        if (verbose) {
            int m;
            printf("log P(%s|", ngram_word(lm, wids[i]));
            m = i + ngram_model_get_size(lm) - 1;
            if (m >= n)
                m = n - 1;
            while (m > i) {
                printf("%s ", ngram_word(lm, wids[m--]));
            }
            printf(") = %d\n", prob);
        }
        ch -= prob;
    }

    /* BUGFIX: wids was leaked on every return path. */
    ckd_free(wids);

    if (out_n_ccs)
        *out_n_ccs = nccs;
    if (out_n_oovs)
        *out_n_oovs = noovs;

    /* Calculate cross-entropy CH = - 1/N sum log P(W|H) */
    n -= (nccs + noovs);
    if (n <= 0)
        return 0;
    if (out_lm_score)
        *out_lm_score = -ch;
    return ch / n;
}