/*
 * Raw-score callback for a set of language models.
 *
 * base    - the set, viewed through its generic ngram_model_t header.
 * wid     - word ID in the set's own ID space.
 * history - history word IDs in the set's ID space; entries equal to
 *           NGRAM_INVALID_WID are passed through unmapped.
 * n_hist  - number of entries in history; clamped to base->n - 1.
 * n_used  - forwarded untouched to every ngram_ng_prob() call made here,
 *           so in the interpolating case it holds whatever the last
 *           submodel queried stored through it.
 *
 * When a submodel is selected (set->cur != -1) the result is that
 * submodel's ngram_ng_prob().  Otherwise every submodel is queried and
 * the results, each offset by set->lweights[], are folded together with
 * logmath_add() starting from base->log_zero.
 *
 * Side effect: set->maphist is overwritten with the translated history.
 */
static int32
ngram_model_set_raw_score(ngram_model_t * base, int32 wid,
                          int32 * history, int32 n_hist, int32 * n_used)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    const int32 max_hist = base->n - 1;
    int32 total;
    int32 m, h;

    /* Never pass more context than the order of the set allows. */
    if (n_hist > max_hist)
        n_hist = max_hist;

    /* One submodel is selected: translate the IDs and ask it alone. */
    if (set->cur != -1) {
        const int32 cur = set->cur;

        for (h = 0; h < n_hist; ++h) {
            int32 hw = history[h];

            /* Invalid IDs have no row in widmap; keep them as they are. */
            if (hw != NGRAM_INVALID_WID)
                hw = set->widmap[hw][cur];
            set->maphist[h] = hw;
        }
        return ngram_ng_prob(set->lms[cur], set->widmap[wid][cur],
                             set->maphist, n_hist, n_used);
    }

    /* No submodel selected: interpolate over all of them. */
    total = base->log_zero;
    for (m = 0; m < set->n_models; ++m) {
        int32 sub_prob;

        /* Each submodel has its own ID space, so remap for every model. */
        for (h = 0; h < n_hist; ++h) {
            int32 hw = history[h];

            if (hw != NGRAM_INVALID_WID)
                hw = set->widmap[hw][m];
            set->maphist[h] = hw;
        }
        sub_prob = ngram_ng_prob(set->lms[m], set->widmap[wid][m],
                                 set->maphist, n_hist, n_used);
        total = logmath_add(base->lmath, total,
                            set->lweights[m] + sub_prob);
    }

    return total;
}
Example #2
0
/*
 * Variadic convenience wrapper around ngram_ng_prob().
 *
 * model - model to query.
 * word  - word string whose probability is requested.
 * ...   - zero or more history word strings (const char *), terminated
 *         by a NULL pointer.  They are converted with ngram_wid() and
 *         handed to ngram_ng_prob() in the order given.
 *         NOTE(review): ngram_ng_prob() presumably expects the most
 *         recent history word first - confirm against its documentation.
 *
 * Returns whatever ngram_ng_prob() returns for the translated IDs.
 *
 * With no history words, no buffer is allocated and a NULL history of
 * length 0 is passed.  This avoids a zero-sized ckd_calloc() request,
 * whose outcome depends on the platform's calloc().
 */
int32
ngram_probv(ngram_model_t * model, const char *word, ...)
{
    va_list history;
    const char *hword;
    int32 *histid = NULL;
    int32 n_hist = 0;
    int32 n_used;
    int32 prob;
    int32 i;

    /* First pass: count the history words up to the NULL terminator. */
    va_start(history, word);
    while ((hword = va_arg(history, const char *)) != NULL)
        ++n_hist;
    va_end(history);

    if (n_hist > 0) {
        histid = ckd_calloc(n_hist, sizeof(*histid));

        /* Second pass: translate exactly the words counted above. */
        va_start(history, word);
        for (i = 0; i < n_hist; ++i) {
            hword = va_arg(history, const char *);
            histid[i] = ngram_wid(model, hword);
        }
        va_end(history);
    }

    prob = ngram_ng_prob(model, ngram_wid(model, word),
                         histid, n_hist, &n_used);
    if (histid != NULL)
        ckd_free(histid);
    return prob;
}
/*
 * Add-unigram callback for a set of language models.
 *
 * By the time this runs the word already exists in the master model
 * under ID `wid`.  The word is looked up in (and, if missing, added to)
 * every active submodel, and a new row of per-submodel IDs is appended
 * to set->widmap.
 *
 * base    - the set, viewed through its generic ngram_model_t header.
 * wid     - ID of the word in the master model; base->word_str[wid] is
 *           its string.
 * lweight - weight forwarded to ngram_model_add_word() after conversion
 *           with logmath_exp().
 *
 * Returns the unigram result for the word: the selected submodel's
 * ngram_ng_prob() when one is selected, otherwise the logmath_add()
 * combination over all submodels offset by set->lweights[].  Returns
 * base->log_zero if a submodel refuses the word; in that case widmap is
 * left untouched.
 */
static int32
ngram_model_set_add_ug(ngram_model_t * base, int32 wid, int32 lweight)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    int32 *sub_wids;
    int32 result;
    int32 m, w;

    sub_wids = ckd_calloc(set->n_models, sizeof(*sub_wids));
    result = base->log_zero;

    for (m = 0; m < set->n_models; ++m) {
        int32 sub_prob, n_used;

        /* Submodels that are not active get no entry for this word. */
        if (set->cur != -1 && set->cur != m) {
            sub_wids[m] = NGRAM_INVALID_WID;
            continue;
        }

        /* Reuse the submodel's ID if it already knows the word. */
        sub_wids[m] = ngram_wid(set->lms[m], base->word_str[wid]);
        if (sub_wids[m] == NGRAM_INVALID_WID) {
            sub_wids[m] =
                ngram_model_add_word(set->lms[m], base->word_str[wid],
                                     (float32) logmath_exp(base->lmath,
                                                           lweight));
            if (sub_wids[m] == NGRAM_INVALID_WID) {
                ckd_free(sub_wids);
                return base->log_zero;
            }
        }

        /* Unigram query: no history is supplied. */
        sub_prob =
            ngram_ng_prob(set->lms[m], sub_wids[m], NULL, 0, &n_used);
        if (set->cur == m)
            result = sub_prob;
        else if (set->cur == -1)
            result =
                logmath_add(base->lmath, result,
                            set->lweights[m] + sub_prob);
    }

    /* widmap is an array of row pointers into one contiguous buffer
       anchored at widmap[0].  Grow both, then rebuild every row pointer
       because the buffer may have moved. */
    set->widmap =
        ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap));
    set->widmap[0] =
        ckd_realloc(set->widmap[0],
                    base->n_words * set->n_models * sizeof(**set->widmap));
    for (w = 0; w < base->n_words; ++w)
        set->widmap[w] = set->widmap[0] + w * set->n_models;

    /* Install the new word's row. */
    memcpy(set->widmap[wid], sub_wids, set->n_models * sizeof(*sub_wids));
    ckd_free(sub_wids);
    return result;
}
Example #4
0
/*
 * Score `wid` after `history` and offer it to a bounded heap of
 * candidates.
 *
 * The candidate's key is ngram_ng_prob() for the word, plus the
 * accumulated probability of tree_element_from when that parent is
 * given.  If the heap has room the candidate is always inserted.  If the
 * heap is full, the candidate replaces the popped element only when its
 * key is strictly greater than array_heap_min_key(); the popped element
 * is released with ckd_free().  Otherwise nothing is allocated or
 * changed.
 */
void try_add_tree_element(ngram_model_t *model, int32 wid, int32 *history, int32 history_size,
        tree_element_t *tree_element_from, array_heap_t *heap) {
    int32 n_used;
    int32 total = ngram_ng_prob(model, wid, history, history_size, &n_used);

    /* Extend the parent's accumulated score, if there is a parent. */
    if (tree_element_from)
        total += tree_element_from->probability;

    if (array_heap_full(heap)) {
        /* Not strictly better than the current minimum: drop it. */
        if (!(array_heap_min_key(heap) < total))
            return;
        /* Make room by evicting the popped entry. */
        ckd_free(array_heap_pop(heap));
    }

    array_heap_add(heap, total, tree_element_new(wid, total, tree_element_from));
}
Example #5
0
/*
 * Pick the best-scoring unigram whose word part matches the input at
 * word_offset.
 *
 * model          - n-gram model whose vocabulary entries are split by
 *                  dict_split_unigram() into a `word` and a `phone` part.
 * word_grapheme  - input string being matched.
 * history_list   - list of int32 word IDs; at most ngram_order entries
 *                  are consumed, in list order.
 * total_unigrams - number of vocabulary IDs (0 .. total_unigrams-1) to
 *                  try.
 * word_offset    - offset into word_grapheme where matching starts.
 *
 * Every ID for which dict_starts_with(unigram.word, sub) holds is scored
 * with ngram_ng_prob(); the candidate with the highest score wins.
 *
 * Returns a winner_t with winner_wid, length_match (strlen of the winning
 * word part) and len_phoneme (strlen of the winning phone part).  If no
 * candidate matches, the result is all zeroes rather than uninitialized
 * memory, so callers can detect this through length_match == 0.
 */
struct winner_t
dict_get_winner_wid(ngram_model_t *model, const char * word_grapheme, glist_t history_list, const int32 total_unigrams,
                    int word_offset)
{
    /* Lowest starting score, so that any scored candidate can win. */
    int32 current_prob = -2147483647;
    struct winner_t winner = {0};
    int32 i = 0, j = 0;
    int nused;
    int32 ngram_order = ngram_model_get_size(model);
    int32 *history = ckd_calloc((size_t)ngram_order+1, sizeof(int32));
    gnode_t *gn;
    const char *vocab;
    /* The remaining input does not change between candidates. */
    const char *sub = word_grapheme + word_offset;
    int32 prob;
    unigram_t unigram;

    /* Copy up to ngram_order history IDs, filling from the end of the
       buffer backwards.
       NOTE(review): entries land in history[ngram_order-j+1 .. ngram_order]
       while ngram_ng_prob() below is given history[0 .. j-1]; confirm that
       this indexing is intended. */
    for (gn = history_list; gn; gn = gnode_next(gn)) {
        history[ngram_order-j] = gnode_int32(gn);
        j++;
        if (j >= ngram_order)
            break;
    }

    for (i = 0; i < total_unigrams; i++) {
        vocab = ngram_word(model, i);
        unigram  = dict_split_unigram(vocab);
        if (dict_starts_with(unigram.word, sub)){
            prob = ngram_ng_prob(model, i, history, j, &nused);
            /* Strict comparison: on ties the earliest ID is kept. */
            if (current_prob < prob) {
                current_prob = prob;
                winner.winner_wid = i;
                winner.length_match = strlen(unigram.word);
                winner.len_phoneme = strlen(unigram.phone);
            }
        }

        /* The split parts are released here after each candidate. */
        if (unigram.word)
            ckd_free(unigram.word);
        if (unigram.phone)
            ckd_free(unigram.phone);
    }

    if (history)
        ckd_free(history);

    return winner;
}
Example #6
0
/*
 * Probability of words[0] given words[1 .. n-1] as history.
 *
 * model - model to query.
 * words - array of n word strings; words[0] is the word being scored
 *         and the remaining entries are passed, in array order, as its
 *         history.
 * n     - number of entries in words.
 *
 * Returns the value of ngram_ng_prob() for the translated IDs, or
 * model->log_zero when there is no word to score (words is NULL or
 * n < 1).
 *
 * With n == 1 no context buffer is allocated and a NULL history of
 * length 0 is passed.  This avoids a zero-sized ckd_calloc() request,
 * whose outcome depends on the platform's calloc().
 */
int32
ngram_prob(ngram_model_t * model, const char *const *words, int32 n)
{
    int32 *ctx_id = NULL;
    int32 n_ctx;
    int32 nused;
    int32 prob;
    int32 wid;
    int32 i;

    /* Without this guard, n - 1 would be negative and words[0] would be
       read out of bounds. */
    if (words == NULL || n < 1)
        return model->log_zero;

    n_ctx = n - 1;
    if (n_ctx > 0) {
        ctx_id = (int32 *) ckd_calloc(n_ctx, sizeof(*ctx_id));
        for (i = 0; i < n_ctx; ++i)
            ctx_id[i] = ngram_wid(model, words[i + 1]);
    }

    wid = ngram_wid(model, words[0]);
    prob = ngram_ng_prob(model, wid, ctx_id, n_ctx, &nused);
    if (ctx_id != NULL)
        ckd_free(ctx_id);

    return prob;
}