Esempio n. 1
0
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid, lmwid;
    ngram_model_t *lmset;
    s3cipid_t *pron;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
        ckd_free(pron);
        return -1;
    }
    /* No longer needed. */
    ckd_free(pron);

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    if ((lmset = ps_get_lmset(ps)) != NULL) {
        /* Add it to the LM set (meaning, the current LM).  In a perfect
         * world, this would result in the same WID, but because of the
         * weird way that word IDs are handled, it doesn't. */
        if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
            == NGRAM_INVALID_WID)
            return -1;
    }
 
    /* Rebuild the widmap and search tree if requested. */
    if (update) {
        if ((rv = ps_search_reinit(ps->search, ps->dict, ps->d2p) < 0))
            return rv;
    }
    return wid;
}
static int32
ngram_model_set_add_ug(ngram_model_t * base, int32 wid, int32 lweight)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    int32 *newwid;
    int32 i, prob;

    /* At this point the word has already been added to the master
       model and we have a new word ID for it.  Add it to active
       submodels and track the word IDs. */
    newwid = ckd_calloc(set->n_models, sizeof(*newwid));
    prob = base->log_zero;
    for (i = 0; i < set->n_models; ++i) {
        int32 wprob, n_hist;

        /* Only add to active models. */
        if (set->cur == -1 || set->cur == i) {
            /* Did this word already exist? */
            newwid[i] = ngram_wid(set->lms[i], base->word_str[wid]);
            if (newwid[i] == NGRAM_INVALID_WID) {
                /* Add it to the submodel. */
                newwid[i] =
                    ngram_model_add_word(set->lms[i], base->word_str[wid],
                                         (float32) logmath_exp(base->lmath,
                                                               lweight));
                if (newwid[i] == NGRAM_INVALID_WID) {
                    ckd_free(newwid);
                    return base->log_zero;
                }
            }
            /* Now get the unigram probability for the new word and either
             * interpolate it or use it (if this is the current model). */
            wprob =
                ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist);
            if (set->cur == i)
                prob = wprob;
            else if (set->cur == -1)
                prob =
                    logmath_add(base->lmath, prob,
                                set->lweights[i] + wprob);
        }
        else {
            newwid[i] = NGRAM_INVALID_WID;
        }
    }
    /* Okay we have the word IDs for this in all the submodels.  Now
       do some complicated memory mangling to add this to the
       widmap. */
    set->widmap =
        ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap));
    set->widmap[0] =
        ckd_realloc(set->widmap[0],
                    base->n_words * set->n_models * sizeof(**set->widmap));
    for (i = 0; i < base->n_words; ++i)
        set->widmap[i] = set->widmap[0] + i * set->n_models;
    memcpy(set->widmap[wid], newwid, set->n_models * sizeof(*newwid));
    ckd_free(newwid);
    return prob;
}
Esempio n. 3
0
int32
ngram_model_add_class(ngram_model_t * model,
                      const char *classname,
                      float32 classweight,
                      char **words, const float32 * weights, int32 n_words)
{
    ngram_class_t *lmclass;
    glist_t classwords = NULL;
    int32 i, start_wid = -1;
    int32 classid, tag_wid;

    /* Check if classname already exists in model.  If not, add it. */
    if ((tag_wid =
         ngram_wid(model, classname)) == ngram_unknown_wid(model)) {
        tag_wid = ngram_model_add_word(model, classname, classweight);
        if (tag_wid == NGRAM_INVALID_WID)
            return -1;
    }

    if (model->n_classes == 128) {
        E_ERROR("Number of classes cannot exceed 128 (sorry)\n");
        return -1;
    }
    classid = model->n_classes;
    for (i = 0; i < n_words; ++i) {
        int32 wid;

        wid = ngram_add_word_internal(model, words[i], classid);
        if (wid == NGRAM_INVALID_WID)
            return -1;
        if (start_wid == -1)
            start_wid = NGRAM_BASEWID(wid);
        classwords = glist_add_float32(classwords, weights[i]);
    }
    classwords = glist_reverse(classwords);
    lmclass = ngram_class_new(model, tag_wid, start_wid, classwords);
    glist_free(classwords);
    if (lmclass == NULL)
        return -1;

    ++model->n_classes;
    if (model->classes == NULL)
        model->classes = ckd_calloc(1, sizeof(*model->classes));
    else
        model->classes = ckd_realloc(model->classes,
                                     model->n_classes *
                                     sizeof(*model->classes));
    model->classes[classid] = lmclass;
    return classid;
}
Esempio n. 4
0
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid;
    s3cipid_t *pron;
    hash_iter_t *search_it;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
        ckd_free(pron);
        return -1;
    }
    /* No longer needed. */
    ckd_free(pron);

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    /* TODO: we definitely need to refactor this */
    for (search_it = hash_table_iter(ps->searches); search_it;
         search_it = hash_table_iter_next(search_it)) {
        ps_search_t *search = hash_entry_val(search_it->ent);
        if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) {
            ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;
            if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
                hash_table_iter_free(search_it);
                return -1;
            }
        }

        if (update) {
            if ((rv = ps_search_reinit(search, ps->dict, ps->d2p) < 0)) {
                hash_table_iter_free(search_it);
                return rv;
            }
        }
    }

    /* Rebuild the widmap and search tree if requested. */
    return wid;
}