Example #1
0
int
mdef_ciphone_id(mdef_t * m, char *ci)
{
    int32 id;
    if (hash_table_lookup_int32(m->ciphone_ht, ci, &id) < 0)
        return -1;
    return id;
}
Example #2
0
int32
ngram_wid(ngram_model_t * model, const char *word)
{
    int32 val;

    if (hash_table_lookup_int32(model->wid, word, &val) == -1)
        return ngram_unknown_wid(model);
    else
        return val;
}
Example #3
0
int32
ngram_unknown_wid(ngram_model_t * model)
{
    int32 val;

    /* FIXME: This could be memoized for speed if necessary. */
    /* Look up <UNK>, if not found return NGRAM_INVALID_WID. */
    if (hash_table_lookup_int32(model->wid, "<UNK>", &val) == -1)
        return NGRAM_INVALID_WID;
    else
        return val;
}
Example #4
0
s3wid_t
dict_wordid(dict_t *d, const char *word)
{
    int32 w;

    assert(d);
    assert(word);

    if (hash_table_lookup_int32(d->ht, word, &w) < 0)
        return (BAD_S3WID);
    return w;
}
Example #5
0
/**
 * Add a word to the word string and ID mapping.
 */
int32
ngram_add_word_internal(ngram_model_t * model,
                        const char *word, int32 classid)
{

    /* Check for hash collisions. */
    int32 wid;
    if (hash_table_lookup_int32(model->wid, word, &wid) == 0) {
        E_WARN("Omit duplicate word '%s'\n", word);
        return wid;
    }

    /* Take the next available word ID */
    wid = model->n_words;
    if (classid >= 0) {
        wid = NGRAM_CLASSWID(wid, classid);
    }

    /* Reallocate word_str if necessary. */
    if (model->n_words >= model->n_1g_alloc) {
        model->n_1g_alloc += UG_ALLOC_STEP;
        model->word_str = ckd_realloc(model->word_str,
                                      sizeof(*model->word_str) *
                                      model->n_1g_alloc);
    }
    /* Add the word string in the appropriate manner. */
    /* Class words are always dynamically allocated. */
    model->word_str[model->n_words] = ckd_salloc(word);
    /* Now enter it into the hash table. */
    if (hash_table_enter_int32
        (model->wid, model->word_str[model->n_words], wid) != wid) {
        E_ERROR
            ("Hash insertion failed for word %s => %p (should not happen)\n",
             model->word_str[model->n_words], (void *) (long) (wid));
    }
    /* Increment number of words. */
    ++model->n_words;
    return wid;
}
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *fsg;
    hash_table_t *vocab;
    hash_iter_t *itor;
    int32 lastwid;
    char **wordptr;
    char *lineptr;
    char *fsgname;
    int32 lineno;
    int32 n, i, j;
    int n_state, n_trans, n_null_trans;
    glist_t nulls;
    float32 p;

    lineno = 0;
    vocab = hash_table_new(32, FALSE);
    wordptr = NULL;
    lineptr = NULL;
    nulls = NULL;
    fsgname = NULL;
    fsg = NULL;

    /* Scan upto FSG_BEGIN header */
    for (;;) {
        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
            if (n > 2) {
                E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
                        lineno);
                goto parse_error;
            }
            break;
        }
    }
    /* Save FSG name, or it will get clobbered below :(.
     * If name is missing, try the default.
     */
    if (n == 2) {
        fsgname = ckd_salloc(wordptr[1]);
    }
    else {
        E_WARN("FSG name is missing\n");
        fsgname = ckd_salloc("unknown");
    }

    /* Read #states */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &n_state) != 1)
        || (n_state <= 0)) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Now create the FSG. */
    fsg = fsg_model_init(fsgname, lmath, lw, n_state);
    ckd_free(fsgname);
    fsgname = NULL;

    /* Read start state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
        || (fsg->start_state < 0)
        || (fsg->start_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read final state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
        || (fsg->final_state < 0)
        || (fsg->final_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read transitions */
    lastwid = 0;
    n_trans = n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;

        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
            break;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
            || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {


            if (((n != 4) && (n != 5))
                || (sscanf(wordptr[1], "%d", &i) != 1)
                || (sscanf(wordptr[2], "%d", &j) != 1)
                || (i < 0) || (i >= fsg->n_state)
                || (j < 0) || (j >= fsg->n_state)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                     lineno);
                goto parse_error;
            }

            p = atof_c(wordptr[3]);
            if ((p <= 0.0) || (p > 1.0)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                     lineno);
                goto parse_error;
            }
        }
        else {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
        /* Add word to "dictionary". */
        if (n > 4) {
            if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
                (void) hash_table_enter_int32(vocab,
                                              ckd_salloc(wordptr[4]),
                                              lastwid);
                wid = lastwid;
                ++lastwid;
            }
            fsg_model_trans_add(fsg, i, j, tprob, wid);
            ++n_trans;
        }
        else {
            if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
                ++n_null_trans;
                nulls =
                    glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
            }
        }
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);


    /* Now create a string table from the "dictionary" */
    fsg->n_word = hash_table_inuse(vocab);
    fsg->n_word_alloc = fsg->n_word + 10;       /* Pad it a bit. */
    fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor)) {
        char const *word = hash_entry_key(itor->ent);
        int32 wid = (int32) (long) hash_entry_val(itor->ent);
        fsg->vocab[wid] = (char *) word;
    }
    hash_table_free(vocab);

    /* Do transitive closure on null transitions */
    nulls = fsg_model_null_trans_closure(fsg, nulls);
    glist_free(nulls);

    ckd_free(lineptr);
    ckd_free(wordptr);

    return fsg;

  parse_error:
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor))
        ckd_free((char *) hash_entry_key(itor->ent));
    glist_free(nulls);
    hash_table_free(vocab);
    ckd_free(fsgname);
    ckd_free(lineptr);
    ckd_free(wordptr);
    fsg_model_free(fsg);
    return NULL;
}
Example #7
0
static void
read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
                    logmath_t * lmath, int order, int order_max,
                    ngram_raw_t * raw_ngram)
{
    int n;
    int words_expected;
    int i;
    char *wptr[NGRAM_MAX_ORDER + 1];
    uint32 *word_out;

    *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n",
                order);
        return;
    }
    string_trim((*li)->buf, STRING_BOTH);
    words_expected = order + 1;

    if ((n =
         str2words((*li)->buf, wptr,
                   NGRAM_MAX_ORDER + 1)) < words_expected) {
        if ((*li)->buf[0] != '\0') {
            E_WARN("Format error; %d-gram ignored: %s\n", order,
                   (*li)->buf);
        }
    }
    else {
        if (order == order_max) {
            raw_ngram->weights =
                (float *) ckd_calloc(1, sizeof(*raw_ngram->weights));
            raw_ngram->weights[0] = atof_c(wptr[0]);
            if (raw_ngram->weights[0] > 0) {
                E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                       order, wptr[1]);
                raw_ngram->weights[0] = 0.0f;
            }
            raw_ngram->weights[0] =
                logmath_log10_to_log_float(lmath, raw_ngram->weights[0]);
        }
        else {
            float weight, backoff;
            raw_ngram->weights =
                (float *) ckd_calloc(2, sizeof(*raw_ngram->weights));

            weight = atof_c(wptr[0]);
            if (weight > 0) {
                E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                       order, wptr[1]);
                raw_ngram->weights[0] = 0.0f;
            }
            else {
                raw_ngram->weights[0] =
                    logmath_log10_to_log_float(lmath, weight);
            }

            if (n == order + 1) {
                raw_ngram->weights[1] = 0.0f;
            }
            else {
                backoff = atof_c(wptr[order + 1]);
                raw_ngram->weights[1] =
                    logmath_log10_to_log_float(lmath, backoff);
            }
        }
        raw_ngram->words =
            (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));
        for (word_out = raw_ngram->words + order - 1, i = 1;
             word_out >= raw_ngram->words; --word_out, i++) {
            hash_table_lookup_int32(wid, wptr[i], (int32 *) word_out);
        }
    }
}
Example #8
0
s3wid_t
dict_add_word(dict_t * d, char const *word, s3cipid_t const * p, int32 np)
{
    int32 len;
    dictword_t *wordp;
    s3wid_t newwid;
    char *wword;

    if (d->n_word >= d->max_words) {
        E_INFO("Reallocating to %d KiB for word entries\n",
               (d->max_words + S3DICT_INC_SZ) * sizeof(dictword_t) / 1024);
        d->word =
            (dictword_t *) ckd_realloc(d->word,
                                       (d->max_words +
                                        S3DICT_INC_SZ) * sizeof(dictword_t));
        d->max_words = d->max_words + S3DICT_INC_SZ;
    }

    wordp = d->word + d->n_word;
    wordp->word = (char *) ckd_salloc(word);    /* Freed in dict_free */

    /* Associate word string with d->n_word in hash table */
    if (hash_table_enter_int32(d->ht, wordp->word, d->n_word) != d->n_word) {
        ckd_free(wordp->word);
        wordp->word = NULL;
        return BAD_S3WID;
    }

    /* Fill in word entry, and set defaults */
    if (p && (np > 0)) {
        wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t));      /* Freed in dict_free */
        memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t));
        wordp->pronlen = np;
    }
    else {
        wordp->ciphone = NULL;
        wordp->pronlen = 0;
    }
    wordp->alt = BAD_S3WID;
    wordp->basewid = d->n_word;

    /* Determine base/alt wids */
    wword = ckd_salloc(word);
    if ((len = dict_word2basestr(wword)) > 0) {
	int32 w;

        /* Truncated to a baseword string; find its ID */
        if (hash_table_lookup_int32(d->ht, wword, &w) < 0) {
            E_ERROR("Missing base word for: %s\n", word);
            ckd_free(wword);
            ckd_free(wordp->word);
            wordp->word = NULL;
            return BAD_S3WID;
        }

        /* Link into alt list */
        wordp->basewid = w;
        wordp->alt = d->word[w].alt;
        d->word[w].alt = d->n_word;
    }
    ckd_free(wword);

    newwid = d->n_word++;

    return newwid;
}
static int
read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
                    logmath_t * lmath, int order, int order_max,
                    ngram_raw_t * raw_ngram)
{
    int n;
    int words_expected;
    int i;
    char *wptr[NGRAM_MAX_ORDER + 1];
    uint32 *word_out;

    if (*li) 
        *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n",
                order);
        return -1;
    }
    words_expected = order + 1;
    if ((n =
         str2words((*li)->buf, wptr,
                   NGRAM_MAX_ORDER + 1)) < words_expected) {
        E_ERROR("Format error; %d-gram ignored: %s\n", order, (*li)->buf);
        return -1;
    }

    raw_ngram->order = order;

    if (order == order_max) {
        raw_ngram->prob = atof_c(wptr[0]);
        if (raw_ngram->prob > 0) {
            E_WARN("%d-gram '%s' has positive probability\n", order, wptr[1]);
            raw_ngram->prob = 0.0f;
        }
        raw_ngram->prob =
            logmath_log10_to_log_float(lmath, raw_ngram->prob);
    }
    else {
        float weight, backoff;

        weight = atof_c(wptr[0]);
        if (weight > 0) {
            E_WARN("%d-gram '%s' has positive probability\n", order, wptr[1]);
            raw_ngram->prob = 0.0f;
        }
        else {
            raw_ngram->prob =
                logmath_log10_to_log_float(lmath, weight);
        }

        if (n == order + 1) {
            raw_ngram->backoff = 0.0f;
        }
        else {
            backoff = atof_c(wptr[order + 1]);
            raw_ngram->backoff =
                logmath_log10_to_log_float(lmath, backoff);
        }
    }
    raw_ngram->words =
        (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));
    for (word_out = raw_ngram->words + order - 1, i = 1;
         word_out >= raw_ngram->words; --word_out, i++) {
        hash_table_lookup_int32(wid, wptr[i], (int32 *) word_out);
    }
    return 0;
}