Пример #1
0
/**
 * Release one reference to a command-line object.
 *
 * The reference count is decremented; when it drops to zero (or below) the
 * option hash table, every value stored in it, the saved argument vector
 * and the object itself are all freed.
 *
 * @param cmdln Object to release. NULL is accepted and ignored.
 * @return The remaining reference count while the object is still alive,
 *         or 0 if cmdln was NULL or the object was destroyed.
 */
int
cmd_ln_free_r(cmd_ln_t *cmdln)
{
    if (cmdln == NULL)
        return 0;

    cmdln->refcount -= 1;
    if (cmdln->refcount > 0)
        return cmdln->refcount;

    /* Last reference dropped: free each stored value, then the table. */
    if (cmdln->ht != NULL) {
        int32 n_entries;
        glist_t values = hash_table_tolist(cmdln->ht, &n_entries);
        gnode_t *node = values;

        while (node != NULL) {
            hash_entry_t *entry = gnode_ptr(node);

            cmd_ln_val_free((cmd_ln_val_t *) entry->val);
            node = gnode_next(node);
        }
        glist_free(values);
        hash_table_free(cmdln->ht);
        cmdln->ht = NULL;
    }

    /* Free each saved argument string, then the vector holding them. */
    if (cmdln->f_argv != NULL) {
        int32 k = 0;

        while (k < cmdln->f_argc) {
            ckd_free(cmdln->f_argv[k]);
            ++k;
        }
        ckd_free(cmdln->f_argv);
        cmdln->f_argv = NULL;
        cmdln->f_argc = 0;
    }

    ckd_free(cmdln);
    return 0;
}
/**
 * Build the merged vocabulary of a model set and the per-model word-ID map.
 *
 * The union of all submodel word strings (plus "<UNK>" if none of them
 * supplies it) becomes the base model's sorted word list.  Afterwards
 * set->widmap[w][m] holds ngram_wid() of merged word w in submodel m, and
 * base->wid maps each word string to its merged index.
 *
 * @param base  Base model; it is accessed as an ngram_model_set_t as well.
 * @param lmath Log-math object handed to ngram_model_init().
 * @param n     N-gram order handed to ngram_model_init().
 */
static void
build_widmap(ngram_model_t * base, logmath_t * lmath, int32 n)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    ngram_model_t **models = set->lms;
    hash_table_t *vocab;
    glist_t entries;
    gnode_t *node;
    int32 m, w, n_filled;

    /* Collect the union of every submodel's words in a scratch table. */
    vocab = hash_table_new(models[0]->n_words, FALSE);
    for (m = 0; m < set->n_models; ++m) {
        ngram_model_t *lm = models[m];

        for (w = 0; w < lm->n_words; ++w) {
            /* Collisions are ignored. */
            (void) hash_table_enter_int32(vocab, lm->word_str[w], w);
        }
    }
    /* Make sure "<UNK>" is part of the merged vocabulary. */
    if (hash_table_lookup(vocab, "<UNK>", NULL) != 0) {
        (void) hash_table_enter_int32(vocab, "<UNK>", 0);
    }

    /* The unigram count is now known, so the base model can be set up. */
    ngram_model_init(base, &ngram_model_set_funcs, lmath, n,
                     hash_table_inuse(vocab));
    /* The word strings below are borrowed from the submodels. */
    base->writable = FALSE;

    /* Create the array of merged words, then sort it. */
    entries = hash_table_tolist(vocab, NULL);
    n_filled = 0;
    for (node = entries; node != NULL; node = gnode_next(node)) {
        hash_entry_t *entry = gnode_ptr(node);

        base->word_str[n_filled] = (char *) entry->key;
        n_filled++;
    }
    glist_free(entries);
    qsort(base->word_str, base->n_words, sizeof(*base->word_str),
          my_compare);

    /* Replace any previous mapping with a fresh words-by-models table. */
    if (set->widmap != NULL)
        ckd_free_2d((void **) set->widmap);
    set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models,
                                           sizeof(**set->widmap));
    for (w = 0; w < base->n_words; ++w) {
        char *word = base->word_str[w];

        /* Master mapping: word string -> merged word ID. */
        (void) hash_table_enter_int32(base->wid, word, w);
        /* Per-model mapping: merged word ID -> submodel word ID. */
        for (m = 0; m < set->n_models; ++m) {
            set->widmap[w][m] = ngram_wid(models[m], word);
        }
    }

    hash_table_free(vocab);
}
Пример #3
0
/**
 * Collapse duplicate senone sequences in a model definition.
 *
 * Each phone's row of m->sseq is used as a binary hash key.  The first
 * occurrence of a distinct row receives the next sequence ID, and every
 * phone's ssid is set to the ID of its row.  m->sseq is then replaced by a
 * table holding one row per distinct sequence and m->n_sseq is updated.
 *
 * Known limitation noted by the original author: the tmat-id is not taken
 * into account when comparing sequences.
 *
 * @param m Model definition, modified in place.
 */
static void
sseq_compress(mdef_t * m)
{
    hash_table_t *seen;
    s3senid_t **compact;
    glist_t entries;
    gnode_t *node;
    int32 seq_bytes;
    int32 n_unique;
    int32 n_listed;
    int32 pid;

    /* Size in bytes of one senone sequence (the hash key length). */
    seq_bytes = m->n_emit_state * sizeof(s3senid_t);

    seen = hash_table_new(m->n_phone, HASH_CASE_YES);
    n_unique = 0;

    /* Assign sequence IDs; an ID equal to n_unique means a new sequence. */
    for (pid = 0; pid < m->n_phone; pid++) {
        int32 id;

        id = (long) hash_table_enter_bkey(seen, (char *) (m->sseq[pid]),
                                          seq_bytes,
                                          (void *)(long) n_unique);
        if (id == n_unique)
            n_unique++;

        m->phone[pid].ssid = id;
    }

    /* Allocate the compacted table (the original notes it is freed in mdef_free()). */
    compact = (s3senid_t **) ckd_calloc_2d(n_unique, m->n_emit_state,
                                           sizeof(s3senid_t));

    entries = hash_table_tolist(seen, &n_listed);
    assert(n_listed == n_unique);

    /* Copy every distinct sequence into the row named by its ID. */
    node = entries;
    while (node != NULL) {
        hash_entry_t *entry = (hash_entry_t *) gnode_ptr(node);
        int32 row = (int32)(long) hash_entry_val(entry);

        memcpy(compact[row], hash_entry_key(entry), seq_bytes);
        node = gnode_next(node);
    }
    glist_free(entries);

    /* Swap the old table for the compacted one. */
    ckd_free_2d((void **) m->sseq);
    m->sseq = compact;
    m->n_sseq = n_unique;

    hash_table_free(seen);
}
Пример #4
0
/**
 * Read class definitions from a file and add each one to a language model.
 *
 * Every class found in the file is added to the model with a class weight
 * of 1.0.  The temporary table of definitions, including its keys and
 * classdef values, is released before returning on every path.
 *
 * @param model     Language model that receives the classes.
 * @param file_name Path of the class definition file.
 * @return 0 on success, -1 if the file could not be read or a class could
 *         not be added.
 */
int32
ngram_model_read_classdef(ngram_model_t *model,
                          const char *file_name)
{
    hash_table_t *classes;
    glist_t hl;
    gnode_t *gn;
    int32 rv = -1;
    int32 read_rv;

    classes = hash_table_new(0, FALSE);
    read_rv = read_classdef_file(classes, file_name);

    /* List the entries even when reading failed: the table may already
     * hold definitions entered before the error, and they must go through
     * the same cleanup as on success instead of being leaked. */
    hl = hash_table_tolist(classes, NULL);
    if (read_rv < 0)
        goto error_out;

    /* Create a new class in the language model for each classdef. */
    for (gn = hl; gn; gn = gnode_next(gn)) {
        hash_entry_t *he = gnode_ptr(gn);
        classdef_t *classdef = he->val;

        if (ngram_model_add_class(model, he->key, 1.0,
                                  classdef->words,
                                  classdef->weights,
                                  classdef->n_words) < 0)
            goto error_out;
    }
    rv = 0;

error_out:
    /* Shared cleanup: the table owns neither its keys nor its values, so
     * both are freed here before the table itself. */
    for (gn = hl; gn; gn = gnode_next(gn)) {
        hash_entry_t *he = gnode_ptr(gn);
        ckd_free((char *)he->key);
        classdef_free(he->val);
    }
    glist_free(hl);
    hash_table_free(classes);
    return rv;
}
/**
 * Read a language-model control file and build a model set from it.
 *
 * As parsed here, the control file is a stream of whitespace-separated
 * tokens (each at most 1023 characters):
 *   - an optional leading "{ classdef-file ... }" group naming class
 *     definition files to load;
 *   - then repeated "lm-file lm-name" pairs, each optionally followed by a
 *     "{ class-name ... }" group naming previously defined classes to add
 *     to that model.
 * If lmctlfile contains a directory separator, that directory (including
 * the trailing separator) is prepended to unqualified paths.
 *
 * @param config    Passed to ngram_model_read() and ngram_model_set_init().
 * @param lmctlfile Path of the control file.
 * @param lmath     Log-math object passed to ngram_model_read().
 * @return The new model set, or NULL on any failure.  All temporary
 *         resources (control file handle, class table, model and name
 *         lists, base directory string) are released on every path.
 */
ngram_model_t *
ngram_model_set_read(cmd_ln_t * config,
                     const char *lmctlfile, logmath_t * lmath)
{
    FILE *ctlfp;
    glist_t lms = NULL;
    glist_t lmnames = NULL;
    __BIGSTACKVARIABLE__ char str[1024];
    ngram_model_t *set = NULL;
    hash_table_t *classes;
    char *basedir, *c;

    /* Read all the class definition files to accumulate a mapping of
     * classnames to definitions. */
    classes = hash_table_new(0, FALSE);
    if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
        /* The table is still empty, so freeing it alone is sufficient. */
        hash_table_free(classes);
        return NULL;
    }

    /* Try to find the base directory to append to relative paths in
     * the lmctl file. */
    if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
        /* Include the trailing slash. */
        basedir = ckd_calloc(c - lmctlfile + 2, 1);
        memcpy(basedir, lmctlfile, c - lmctlfile + 1);
    }
    else {
        basedir = NULL;
    }
    E_INFO("Reading LM control file '%s'\n", lmctlfile);
    if (basedir)
        E_INFO("Will prepend '%s' to unqualified paths\n", basedir);

    if (fscanf(ctlfp, "%1023s", str) == 1) {
        if (strcmp(str, "{") == 0) {
            /* Load LMclass files */
            while ((fscanf(ctlfp, "%1023s", str) == 1)
                   && (strcmp(str, "}") != 0)) {
                char *deffile;
                if (basedir && !path_is_absolute(str))
                    deffile = string_join(basedir, str, NULL);
                else
                    deffile = ckd_salloc(str);
                E_INFO("Reading classdef from '%s'\n", deffile);
                if (read_classdef_file(classes, deffile) < 0) {
                    ckd_free(deffile);
                    goto error_out;
                }
                ckd_free(deffile);
            }

            if (strcmp(str, "}") != 0) {
                E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                goto error_out;
            }

            /* This might be the first LM name. */
            if (fscanf(ctlfp, "%1023s", str) != 1)
                str[0] = '\0';
        }
    }
    else
        str[0] = '\0';

    /* Read in one LM at a time and add classes to them as necessary. */
    while (str[0] != '\0') {
        char *lmfile;
        ngram_model_t *lm;

        /* NOTE(review): classdef paths above are tested with
         * path_is_absolute(), while LM paths use this first-character
         * test; confirm whether the difference is intentional. */
        if (basedir && str[0] != '/' && str[0] != '\\')
            lmfile = string_join(basedir, str, NULL);
        else
            lmfile = ckd_salloc(str);
        E_INFO("Reading lm from '%s'\n", lmfile);
        lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
        if (lm == NULL) {
            ckd_free(lmfile);
            goto error_out;
        }
        if (fscanf(ctlfp, "%1023s", str) != 1) {
            E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
            ckd_free(lmfile);
            /* lm is not on the lms list yet, so the shared cleanup would
             * not see it; release it here. */
            ngram_model_free(lm);
            goto error_out;
        }
        ckd_free(lmfile);
        /* From here on lm is owned by the lms list. */
        lms = glist_add_ptr(lms, lm);
        lmnames = glist_add_ptr(lmnames, ckd_salloc(str));

        if (fscanf(ctlfp, "%1023s", str) == 1) {
            if (strcmp(str, "{") == 0) {
                /* LM uses classes; read their names */
                while ((fscanf(ctlfp, "%1023s", str) == 1) &&
                       (strcmp(str, "}") != 0)) {
                    void *val;
                    classdef_t *classdef;

                    if (hash_table_lookup(classes, str, &val) == -1) {
                        E_ERROR("Unknown class %s in control file\n", str);
                        goto error_out;
                    }
                    classdef = val;
                    if (ngram_model_add_class(lm, str, 1.0,
                                              classdef->words,
                                              classdef->weights,
                                              classdef->n_words) < 0) {
                        goto error_out;
                    }
                    E_INFO("Added class %s containing %d words\n",
                           str, classdef->n_words);
                }
                if (strcmp(str, "}") != 0) {
                    E_ERROR("Unexpected EOF in %s\n", lmctlfile);
                    goto error_out;
                }
                if (fscanf(ctlfp, "%1023s", str) != 1)
                    str[0] = '\0';
            }
        }
        else
            str[0] = '\0';
    }
    fclose(ctlfp);
    ctlfp = NULL;               /* Tell the shared cleanup it is closed. */

    /* Now construct arrays out of lms and lmnames, and build an
     * ngram_model_set. */
    lms = glist_reverse(lms);
    lmnames = glist_reverse(lmnames);
    {
        int32 n_models;
        ngram_model_t **lm_array;
        char **name_array;
        gnode_t *lm_node, *name_node;
        int32 i;

        n_models = glist_count(lms);
        lm_array = ckd_calloc(n_models, sizeof(*lm_array));
        name_array = ckd_calloc(n_models, sizeof(*name_array));
        lm_node = lms;
        name_node = lmnames;
        for (i = 0; i < n_models; ++i) {
            lm_array[i] = gnode_ptr(lm_node);
            name_array[i] = gnode_ptr(name_node);
            lm_node = gnode_next(lm_node);
            name_node = gnode_next(name_node);
        }
        set = ngram_model_set_init(config, lm_array, name_array,
                                   NULL, n_models);

        /* Release our references only when the set was created
         * (presumably it keeps its own references to the models; verify
         * against ngram_model_set_init).  If creation failed, the cleanup
         * below frees every model on lms, and freeing them here as well
         * would release each model twice. */
        if (set != NULL) {
            for (i = 0; i < n_models; ++i) {
                ngram_model_free(lm_array[i]);
            }
        }
        ckd_free(lm_array);
        ckd_free(name_array);
    }
  error_out:
    {
        gnode_t *gn;
        glist_t hlist;

        /* Still open only when an error interrupted parsing. */
        if (ctlfp != NULL)
            fclose(ctlfp);

        if (set == NULL) {
            for (gn = lms; gn; gn = gnode_next(gn)) {
                ngram_model_free(gnode_ptr(gn));
            }
        }
        glist_free(lms);
        for (gn = lmnames; gn; gn = gnode_next(gn)) {
            ckd_free(gnode_ptr(gn));
        }
        glist_free(lmnames);
        /* Free the class table's keys and values before the table. */
        hlist = hash_table_tolist(classes, NULL);
        for (gn = hlist; gn; gn = gnode_next(gn)) {
            hash_entry_t *he = gnode_ptr(gn);
            ckd_free((char *) he->key);
            classdef_free(he->val);
        }
        glist_free(hlist);
        hash_table_free(classes);
        ckd_free(basedir);
    }
    return set;
}