Beispiel #1
0
int
fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
                  char const *altword)
{
    int i, basewid, altwid;
    int ntrans;

    /* FIXME: This will get slow, eventually... */
    for (basewid = 0; basewid < fsg->n_word; ++basewid)
        if (0 == strcmp(fsg->vocab[basewid], baseword))
            break;
    if (basewid == fsg->n_word) {
        E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
        return -1;
    }
    altwid = fsg_model_word_add(fsg, altword);
    if (fsg->altwords == NULL)
        fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
    bitvec_set(fsg->altwords, altwid);

    E_DEBUG(2,("Adding alternate word transitions (%s,%s) to FSG\n",
               baseword, altword));

    /* Look for all transitions involving baseword and duplicate them. */
    /* FIXME: This will also get slow, eventually... */
    ntrans = 0;
    for (i = 0; i < fsg->n_state; ++i) {
        hash_iter_t *itor;
        if (fsg->trans[i].trans == NULL)
            continue;
        for (itor = hash_table_iter(fsg->trans[i].trans); itor;
             itor = hash_table_iter_next(itor)) {
            glist_t trans;
            gnode_t *gn;

            trans = hash_entry_val(itor->ent);
            for (gn = trans; gn; gn = gnode_next(gn)) {
                fsg_link_t *fl = gnode_ptr(gn);
                if (fl->wid == basewid) {
                    fsg_link_t *link;

                    /* Create transition object */
                    link = listelem_malloc(fsg->link_alloc);
                    link->from_state = fl->from_state;
                    link->to_state = fl->to_state;
                    link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
                    link->wid = altwid;

                    trans =
                        glist_add_ptr(trans, (void *) link);
                    ++ntrans;
                }
            }
            hash_entry_val(itor->ent) = trans;
        }
    }

    E_DEBUG(2,("Added %d alternate word transitions\n", ntrans));
    return ntrans;
}
fsg_arciter_t *
fsg_arciter_next(fsg_arciter_t * itor)
{
    /* Iterate over non-null arcs first. */
    if (itor->gn) {
        itor->gn = gnode_next(itor->gn);
        /* Move to the next destination arc. */
        if (itor->gn == NULL) {
            itor->itor = hash_table_iter_next(itor->itor);
            if (itor->itor != NULL)
                itor->gn = hash_entry_val(itor->itor->ent);
            else if (itor->null_itor == NULL)
                goto stop_iteration;
        }
    }
    else {
        if (itor->null_itor == NULL)
            goto stop_iteration;
        itor->null_itor = hash_table_iter_next(itor->null_itor);
        if (itor->null_itor == NULL)
            goto stop_iteration;
    }
    return itor;
  stop_iteration:
    fsg_arciter_free(itor);
    return NULL;

}
Beispiel #3
0
int
ps_load_dict(ps_decoder_t *ps, char const *dictfile,
             char const *fdictfile, char const *format)
{
    cmd_ln_t *newconfig;
    dict2pid_t *d2p;
    dict_t *dict;
    hash_iter_t *search_it;

    /* Create a new scratch config to load this dict (so existing one
     * won't be affected if it fails) */
    newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
    cmd_ln_set_boolean_r(newconfig, "-dictcase",
                         cmd_ln_boolean_r(ps->config, "-dictcase"));
    cmd_ln_set_str_r(newconfig, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_r(newconfig, "-fdict", fdictfile);
    else
        cmd_ln_set_str_r(newconfig, "-fdict",
                         cmd_ln_str_r(ps->config, "-fdict"));

    /* Try to load it. */
    if ((dict = dict_init(newconfig, ps->acmod->mdef, ps->acmod->lmath)) == NULL) {
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Reinit the dict2pid. */
    if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Success!  Update the existing config to reflect new dicts and
     * drop everything into place. */
    cmd_ln_free_r(newconfig);
    cmd_ln_set_str_r(ps->config, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_r(ps->config, "-fdict", fdictfile);
    dict_free(ps->dict);
    ps->dict = dict;
    dict2pid_free(ps->d2p);
    ps->d2p = d2p;

    /* And tell all searches to reconfigure themselves. */
    for (search_it = hash_table_iter(ps->searches); search_it;
       search_it = hash_table_iter_next(search_it)) {
        if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
            hash_table_iter_free(search_it);
            return -1;
        }
    }

    return 0;
}
fsg_link_t *
fsg_arciter_get(fsg_arciter_t * itor)
{
    /* Iterate over non-null arcs first. */
    if (itor->gn)
        return (fsg_link_t *) gnode_ptr(itor->gn);
    else if (itor->null_itor)
        return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
    else
        return NULL;
}
Beispiel #5
0
const char*
ps_get_search(ps_decoder_t *ps)
{
    hash_iter_t *search_it;
    const char* name = NULL;
    for (search_it = hash_table_iter(ps->searches); search_it;
        search_it = hash_table_iter_next(search_it)) {
        if (hash_entry_val(search_it->ent) == ps->search) {
    	    name = hash_entry_key(search_it->ent);
    	    break;
        }
    }
    return name;
}
static void
ps_free_searches(ps_decoder_t *ps)
{
    if (ps->searches) {
        hash_iter_t *search_it;
        for (search_it = hash_table_iter(ps->searches); search_it;
             search_it = hash_table_iter_next(search_it)) {
            ps_search_free(hash_entry_val(search_it->ent));
        }
        hash_table_free(ps->searches);
    }

    ps->searches = NULL;
    ps->search = NULL;
}
fsg_arciter_t *
fsg_model_arcs(fsg_model_t * fsg, int32 i)
{
    fsg_arciter_t *itor;

    if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
        return NULL;
    itor = ckd_calloc(1, sizeof(*itor));
    if (fsg->trans[i].null_trans)
        itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
    if (fsg->trans[i].trans)
        itor->itor = hash_table_iter(fsg->trans[i].trans);
    if (itor->itor != NULL)
        itor->gn = hash_entry_val(itor->itor->ent);
    return itor;
}
Beispiel #8
0
static void
sseq_compress(mdef_t * m)
{
    hash_table_t *h;
    s3senid_t **sseq;
    int32 n_sseq;
    int32 p, j, k;
    glist_t g;
    gnode_t *gn;
    hash_entry_t *he;

    k = m->n_emit_state * sizeof(s3senid_t);

    h = hash_table_new(m->n_phone, HASH_CASE_YES);
    n_sseq = 0;

    /* Identify unique senone-sequence IDs.  BUG: tmat-id not being considered!! */
    for (p = 0; p < m->n_phone; p++) {
        /* Add senone sequence to hash table */
	if ((j = (long)
             hash_table_enter_bkey(h, (char *) (m->sseq[p]), k,
				   (void *)(long)n_sseq)) == n_sseq)
            n_sseq++;

        m->phone[p].ssid = j;
    }

    /* Generate compacted sseq table */
    sseq = (s3senid_t **) ckd_calloc_2d(n_sseq, m->n_emit_state, sizeof(s3senid_t));    /* freed in mdef_free() */

    g = hash_table_tolist(h, &j);
    assert(j == n_sseq);

    for (gn = g; gn; gn = gnode_next(gn)) {
        he = (hash_entry_t *) gnode_ptr(gn);
        j = (int32)(long)hash_entry_val(he);
        memcpy(sseq[j], hash_entry_key(he), k);
    }
    glist_free(g);

    /* Free the old, temporary senone sequence table, replace with compacted one */
    ckd_free_2d((void **) m->sseq);
    m->sseq = sseq;
    m->n_sseq = n_sseq;

    hash_table_free(h);
}
static void
trans_list_free(fsg_model_t * fsg, int32 i)
{
    hash_iter_t *itor;

    /* FIXME (maybe): FSG links will all get freed when we call
     * listelem_alloc_free() so don't bother freeing them explicitly
     * here. */
    if (fsg->trans[i].trans) {
        for (itor = hash_table_iter(fsg->trans[i].trans);
             itor; itor = hash_table_iter_next(itor)) {
            glist_t gl = (glist_t) hash_entry_val(itor->ent);
            glist_free(gl);
        }
    }
    hash_table_free(fsg->trans[i].trans);
    hash_table_free(fsg->trans[i].null_trans);
}
Beispiel #10
0
static void
ps_free_searches(ps_decoder_t *ps)
{
    if (ps->searches) {
        /* Release keys manually as we used ckd_salloc to add them, release every search too. */
        hash_iter_t *search_it;
        for (search_it = hash_table_iter(ps->searches); search_it;
             search_it = hash_table_iter_next(search_it)) {
            ckd_free((char *) hash_entry_key(search_it->ent));
            ps_search_free(hash_entry_val(search_it->ent));
        }

        hash_table_empty(ps->searches);
        hash_table_free(ps->searches);
    }

    ps->searches = NULL;
    ps->search = NULL;
}
Beispiel #11
0
int batch_decoder_free(batch_decoder_t *bd)
{
    hash_iter_t *itor;
    if (bd == NULL)
        return 0;
    if (bd->ctlfh != NULL)
        fclose(bd->ctlfh);
    if (bd->alignfh != NULL)
        fclose(bd->alignfh);
    if (bd->hypfh != NULL)
        fclose(bd->hypfh);
    cmd_ln_free_r(bd->config);
    search_free(bd->fwdtree);
    search_free(bd->fwdflat);
    //search_free(bd->latgen);
    search_factory_free(bd->sf);
    for (itor = hash_table_iter(bd->hypfiles); itor; itor
            = hash_table_iter_next(itor)) {
        fclose(hash_entry_val(itor->ent));
    }
    hash_table_free(bd->hypfiles);
    ckd_free(bd);
    return 0;
}
/* RAH 4.16.01 This code has several leaks that must be fixed */
dict2pid_t *dict2pid_build (mdef_t *mdef, dict_t *dict)
{
    dict2pid_t *dict2pid;
    s3ssid_t *internal, **ldiph, **rdiph, *single;
    int32 pronlen;
    hash_table_t *hs, *hp;
    glist_t g;
    gnode_t *gn;
    s3senid_t *sen;
    hash_entry_t *he;
    int32 *cslen;
    int32 i, j, b, l, r, w, n, p;
    
    E_INFO("Building PID tables for dictionary\n");

    dict2pid = (dict2pid_t *) ckd_calloc (1, sizeof(dict2pid_t));
    dict2pid->internal = (s3ssid_t **) ckd_calloc (dict_size(dict), sizeof(s3ssid_t *));
    dict2pid->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d (mdef->n_ciphone,
						       mdef->n_ciphone,
						       mdef->n_ciphone,
						       sizeof(s3ssid_t));
    dict2pid->single_lc = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone,
						       mdef->n_ciphone,
						       sizeof(s3ssid_t));
    dict2pid->n_comstate = 0;
    dict2pid->n_comsseq = 0;
    
    hs = hash_new (mdef->n_ciphone * mdef->n_ciphone * mdef->n_emit_state, HASH_CASE_YES);
    hp = hash_new (mdef->n_ciphone * mdef->n_ciphone, HASH_CASE_YES);
    
    for (w = 0, n = 0; w < dict_size(dict); w++) {
	pronlen = dict_pronlen(dict, w);
	if (pronlen < 0)
	    E_FATAL("Pronunciation-length(%s)= %d\n", dict_wordstr(dict, w), pronlen);
	n += pronlen;
    }

    internal = (s3ssid_t *) ckd_calloc (n, sizeof(s3ssid_t));
    
    /* Temporary */
    ldiph = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t));
    rdiph = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t));
    single = (s3ssid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3ssid_t));
    for (b = 0; b < mdef->n_ciphone; b++) {
	for (l = 0; l < mdef->n_ciphone; l++) {
	    for (r = 0; r < mdef->n_ciphone; r++)
		dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
	    
	    dict2pid->single_lc[b][l] = BAD_S3SSID;
	    
	    ldiph[b][l] = BAD_S3SSID;
	    rdiph[b][l] = BAD_S3SSID;
	}
	single[b] = BAD_S3SSID;
    }
    
    for (w = 0; w < dict_size(dict); w++) {
	dict2pid->internal[w] = internal;
	pronlen = dict_pronlen(dict,w);
	
	if (pronlen >= 2) {
	    b = dict_pron(dict, w, 0);
	    r = dict_pron(dict, w, 1);
	    if (NOT_S3SSID(ldiph[b][r])) {
		g = ldiph_comsseq(mdef, b, r);
		ldiph[b][r] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
		
		for (l = 0; l < mdef_n_ciphone(mdef); l++) {
		    p = mdef_phone_id_nearest (mdef, (s3cipid_t)b, (s3cipid_t)l, (s3cipid_t)r, WORD_POSN_BEGIN);
		    dict2pid->ldiph_lc[b][r][l] = mdef_pid2ssid(mdef, p);
		}
	    }
	    internal[0] = ldiph[b][r];
	    
	    for (i = 1; i < pronlen-1; i++) {
		l = b;
		b = r;
		r = dict_pron(dict, w, i+1);
		
		p = mdef_phone_id_nearest(mdef, (s3cipid_t)b, (s3cipid_t)l, (s3cipid_t)r, WORD_POSN_INTERNAL);
		internal[i] = mdef_pid2ssid(mdef, p);
	    }
	    
	    l = b;
	    b = r;
	    if (NOT_S3SSID(rdiph[b][l])) {
		g = rdiph_comsseq(mdef, b, l);
		rdiph[b][l] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
	    }
	    internal[pronlen-1] = rdiph[b][l];
	} else if (pronlen == 1) {
	    b = dict_pron(dict, w, 0);
	    if (NOT_S3SSID(single[b])) {
		g = single_comsseq(mdef, b);
		single[b] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
		
		for (l = 0; l < mdef_n_ciphone(mdef); l++) {
		    g = single_lc_comsseq(mdef, b, l);
		    dict2pid->single_lc[b][l] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		    glist_free (g);
		}
	    }
	    internal[0] = single[b];
	}
	
	internal += pronlen;
    }
    
    ckd_free_2d ((void **) ldiph);
    ckd_free_2d ((void **) rdiph);
    ckd_free ((void *) single);
    
    /* Allocate space for composite state table */
    cslen = (int32 *) ckd_calloc (dict2pid->n_comstate, sizeof(int32));
    g = hash_tolist(hs, &n);
    assert (n == dict2pid->n_comstate);
    n = 0;
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	sen = (s3senid_t *) hash_entry_key(he);
	for (i = 0; IS_S3SENID(sen[i]); i++);
	
	cslen[hash_entry_val(he)] = i+1;	/* +1 for terminating sentinel */
	
	n += (i+1);
    }
    dict2pid->comstate = (s3senid_t **) ckd_calloc (dict2pid->n_comstate, sizeof(s3senid_t *));
    sen = (s3senid_t *) ckd_calloc (n, sizeof(s3senid_t));
    for (i = 0; i < dict2pid->n_comstate; i++) {
	dict2pid->comstate[i] = sen;
	sen += cslen[i];
    }
    
    /* Build composite state table from hash table hs */
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	sen = (s3senid_t *) hash_entry_key(he);
	i = hash_entry_val(he);
	
	for (j = 0; j < cslen[i]; j++)
	    dict2pid->comstate[i][j] = sen[j];
	assert (sen[j-1] == BAD_S3SENID);

	ckd_free ((void *)sen);
    }
    ckd_free (cslen);
    glist_free (g);
    hash_free (hs);
    
    /* Allocate space for composite sseq table */
    dict2pid->comsseq = (s3senid_t **) ckd_calloc (dict2pid->n_comsseq, sizeof(s3senid_t *));
    g = hash_tolist (hp, &n);
    assert (n == dict2pid->n_comsseq);
    
    /* Build composite sseq table */
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	i = hash_entry_val(he);
	dict2pid->comsseq[i] = (s3senid_t *) hash_entry_key(he);
    }
    glist_free (g);
    hash_free (hp);
    
    /* Weight for each composite state */
    dict2pid->comwt = (int32 *) ckd_calloc (dict2pid->n_comstate, sizeof(int32));
    for (i = 0; i < dict2pid->n_comstate; i++) {
	sen = dict2pid->comstate[i];
	
	for (j = 0; IS_S3SENID(sen[j]); j++);
#if 0
	/* if comstate i has N states, its weight= (1/N^2) (Major Hack!!) */
	dict2pid->comwt[i] = - (logs3 ((float64)j) << 1);
#else
	/* if comstate i has N states, its weight= 1/N */
	dict2pid->comwt[i] = - logs3 ((float64)j);
#endif
    }
    
    E_INFO("%d composite states; %d composite sseq\n",
	   dict2pid->n_comstate, dict2pid->n_comsseq);
    
    return dict2pid;
}
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *fsg;
    hash_table_t *vocab;
    hash_iter_t *itor;
    int32 lastwid;
    char **wordptr;
    char *lineptr;
    char *fsgname;
    int32 lineno;
    int32 n, i, j;
    int n_state, n_trans, n_null_trans;
    glist_t nulls;
    float32 p;

    lineno = 0;
    vocab = hash_table_new(32, FALSE);
    wordptr = NULL;
    lineptr = NULL;
    nulls = NULL;
    fsgname = NULL;
    fsg = NULL;

    /* Scan upto FSG_BEGIN header */
    for (;;) {
        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
            if (n > 2) {
                E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
                        lineno);
                goto parse_error;
            }
            break;
        }
    }
    /* Save FSG name, or it will get clobbered below :(.
     * If name is missing, try the default.
     */
    if (n == 2) {
        fsgname = ckd_salloc(wordptr[1]);
    }
    else {
        E_WARN("FSG name is missing\n");
        fsgname = ckd_salloc("unknown");
    }

    /* Read #states */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &n_state) != 1)
        || (n_state <= 0)) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Now create the FSG. */
    fsg = fsg_model_init(fsgname, lmath, lw, n_state);
    ckd_free(fsgname);
    fsgname = NULL;

    /* Read start state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
        || (fsg->start_state < 0)
        || (fsg->start_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read final state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
        || (fsg->final_state < 0)
        || (fsg->final_state >= fsg->n_state)) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read transitions */
    lastwid = 0;
    n_trans = n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;

        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
            break;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
            || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {


            if (((n != 4) && (n != 5))
                || (sscanf(wordptr[1], "%d", &i) != 1)
                || (sscanf(wordptr[2], "%d", &j) != 1)
                || (i < 0) || (i >= fsg->n_state)
                || (j < 0) || (j >= fsg->n_state)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                     lineno);
                goto parse_error;
            }

            p = atof_c(wordptr[3]);
            if ((p <= 0.0) || (p > 1.0)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                     lineno);
                goto parse_error;
            }
        }
        else {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
        /* Add word to "dictionary". */
        if (n > 4) {
            if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
                (void) hash_table_enter_int32(vocab,
                                              ckd_salloc(wordptr[4]),
                                              lastwid);
                wid = lastwid;
                ++lastwid;
            }
            fsg_model_trans_add(fsg, i, j, tprob, wid);
            ++n_trans;
        }
        else {
            if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
                ++n_null_trans;
                nulls =
                    glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
            }
        }
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);


    /* Now create a string table from the "dictionary" */
    fsg->n_word = hash_table_inuse(vocab);
    fsg->n_word_alloc = fsg->n_word + 10;       /* Pad it a bit. */
    fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor)) {
        char const *word = hash_entry_key(itor->ent);
        int32 wid = (int32) (long) hash_entry_val(itor->ent);
        fsg->vocab[wid] = (char *) word;
    }
    hash_table_free(vocab);

    /* Do transitive closure on null transitions */
    nulls = fsg_model_null_trans_closure(fsg, nulls);
    glist_free(nulls);

    ckd_free(lineptr);
    ckd_free(wordptr);

    return fsg;

  parse_error:
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor))
        ckd_free((char *) hash_entry_key(itor->ent));
    glist_free(nulls);
    hash_table_free(vocab);
    ckd_free(fsgname);
    ckd_free(lineptr);
    ckd_free(wordptr);
    fsg_model_free(fsg);
    return NULL;
}
Beispiel #14
0
static int
run_control_file(sphinx_wave2feat_t *wtf, char const *ctlfile)
{
    hash_table_t *files;
    hash_iter_t *itor;
    lineiter_t *li;
    FILE *ctlfh;
    int nskip, runlen, npart, rv = 0;

    if ((ctlfh = fopen(ctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open control file %s", ctlfile);
        return -1;
    }
    nskip = cmd_ln_int32_r(wtf->config, "-nskip");
    runlen = cmd_ln_int32_r(wtf->config, "-runlen");
    if ((npart = cmd_ln_int32_r(wtf->config, "-npart"))) {
        /* Count lines in the file. */
        int partlen, part, nlines = 0;
        part = cmd_ln_int32_r(wtf->config, "-part");
        for (li = lineiter_start(ctlfh); li; li = lineiter_next(li))
            ++nlines;
        fseek(ctlfh, 0, SEEK_SET);
        partlen = nlines / npart;
        nskip = partlen * (part - 1);
        if (part == npart)
            runlen = -1;
        else
            runlen = partlen;
    }
    if (runlen != -1){
        E_INFO("Processing %d utterances at position %d\n", runlen, nskip);
        files = hash_table_new(runlen, HASH_CASE_YES);
    }
    else {
        E_INFO("Processing all remaining utterances at position %d\n", nskip);
        files = hash_table_new(1000, HASH_CASE_YES);
    }
    for (li = lineiter_start(ctlfh); li; li = lineiter_next(li)) {
        char *c, *infile, *outfile;

        if (nskip-- > 0)
            continue;
        if (runlen == 0) {
            lineiter_free(li);
            break;
        }
        --runlen;

        string_trim(li->buf, STRING_BOTH);
        /* Extract the file ID from the control line. */
        if ((c = strchr(li->buf, ' ')) != NULL)
            *c = '\0';
        if (strlen(li->buf) == 0) {
    	    E_WARN("Empty line %d in control file, skipping\n", li->lineno);
    	    continue;
        }
        build_filenames(wtf->config, li->buf, &infile, &outfile);
        if (hash_table_lookup(files, infile, NULL) == 0)
            continue;
        rv = sphinx_wave2feat_convert_file(wtf, infile, outfile);
        hash_table_enter(files, infile, outfile);
        if (rv != 0) {
            lineiter_free(li);
            break;
        }
    }
    for (itor = hash_table_iter(files); itor;
         itor = hash_table_iter_next(itor)) {
        ckd_free((void *)hash_entry_key(itor->ent));
        ckd_free(hash_entry_val(itor->ent));
    }
    hash_table_free(files);

    if (fclose(ctlfh) == EOF)
        E_ERROR_SYSTEM("Failed to close control file");
    return rv;
}
jsgf_rule_t *
jsgf_import_rule(jsgf_t * jsgf, char *name)
{
    char *c, *path, *newpath;
    size_t namelen, packlen;
    void *val;
    jsgf_t *imp;
    int import_all;

    /* Trim the leading and trailing <> */
    namelen = strlen(name);
    path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */
    strcpy(path, name + 1);
    /* Split off the first part of the name */
    c = strrchr(path, '.');
    if (c == NULL) {
        E_ERROR("Imported rule is not qualified: %s\n", name);
        ckd_free(path);
        return NULL;
    }
    packlen = c - path;
    *c = '\0';

    /* Look for import foo.* */
    import_all = (strlen(name) > 2
                  && 0 == strcmp(name + namelen - 3, ".*>"));

    /* Construct a filename. */
    for (c = path; *c; ++c)
        if (*c == '.')
            *c = '/';
    strcat(path, ".gram");
    newpath = path_list_search(jsgf->searchpath, path);
    if (newpath == NULL) {
        E_ERROR("Failed to find grammar %s\n", path);
        ckd_free(path);
        return NULL;
    }
    ckd_free(path);

    path = newpath;
    E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);

    /* FIXME: Also, we need to make sure that path is fully qualified
     * here, by adding any prefixes from jsgf->name to it. */
    /* See if we have parsed it already */
    if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
        E_INFO("Already imported %s\n", path);
        imp = val;
        ckd_free(path);
    }
    else {
        /* If not, parse it. */
        imp = jsgf_parse_file(path, jsgf);
        val = hash_table_enter(jsgf->imports, path, imp);
        if (val != (void *) imp) {
            E_WARN("Multiply imported file: %s\n", path);
        }
    }
    if (imp != NULL) {
        hash_iter_t *itor;
        /* Look for public rules matching rulename. */
        for (itor = hash_table_iter(imp->rules); itor;
             itor = hash_table_iter_next(itor)) {
            hash_entry_t *he = itor->ent;
            jsgf_rule_t *rule = hash_entry_val(he);
            int rule_matches;
            char *rule_name = importname2rulename(name);

            if (import_all) {
                /* Match package name (symbol table is shared) */
                rule_matches =
                    !strncmp(rule_name, rule->name, packlen + 1);
            }
            else {
                /* Exact match */
                rule_matches = !strcmp(rule_name, rule->name);
            }
            ckd_free(rule_name);
            if (rule->is_public && rule_matches) {
                void *val;
                char *newname;

                /* Link this rule into the current namespace. */
                c = strrchr(rule->name, '.');
                assert(c != NULL);
                newname = jsgf_fullname(jsgf, c);

                E_INFO("Imported %s\n", newname);
                val = hash_table_enter(jsgf->rules, newname,
                                       jsgf_rule_retain(rule));
                if (val != (void *) rule) {
                    E_WARN("Multiply defined symbol: %s\n", newname);
                }
                if (!import_all) {
                    hash_table_iter_free(itor);
                    return rule;
                }
            }
        }
    }

    return NULL;
}
glist_t
fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
{
    gnode_t *gn1;
    int updated;
    fsg_link_t *tl1, *tl2;
    int32 k, n;

    E_INFO("Computing transitive closure for null transitions\n");

    if (nulls == NULL) {
        fsg_link_t *null;
        int i, j;

        for (i = 0; i < fsg->n_state; ++i) {
            for (j = 0; j < fsg->n_state; ++j) {
                if ((null = fsg_model_null_trans(fsg, i, j)))
                    nulls = glist_add_ptr(nulls, null);
            }
        }
    }

    /*
     * Probably not the most efficient closure implementation, in general, but
     * probably reasonably efficient for a sparse null transition matrix.
     */
    n = 0;
    do {
        updated = FALSE;

        for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
            hash_iter_t *itor;

            tl1 = (fsg_link_t *) gnode_ptr(gn1);
            assert(tl1->wid < 0);

            if (fsg->trans[tl1->to_state].null_trans == NULL)
                continue;

            for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans);
                 itor; itor = hash_table_iter_next(itor)) {

                tl2 = (fsg_link_t *) hash_entry_val(itor->ent);

                k = fsg_model_null_trans_add(fsg,
                                             tl1->from_state,
                                             tl2->to_state,
                                             tl1->logs2prob +
                                             tl2->logs2prob);
                if (k >= 0) {
                    updated = TRUE;
                    if (k > 0) {
                        nulls = glist_add_ptr(nulls, (void *)
                                              fsg_model_null_trans
                                              (fsg, tl1->from_state,
                                               tl2->to_state));
                        n++;
                    }
                }
            }
        }
    } while (updated);
    
    E_INFO("%d null transitions added\n", n);

    return nulls;
}
Beispiel #17
0
glist_t
fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
{
    gnode_t *gn1;
    int updated;
    fsg_link_t *tl1, *tl2;
    int32 k, n;

    E_INFO("Computing transitive closure for null transitions\n");

    /* If our caller didn't give us a list of null-transitions,
       make such a list. Just loop through all the FSG states, 
       and all the null-transitions in that state (which are kept in
       their own hash table). */
    if (nulls == NULL) {
        int i;
        for (i = 0; i < fsg->n_state; ++i) {
            hash_iter_t *itor;
            hash_table_t *null_trans = fsg->trans[i].null_trans;
            if (null_trans == NULL)
                continue;
            for (itor = hash_table_iter(null_trans);
                 itor != NULL;
                 itor = hash_table_iter_next(itor)) {
                nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent));
            }
        }
    }

    /*
     * Probably not the most efficient closure implementation, in general, but
     * probably reasonably efficient for a sparse null transition matrix.
     */
    n = 0;
    do {
        updated = FALSE;

        for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
            hash_iter_t *itor;

            tl1 = (fsg_link_t *) gnode_ptr(gn1);
            assert(tl1->wid < 0);

            if (fsg->trans[tl1->to_state].null_trans == NULL)
                continue;

            for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans);
                 itor; itor = hash_table_iter_next(itor)) {

                tl2 = (fsg_link_t *) hash_entry_val(itor->ent);

                k = fsg_model_null_trans_add(fsg,
                                             tl1->from_state,
                                             tl2->to_state,
                                             tl1->logs2prob +
                                             tl2->logs2prob);
                if (k >= 0) {
                    updated = TRUE;
                    if (k > 0) {
                        nulls = glist_add_ptr(nulls, (void *)
                                              fsg_model_null_trans
                                              (fsg, tl1->from_state,
                                               tl2->to_state));
                        n++;
                    }
                }
            }
        }
    } while (updated);
    
    E_INFO("%d null transitions added\n", n);

    return nulls;
}
Beispiel #18
0
int
ps_add_word(ps_decoder_t *ps,
            char const *word,
            char const *phones,
            int update)
{
    int32 wid;
    s3cipid_t *pron;
    hash_iter_t *search_it;
    char **phonestr, *tmp;
    int np, i, rv;

    /* Parse phones into an array of phone IDs. */
    tmp = ckd_salloc(phones);
    np = str2words(tmp, NULL, 0);
    phonestr = ckd_calloc(np, sizeof(*phonestr));
    str2words(tmp, phonestr, np);
    pron = ckd_calloc(np, sizeof(*pron));
    for (i = 0; i < np; ++i) {
        pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]);
        if (pron[i] == -1) {
            E_ERROR("Unknown phone %s in phone string %s\n",
                    phonestr[i], tmp);
            ckd_free(phonestr);
            ckd_free(tmp);
            ckd_free(pron);
            return -1;
        }
    }
    /* No longer needed. */
    ckd_free(phonestr);
    ckd_free(tmp);

    /* Add it to the dictionary. */
    if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) {
        ckd_free(pron);
        return -1;
    }
    /* No longer needed. */
    ckd_free(pron);

    /* Now we also have to add it to dict2pid. */
    dict2pid_add_word(ps->d2p, wid);

    /* TODO: we definitely need to refactor this */
    for (search_it = hash_table_iter(ps->searches); search_it;
         search_it = hash_table_iter_next(search_it)) {
        ps_search_t *search = hash_entry_val(search_it->ent);
        if (!strcmp(PS_SEARCH_NGRAM, ps_search_name(search))) {
            ngram_model_t *lmset = ((ngram_search_t *) search)->lmset;
            if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
                hash_table_iter_free(search_it);
                return -1;
            }
        }

        if (update) {
            if ((rv = ps_search_reinit(search, ps->dict, ps->d2p) < 0)) {
                hash_table_iter_free(search_it);
                return rv;
            }
        }
    }

    /* Rebuild the widmap and search tree if requested. */
    return wid;
}