Ejemplo n.º 1
0
int
dict_write(dict_t *dict, char const *filename, char const *format)
{
    FILE *fh;
    int i;

    if ((fh = fopen(filename, "w")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s", filename);
        return -1;
    }
    for (i = 0; i < dict->n_word; ++i) {
        char *phones;
        int j, phlen;
        if (!dict_real_word(dict, i))
            continue;
        for (phlen = j = 0; j < dict_pronlen(dict, i); ++j)
            phlen += strlen(dict_ciphone_str(dict, i, j)) + 1;
        phones = ckd_calloc(1, phlen);
        for (j = 0; j < dict_pronlen(dict, i); ++j) {
            strcat(phones, dict_ciphone_str(dict, i, j));
            if (j != dict_pronlen(dict, i) - 1)
                strcat(phones, " ");
        }
        fprintf(fh, "%-30s %s\n", dict_wordstr(dict, i), phones);
        ckd_free(phones);
    }
    fclose(fh);
    return 0;
}
Ejemplo n.º 2
0
char *
ps_lookup_word(ps_decoder_t *ps, const char *word)
{
    s3wid_t wid;
    int32 phlen, j;
    char *phones;
    dict_t *dict = ps->dict;
    
    wid = dict_wordid(dict, word);
    if (wid == BAD_S3WID)
	return NULL;

    for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j)
        phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1;
    phones = ckd_calloc(1, phlen);
    for (j = 0; j < dict_pronlen(dict, wid); ++j) {
        strcat(phones, dict_ciphone_str(dict, wid, j));
        if (j != dict_pronlen(dict, wid) - 1)
            strcat(phones, " ");
    }
    return phones;
}
Ejemplo n.º 3
0
main (int32 argc, char *argv[])
{
    dict_t **d;
    int32 i, k, p, wid;
    char line[16384], *wp[1024];
    
    if (argc < 2) {
	E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
	exit(0);
    }
    d = (dict_t **) ckd_calloc (argc-1, sizeof(dict_t *));
    
    for (i = 1; i < argc; i++)
	d[i-1] = dict_init (NULL, argv[i], NULL, 0);
    
    while (fgets (line, sizeof(line), stdin) != NULL) {
	if ((k = str2words (line, wp, 1024)) < 0)
	    E_FATAL("Line too long: %s\n", line);
	if (k > 2)
	    E_FATAL("Vocab entry contains too many words\n");
	
	if (k == 0)
	    continue;
	if (k == 1)
	    wp[1] = wp[0];
	
	/* Look up word in each dictionary until found */
	k = 0;
	for (i = 0; (i < argc-1) && (k == 0); i++) {
	    wid = dict_wordid (d[i], wp[1]);
	    if (NOT_WID(wid))
		continue;
	    
	    for (wid = dict_basewid(d[i], wid);
		 IS_WID(wid);
		 wid = dict_nextalt(d[i], wid)) {
		k++;
		if (k == 1)
		    printf ("%s\t", wp[0]);
		else
		    printf ("%s(%d)\t", wp[0], k);
		
		for (p = 0; p < dict_pronlen(d[i], wid); p++)
		    printf (" %s", dict_ciphone_str (d[i], wid, p));
		printf ("\n");
	    }
	}
	if (k == 0)
	    E_ERROR("No pronunciation for: '%s'\n", wp[0]);
    }
}
Ejemplo n.º 4
0
void
dict2pid_dump(FILE * fp, dict2pid_t * d2p)
{
    int32 w, p, pronlen;
    int32 i, j, b, l, r;
    bin_mdef_t *mdef = d2p->mdef;
    dict_t *dict = d2p->dict;

    fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n");
    for (w = 0; w < dict_size(dict); w++) {
        fprintf(fp, "%30s ", dict_wordstr(dict, w));

        pronlen = dict_pronlen(dict, w);
        for (p = 0; p < pronlen; p++)
            fprintf(fp, " %5d", dict2pid_internal(d2p, w, p));
        fprintf(fp, "\n");
    }
    fprintf(fp, "#\n");

    fprintf(fp, "# LDIPH_LC (b r l ssid)\n");
    for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) {
        for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
            for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
                if (IS_S3SSID(d2p->ldiph_lc[b][r][l]))
                    fprintf(fp, "%6s %6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) r), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]);      /* RAH, ldiph_lc is returning an int32, %d expects an int16 */
            }
        }
    }
    fprintf(fp, "#\n");

    fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq);
    for (i = 0; i < mdef->n_sseq; i++) {
        fprintf(fp, "%5d ", i);
        for (j = 0; j < bin_mdef_n_emit_state(mdef); j++)
            fprintf(fp, " %5d", mdef->sseq[i][j]);
        fprintf(fp, "\n");
    }
    fprintf(fp, "#\n");
    fprintf(fp, "# END\n");

    fflush(fp);
}
Ejemplo n.º 5
0
s3ssid_t
dict2pid_internal(dict2pid_t *d2p,
                  int32 wid,
                  int pos)
{
    int b, l, r, p;
    dict_t *dict = d2p->dict;
    bin_mdef_t *mdef = d2p->mdef;

    if (pos == 0 || pos == dict_pronlen(dict, wid))
        return BAD_S3SSID;

    b = dict_pron(dict, wid, pos);
    l = dict_pron(dict, wid, pos - 1);
    r = dict_pron(dict, wid, pos + 1);
    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                  (s3cipid_t) l, (s3cipid_t) r,
                                  WORD_POSN_INTERNAL);
    return bin_mdef_pid2ssid(mdef, p);
}
Ejemplo n.º 6
0
int
dict2pid_add_word(dict2pid_t *d2p,
                  int32 wid)
{
    bin_mdef_t *mdef = d2p->mdef;
    dict_t *d = d2p->dict;

    if (dict_pronlen(d, wid) > 1) {
        s3cipid_t l;
        /* Make sure we have left and right context diphones for this
         * word. */
        if (d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][0]
            == BAD_S3SSID) {
            E_INFO("Filling in left-context diphones for %s(?,%s)\n",
                   bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)),
                   bin_mdef_ciphone_str(mdef, dict_second_phone(d, wid)));
            for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
                s3ssid_t p
                    = bin_mdef_phone_id_nearest(mdef,
                                                dict_first_phone(d, wid), l,
                                                dict_second_phone(d, wid),
                                                WORD_POSN_BEGIN);
                d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][l]
                    = bin_mdef_pid2ssid(mdef, p);
            }
        }
        if (d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid
            == 0) {
            s3ssid_t *rmap;
            s3ssid_t *tmpssid;
            s3cipid_t *tmpcimap;
            s3cipid_t r;

            E_INFO("Filling in right-context diphones for %s(%s,?)\n",
                   bin_mdef_ciphone_str(mdef, dict_last_phone(d, wid)),
                   bin_mdef_ciphone_str(mdef, dict_second_last_phone(d, wid)));
            rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap));
            for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
                s3ssid_t p
                    = bin_mdef_phone_id_nearest(mdef,
                                                dict_last_phone(d, wid),
                                                dict_second_last_phone(d, wid), r,
                                                WORD_POSN_END);
                rmap[r] = bin_mdef_pid2ssid(mdef, p);
            }
            tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid));
            tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap));
            compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef));
            for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++)
                ;
            d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].ssid = tmpssid;
            d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].cimap = tmpcimap;
            d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid = r;
            ckd_free(rmap);
        }
    }
    else {
        /* Make sure we have a left-right context triphone entry for
         * this word. */
        E_INFO("Filling in context triphones for %s(?,?)\n",
               bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)));
        if (d2p->lrdiph_rc[dict_first_phone(d, wid)][0][0] == BAD_S3SSID) {
            populate_lrdiph(d2p, NULL, dict_first_phone(d, wid));
        }
    }

    return 0;
}
Ejemplo n.º 7
0
/*
 * Add the word emitted by the given transition (fsglink) to the given lextree
 * (rooted at root), and return the new lextree root.  (There may actually be
 * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
 * "root" is the head of this list.)
 * lclist, rclist: sets of left and right context phones for this link.
 * alloc_head: head of a linear list of all allocated pnodes for the parent
 * FSG state, kept elsewhere and updated by this routine.
 */
static fsg_pnode_t *
psubtree_add_trans(fsg_lextree_t *lextree, 
                   fsg_pnode_t * root,
                   fsg_glist_linklist_t **curglist,
                   fsg_link_t * fsglink,
                   int16 *lclist, int16 *rclist,
                   fsg_pnode_t ** alloc_head)
{
    int32 silcipid;             /* Silence CI phone ID */
    int32 pronlen;              /* Pronunciation length */
    int32 wid;                  /* FSG (not dictionary!!) word ID */
    int32 dictwid;              /* Dictionary (not FSG!!) word ID */
    int32 ssid;                 /* Senone Sequence ID */
    gnode_t *gn;
    fsg_pnode_t *pnode, *pred, *head;
    int32 n_ci, p, lc, rc;
    glist_t lc_pnodelist;       /* Temp pnodes list for different left contexts */
    glist_t rc_pnodelist;       /* Temp pnodes list for different right contexts */
    int32 i, j;

    silcipid = bin_mdef_silphone(lextree->mdef);
    n_ci = bin_mdef_n_ciphone(lextree->mdef);

    wid = fsg_link_wid(fsglink);
    assert(wid >= 0);           /* Cannot be a null transition */
    dictwid = dict_wordid(lextree->dict,
                          fsg_model_word_str(lextree->fsg, wid));
    pronlen = dict_pronlen(lextree->dict, dictwid);
    assert(pronlen >= 1);

    assert(lclist[0] >= 0);     /* At least one phonetic context provided */
    assert(rclist[0] >= 0);

    head = *alloc_head;
    pred = NULL;

    if (pronlen == 1) {         /* Single-phone word */
        int ci = dict_first_phone(lextree->dict, dictwid);
        /* Only non-filler words are mpx */
        if (dict_filler_word(lextree->dict, dictwid)) {
            /*
             * Left diphone ID for single-phone words already assumes SIL is right
             * context; only left contexts need to be handled.
             */
            lc_pnodelist = NULL;

            for (i = 0; lclist[i] >= 0; i++) {
                lc = lclist[i];
                ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
                /* Check if this ssid already allocated for some other context */
                for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                    pnode = (fsg_pnode_t *) gnode_ptr(gn);

                    if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
                        /* already allocated; share it for this context phone */
                        fsg_pnode_add_ctxt(pnode, lc);
                        break;
                    }
                }

                if (!gn) {      /* ssid not already allocated */
                    pnode =
                        (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
                    pnode->ctx = lextree->ctx;
                    pnode->next.fsglink = fsglink;
                    pnode->logs2prob =
                        fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
                    pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
                    pnode->ppos = 0;
                    pnode->leaf = TRUE;
                    pnode->sibling = root;      /* All root nodes linked together */
                    fsg_pnode_add_ctxt(pnode, lc);      /* Initially zeroed by calloc above */
                    pnode->alloc_next = head;
                    head = pnode;
                    root = pnode;

                    hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                    lc_pnodelist =
                        glist_add_ptr(lc_pnodelist, (void *) pnode);
                }
            }

            glist_free(lc_pnodelist);
        }
        else {                  /* Filler word; no context modelled */
            ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */

            pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
            pnode->ctx = lextree->ctx;
            pnode->next.fsglink = fsglink;
            pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip;
            pnode->ci_ext = silcipid;   /* Presents SIL as context to neighbors */
            pnode->ppos = 0;
            pnode->leaf = TRUE;
            pnode->sibling = root;
            fsg_pnode_add_all_ctxt(&(pnode->ctxt));
            pnode->alloc_next = head;
            head = pnode;
            root = pnode;

            hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);
        }
    }
    else {                      /* Multi-phone word */
        fsg_pnode_t **ssid_pnode_map;       /* Temp array of ssid->pnode mapping */
        ssid_pnode_map =
            (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
        lc_pnodelist = NULL;
        rc_pnodelist = NULL;

        for (p = 0; p < pronlen; p++) {
            int ci = dict_pron(lextree->dict, dictwid, p);
            if (p == 0) {       /* Root phone, handle required left contexts */
                /* Find if we already have an lc_pnodelist for the first phone of this word */
		fsg_glist_linklist_t *predglist=*curglist;
		fsg_glist_linklist_t *glist=*curglist;

                rc = dict_pron(lextree->dict, dictwid, 1);
		while (glist && glist->glist && glist->ci != ci && glist->rc != rc){
		    glist = glist->next;
		}
		if (glist && glist->ci == ci && glist->rc == rc && glist->glist) {
		    /* We've found a valid glist. Hook to it and move to next phoneme */
		    lc_pnodelist = glist->glist;
                    /* Set the predecessor node for the future tree first */
		    pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist);
		    continue;
		}
		else {
		    /* Two cases that can bring us here
		     * a. glist == NULL, i.e. end of current list. Create new entry.
		     * b. glist->glist == NULL, i.e. first entry into list.
		     */
		    if (!glist) { /* Case a; reduce it to case b by allocing glist */
		        glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t));
			glist->next = predglist;
                        *curglist = glist;
		    }
		    glist->ci = ci;
                    glist->rc = rc;
                    glist->lc = -1;
		    lc_pnodelist = glist->glist = NULL; /* Gets created below */
		}

                for (i = 0; lclist[i] >= 0; i++) {
                    lc = lclist[i];
                    ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc);
                    /* Compression is not done by d2p, so we do it
                     * here.  This might be slow, but it might not
                     * be... we'll see. */
                    pnode = ssid_pnode_map[0];
                    for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) {
                        pnode = ssid_pnode_map[j];
                        if (hmm_nonmpx_ssid(&pnode->hmm) == ssid)
                            break;
                    }
                    assert(j < n_ci);
                    if (!pnode) {       /* Allocate pnode for this new ssid */
                        pnode =
                            (fsg_pnode_t *) ckd_calloc(1,
                                                       sizeof
                                                       (fsg_pnode_t));
                        pnode->ctx = lextree->ctx;
	                /* This bit is tricky! For now we'll put the prob in the final link only */
                        /* pnode->logs2prob = fsg_link_logs2prob(fsglink)
                           + lextree->wip + lextree->pip; */
                        pnode->logs2prob = lextree->wip + lextree->pip;
                        pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
                        pnode->ppos = 0;
                        pnode->leaf = FALSE;
                        pnode->sibling = root;  /* All root nodes linked together */
                        pnode->alloc_next = head;
                        head = pnode;
                        root = pnode;

                        hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                        lc_pnodelist =
                            glist_add_ptr(lc_pnodelist, (void *) pnode);
                        ssid_pnode_map[j] = pnode;
                    }
                    fsg_pnode_add_ctxt(pnode, lc);
                }
		/* Put the lc_pnodelist back into glist */
		glist->glist = lc_pnodelist;

                /* The predecessor node for the future tree is the root */
		pred = root;
            }
            else if (p != pronlen - 1) {        /* Word internal phone */
                fsg_pnode_t    *pnodeyoungest;

                ssid = dict2pid_internal(lextree->d2p, dictwid, p);
	        /* First check if we already have this ssid in our tree */
		pnode = pred->next.succ;
		pnodeyoungest = pnode; /* The youngest sibling */
		while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) {
		    pnode = pnode->sibling;
		}
		if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) {
		    /* Found the ssid; go to next phoneme */
		    pred = pnode;
		    continue;
		}

		/* pnode not found, allocate it */
                pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
                pnode->ctx = lextree->ctx;
                pnode->logs2prob = lextree->pip;
                pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
                pnode->ppos = p;
                pnode->leaf = FALSE;
                pnode->sibling = pnodeyoungest; /* May be NULL */
                if (p == 1) {   /* Predecessor = set of root nodes for left ctxts */
                    for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                        pred = (fsg_pnode_t *) gnode_ptr(gn);
                        pred->next.succ = pnode;
                    }
                }
                else {          /* Predecessor = word internal node */
                    pred->next.succ = pnode;
                }
                pnode->alloc_next = head;
                head = pnode;

                hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                pred = pnode;
            }
            else {              /* Leaf phone, handle required right contexts */
	        /* Note, leaf phones are not part of the tree */
                xwdssid_t *rssid;
                memset((void *) ssid_pnode_map, 0,
                       n_ci * sizeof(fsg_pnode_t *));
                lc = dict_pron(lextree->dict, dictwid, p-1);
                rssid = dict2pid_rssid(lextree->d2p, ci, lc);

                for (i = 0; rclist[i] >= 0; i++) {
                    rc = rclist[i];

                    j = rssid->cimap[rc];
                    ssid = rssid->ssid[j];
                    pnode = ssid_pnode_map[j];

                    if (!pnode) {       /* Allocate pnode for this new ssid */
                        pnode =
                            (fsg_pnode_t *) ckd_calloc(1,
                                                       sizeof
                                                       (fsg_pnode_t));
                        pnode->ctx = lextree->ctx;
			/* We are plugging the word prob here. Ugly */
                        /* pnode->logs2prob = lextree->pip; */
                        pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip;
                        pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
                        pnode->ppos = p;
                        pnode->leaf = TRUE;
                        pnode->sibling = rc_pnodelist ?
                            (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
                        pnode->next.fsglink = fsglink;
                        pnode->alloc_next = head;
                        head = pnode;

                        hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext);

                        rc_pnodelist =
                            glist_add_ptr(rc_pnodelist, (void *) pnode);
                        ssid_pnode_map[j] = pnode;
                    }
                    else {
                        assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
                    }
                    fsg_pnode_add_ctxt(pnode, rc);
                }

                if (p == 1) {   /* Predecessor = set of root nodes for left ctxts */
                    for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                        pred = (fsg_pnode_t *) gnode_ptr(gn);
                        if (!pred->next.succ)
                            pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
                        else {
                            /* Link to the end of the sibling chain */
                            fsg_pnode_t *succ = pred->next.succ;
                            while (succ->sibling) succ = succ->sibling;
                            succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist);
                            /* Since all entries of lc_pnodelist point
                               to the same array, sufficient to update it once */
                            break; 
                        }
                    }
                }
                else {          /* Predecessor = word internal node */
                    if (!pred->next.succ)
                        pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
                    else {
                        /* Link to the end of the sibling chain */
                        fsg_pnode_t *succ = pred->next.succ;
                        while (succ->sibling) succ = succ->sibling;
                        succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
                    }
                }
            }
        }

        ckd_free((void *) ssid_pnode_map);
        /* glist_free(lc_pnodelist);  Nope; this gets freed outside */
        glist_free(rc_pnodelist);
    }

    *alloc_head = head;

    return root;
}
Ejemplo n.º 8
0
/**
 * Compute the left and right context CIphone sets for each state.
 */
static void
fsg_lextree_lc_rc(fsg_lextree_t *lextree)
{
    int32 s, i, j;
    int32 n_ci;
    fsg_model_t *fsg;
    int32 silcipid;
    int32 len;

    silcipid = bin_mdef_silphone(lextree->mdef);
    assert(silcipid >= 0);
    n_ci = bin_mdef_n_ciphone(lextree->mdef);

    fsg = lextree->fsg;
    /*
     * lextree->lc[s] = set of left context CIphones for state s.  Similarly, rc[s]
     * for right context CIphones.
     */
    lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
    lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
    E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n",
           fsg->n_state * (n_ci + 1) * 2,
           fsg->n_state * (n_ci + 1) * 2 / 1024);


    for (s = 0; s < fsg->n_state; s++) {
        fsg_arciter_t *itor;
        for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
            fsg_link_t *l = fsg_arciter_get(itor);
            int32 dictwid; /**< Dictionary (not FSG) word ID!! */

            if (fsg_link_wid(l) >= 0) {
                dictwid = dict_wordid(lextree->dict,
                                      fsg_model_word_str(lextree->fsg, l->wid));

                /*
                 * Add the first CIphone of l->wid to the rclist of state s, and
                 * the last CIphone to lclist of state d.
                 * (Filler phones are a pain to deal with.  There is no direct
                 * marking of a filler phone; but only filler words are supposed to
                 * use such phones, so we use that fact.  HACK!!  FRAGILE!!)
                 */
                if (fsg_model_is_filler(fsg, fsg_link_wid(l))) {
                    /* Filler phone; use silence phone as context */
                    lextree->rc[fsg_link_from_state(l)][silcipid] = 1;
                    lextree->lc[fsg_link_to_state(l)][silcipid] = 1;
                }
                else {
                    len = dict_pronlen(lextree->dict, dictwid);
                    lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1;
                    lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1;
                }
            }
        }
    }

    for (s = 0; s < fsg->n_state; s++) {
        /*
         * Add SIL phone to the lclist and rclist of each state.  Strictly
         * speaking, only needed at start and final states, respectively, but
         * all states considered since the user may change the start and final
         * states.  In any case, most applications would have a silence self
         * loop at each state, hence these would be needed anyway.
         */
        lextree->lc[s][silcipid] = 1;
        lextree->rc[s][silcipid] = 1;
    }

    /*
     * Propagate lc and rc lists past null transitions.  (Since FSG contains
     * null transitions closure, no need to worry about a chain of successive
     * null transitions.  Right??)
     *
     * This can't be joined with the previous loop because we first calculate 
     * contexts and only then we can propagate them.
     */
    for (s = 0; s < fsg->n_state; s++) {
        fsg_arciter_t *itor;
        for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
            fsg_link_t *l = fsg_arciter_get(itor);
            if (fsg_link_wid(l) < 0) {

                /*
                 * lclist(d) |= lclist(s), because all the words ending up at s, can
                 * now also end at d, becoming the left context for words leaving d.
                 */
                for (i = 0; i < n_ci; i++)
                    lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i];
                /*
                 * Similarly, rclist(s) |= rclist(d), because all the words leaving d
                 * can equivalently leave s, becoming the right context for words
                 * ending up at s.
                 */
                for (i = 0; i < n_ci; i++)
                    lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i];
            }
        }
    }

    /* Convert the bit-vector representation into a list */
    for (s = 0; s < fsg->n_state; s++) {
        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (lextree->lc[s][i]) {
                lextree->lc[s][j] = i;
                j++;
            }
        }
        lextree->lc[s][j] = -1;     /* Terminate the list */

        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (lextree->rc[s][i]) {
                lextree->rc[s][j] = i;
                j++;
            }
        }
        lextree->rc[s][j] = -1;     /* Terminate the list */
    }
}
Ejemplo n.º 9
0
/*
 * Add the word emitted by the given transition (fsglink) to the given lextree
 * (rooted at root), and return the new lextree root.  (There may actually be
 * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
 * "root" is the head of this list.)
 * lclist, rclist: sets of left and right context phones for this link.
 * alloc_head: head of a linear list of all allocated pnodes for the parent
 * FSG state, kept elsewhere and updated by this routine.
 */
static fsg_pnode_t *
psubtree_add_trans(fsg_lextree_t *lextree, 
                   fsg_pnode_t * root,
                   fsg_glist_linklist_t **curglist,
                   fsg_link_t * fsglink,
                   int16 *lclist, int16 *rclist,
                   fsg_pnode_t ** alloc_head)
{
    int32 silcipid;             /* Silence CI phone ID */
    int32 pronlen;              /* Pronunciation length */
    int32 wid;                  /* FSG (not dictionary!!) word ID */
    int32 dictwid;              /* Dictionary (not FSG!!) word ID */
    int32 ssid;                 /* Senone Sequence ID */
    int32 tmatid;
    gnode_t *gn;
    fsg_pnode_t *pnode, *pred, *head;
    int32 n_ci, p, lc, rc;
    glist_t lc_pnodelist;       /* Temp pnodes list for different left contexts */
    glist_t rc_pnodelist;       /* Temp pnodes list for different right contexts */
    int32 i, j;
    int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0;

    silcipid = bin_mdef_silphone(lextree->mdef);
    n_ci = bin_mdef_n_ciphone(lextree->mdef);

    wid = fsg_link_wid(fsglink);
    assert(wid >= 0);           /* Cannot be a null transition */
    dictwid = dict_wordid(lextree->dict,
                          fsg_model_word_str(lextree->fsg, wid));
    pronlen = dict_pronlen(lextree->dict, dictwid);
    assert(pronlen >= 1);

    assert(lclist[0] >= 0);     /* At least one phonetic context provided */
    assert(rclist[0] >= 0);

    head = *alloc_head;
    pred = NULL;

    if (pronlen == 1) {         /* Single-phone word */
        int ci = dict_first_phone(lextree->dict, dictwid);
        /* Only non-filler words are mpx */
        if (dict_filler_word(lextree->dict, dictwid)) {
            /*
             * Left diphone ID for single-phone words already assumes SIL is right
             * context; only left contexts need to be handled.
             */
            lc_pnodelist = NULL;

            for (i = 0; lclist[i] >= 0; i++) {
                lc = lclist[i];
                ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
                tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
                /* Check if this ssid already allocated for some other context */
                for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
                    pnode = (fsg_pnode_t *) gnode_ptr(gn);

                    if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
                        /* already allocated; share it for this context phone */
                        fsg_pnode_add_ctxt(pnode, lc);
                        break;
                    }
                }

                if (!gn) {      /* ssid not already allocated */
                    pnode =
                        (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
                    pnode->ctx = lextree->ctx;
                    pnode->next.fsglink = fsglink;
                    pnode->logs2prob =
                        (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
                        + lextree->wip + lextree->pip;
                    pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
                    pnode->ppos = 0;
                    pnode->leaf = TRUE;
                    pnode->sibling = root;      /* All root nodes linked together */
                    fsg_pnode_add_ctxt(pnode, lc);      /* Initially zeroed by calloc above */
                    pnode->alloc_next = head;
                    head = pnode;
                    root = pnode;
                    ++n_lc_alloc;

                    hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);

                    lc_pnodelist =
                        glist_add_ptr(lc_pnodelist, (void *) pnode);
                }
            }
Ejemplo n.º 10
0
/*ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists. This makes the current implmentation more complicated than necessary. */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;
    
    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
			      cmd_ln_str("-feat"),
			      cmd_ln_str("-cmn"),
			      cmd_ln_str("-varnorm"),
			      cmd_ln_str("-agc"),
			      cmd_ln_str("-mdef"),
			      cmd_ln_str("-dict"),
			      cmd_ln_str("-fdict"),
			      "",	/* Hack!! Hardwired constant 
						for -compsep argument */
			      cmd_ln_str("-lm"),
			      cmd_ln_str("-lmctlfn"),
			      cmd_ln_str("-lmdumpdir"),
			      cmd_ln_str("-fillpen"),
			      cmd_ln_str("-senmgau"),
			      cmd_ln_float32("-silprob"),
			      cmd_ln_float32("-fillprob"),
			      cmd_ln_float32("-lw"),
			      cmd_ln_float32("-wip"),
			      cmd_ln_float32("-uw"),
			      cmd_ln_str("-mean"),
			      cmd_ln_str("-var"),
			      cmd_ln_float32("-varfloor"),
			      cmd_ln_str("-mixw"),
			      cmd_ln_float32("-mixwfloor"),
			      cmd_ln_str("-subvq"),
			      cmd_ln_str("-gs"),
			      cmd_ln_str("-tmat"),
			      cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
      E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;
    
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset=kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);
    
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
	E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
      }
    }else if(lm){
      if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
	E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    
    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
	E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    
    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their 
     * recognition.  They are merely dummy words (anchors) at the beginning 
     * and end of each utterance.
     */
    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
	lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

	for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
	for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;

      }
    }else if(lm){ /* No LM is set at this point*/
      lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
      lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
      for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
      for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;

    }
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
	E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    
    
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));
    
    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
	ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
	if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
	if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
	E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
								kb->n_lextree);
	kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));


    if(lmset){
      kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
      /* Just allocate pointers*/
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

      for(i=0;i<kbcore_nlm(kbcore);i++){
	E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
	n=0;
	for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */
	  wp[j].wid=-1;
	  wp[j].prob=-1;
	}
	n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp);
	E_INFO("Size of word table after unigram + words in class: %d.\n",n);
	if (n < 1)
	  E_FATAL("%d active words in %s\n", n,lmset[i].name);
	n = wid_wordprob2alt(dict,wp,n);
	E_INFO("Size of word table after adding alternative prons: %d.\n",n);
	if (cmd_ln_int32("-treeugprob") == 0) {
	  for (i = 0; i < n; i++)
	    wp[i].prob = -1;    	/* Flatten all initial probabilities */
	}

	for (j = 0; j < kb->n_lextree; j++) {
	  kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
	  lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
	  E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
		 kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
	}
      }

    }else if (lm){
      E_INFO("Creating Unigram Table\n");
      n=0;
      n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp);
      E_INFO("Size of word table after unigram + words in class: %d\n",n);
      if (n < 1)
	E_FATAL("%d active words\n", n);
      n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
      
      /* Retain or remove unigram probs from lextree, depending on option */
      if (cmd_ln_int32("-treeugprob") == 0) {
	for (i = 0; i < n; i++)
	  wp[i].prob = -1;    	/* Flatten all initial probabilities */
      }
      
      /* Create the desired no. of unigram lextrees */
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
      for (i = 0; i < kb->n_lextree; i++) {
	kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
	lextree_type (kb->ugtree[i]) = 0;
      }
      E_INFO("Lextrees(%d), %d nodes(ug)\n",
	     kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }



    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
	if (dict_filler_word(dict, i)) {
	    wp[n].wid = i;
	    wp[n].prob = fillpen (kbcore->fillpen, i);
	    n++;
	}
    }


    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
	lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);


    E_INFO("Lextrees(%d), %d nodes(filler)\n",
	     kb->n_lextree, 
	     lextree_n_node(kb->fillertree[0]));
    

    if (cmd_ln_int32("-lextreedump")) {
      if(lmset){
	E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "UGTREE %d\n", i);
	lextree_dump (kb->ugtree[i], dict, stderr);
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "FILLERTREE %d\n", i);
	lextree_dump (kb->fillertree[i], dict, stderr);
      }
      fflush (stderr);
    }
    
    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
				kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
			  cmd_ln_float64("-beam"),
			  cmd_ln_float64("-pbeam"),
			  cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
	   kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);
    
    /*Sections of optimization related parameters*/
    kb->ds_ratio=cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);
    
    kb->rec_bstcid=-1;
    kb->skip_count=0;
    
    kb->cond_ds=cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);
    
    if(kb->cond_ds>0&&kb->kbcore->gs==NULL) E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs=cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq=cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if(kb->ci_pbeam>10000000){
      E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }
    
    kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window=cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

	kb->pl_window_start=0;

    kb->pl_beam=logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++) 
      ;

    kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));
  

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
	E_FATAL("feat_array_alloc() failed\n");
    
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));
    
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;
    
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if(lmset)
      n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
      n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */
    
    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
Ejemplo n.º 11
0
dict2pid_t *
dict2pid_build(bin_mdef_t * mdef, dict_t * dict)
{
    dict2pid_t *dict2pid;
    s3ssid_t ***rdiph_rc;
    bitvec_t *ldiph, *rdiph, *single;
    int32 pronlen;
    int32 b, l, r, w, p;

    E_INFO("Building PID tables for dictionary\n");
    assert(mdef);
    assert(dict);

    dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t));
    dict2pid->refcount = 1;
    dict2pid->mdef = bin_mdef_retain(mdef);
    dict2pid->dict = dict_retain(dict);
    E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n",
           mdef->n_ciphone, sizeof(s3ssid_t),
           mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024);
    dict2pid->ldiph_lc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));
    /* Only used internally to generate rssid */
    rdiph_rc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));

    dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone,
                                                       mdef->n_ciphone,
                                                       mdef->n_ciphone,
                                                       sizeof
                                                       (s3ssid_t));
    /* Actually could use memset for this, if BAD_S3SSID is guaranteed
     * to be 65535... */
    for (b = 0; b < mdef->n_ciphone; ++b) {
        for (r = 0; r < mdef->n_ciphone; ++r) {
            for (l = 0; l < mdef->n_ciphone; ++l) {
                dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
                dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID;
                rdiph_rc[b][l][r] = BAD_S3SSID;
            }
        }
    }

    /* Track which diphones / ciphones have been seen. */
    ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    single = bitvec_alloc(mdef->n_ciphone);

    for (w = 0; w < dict_size(dict2pid->dict); w++) {
        pronlen = dict_pronlen(dict, w);

        if (pronlen >= 2) {
            b = dict_first_phone(dict, w);
            r = dict_second_phone(dict, w);
            /* Populate ldiph_lc */
            if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) {
                /* Mark this diphone as done */
                bitvec_set(ldiph, b * mdef->n_ciphone + r);

                /* Record all possible ssids for b(?,r) */
                for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                              (s3cipid_t) l, (s3cipid_t) r,
                                              WORD_POSN_BEGIN);
                    dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p);
                }
            }


            /* Populate rdiph_rc */
            l = dict_second_last_phone(dict, w);
            b = dict_last_phone(dict, w);
            if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) {
                /* Mark this diphone as done */
                bitvec_set(rdiph, b * mdef->n_ciphone + l);

                for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                              (s3cipid_t) l, (s3cipid_t) r,
                                              WORD_POSN_END);
                    rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p);
                }
            }
        }
        else if (pronlen == 1) {
            b = dict_pron(dict, w, 0);
            E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n",
                       dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b)));
            /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */
            if (bitvec_is_clear(single, b)) {
                populate_lrdiph(dict2pid, rdiph_rc, b);
                bitvec_set(single, b);
            }
        }
    }

    bitvec_free(ldiph);
    bitvec_free(rdiph);
    bitvec_free(single);

    /* Try to compress rdiph_rc into rdiph_rc_compressed */
    compress_right_context_tree(dict2pid, rdiph_rc);
    compress_left_right_context_tree(dict2pid);

    ckd_free_3d(rdiph_rc);

    dict2pid_report(dict2pid);
    return dict2pid;
}
Ejemplo n.º 12
0
Archivo: kb.c Proyecto: 10v/cmusphinx
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    
    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
			      "1s_c_d_dd",    /* Hack!! Hardwired constant 
						for -feat argument */
			      cmd_ln_str("-cmn"),
			      cmd_ln_str("-varnorm"),
			      cmd_ln_str("-agc"),
			      cmd_ln_str("-mdef"),
			      cmd_ln_str("-dict"),
			      cmd_ln_str("-fdict"),
			      "",	/* Hack!! Hardwired constant 
						for -compsep argument */
			      cmd_ln_str("-lm"),
			      cmd_ln_str("-fillpen"),
			      cmd_ln_float32("-silprob"),
			      cmd_ln_float32("-fillprob"),
			      cmd_ln_float32("-lw"),
			      cmd_ln_float32("-wip"),
			      cmd_ln_float32("-uw"),
			      cmd_ln_str("-mean"),
			      cmd_ln_str("-var"),
			      cmd_ln_float32("-varfloor"),
			      cmd_ln_str("-mixw"),
			      cmd_ln_float32("-mixwfloor"),
			      cmd_ln_str("-subvq"),
			      cmd_ln_str("-tmat"),
			      cmd_ln_float32("-tmatfloor"));
    
    kbcore = kb->kbcore;
    
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);
    
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
	E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    
    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
	E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    
    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their 
     * recognition.  They are merely dummy words (anchors) at the beginning 
     * and end of each utterance.
     */
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
    
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
	E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    
    E_INFO("Building lextrees\n");
    
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), 
							sizeof(int32));
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
    n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp);
    if (n < 1)
	E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
    
    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
	for (i = 0; i < n; i++)
	    wp[i].prob = -1;    	/* Flatten all initial probabilities */
    }
    
    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
	ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
	if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
	if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;
    
    /* Create the desired no. of unigram lextrees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
	E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
								kb->n_lextree);
	kb->n_lextree = 1;
    }
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
	lextree_type (kb->ugtree[i]) = 0;
    }
    bitvec_free (lc_active);
    ckd_free ((void *) lc);
    
    /* Create filler lextrees */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
	if (dict_filler_word(dict, i)) {
	    wp[n].wid = i;
	    wp[n].prob = fillpen (kbcore->fillpen, i);
	    n++;
	}
    }
    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
	lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    
    E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n",
	   kb->n_lextree, lextree_n_node(kb->ugtree[0]), 
			lextree_n_node(kb->fillertree[0]));
    
    if (cmd_ln_int32("-lextreedump")) {
	for (i = 0; i < kb->n_lextree; i++) {
	    fprintf (stderr, "UGTREE %d\n", i);
	    lextree_dump (kb->ugtree[i], dict, stderr);
	}
	for (i = 0; i < kb->n_lextree; i++) {
	    fprintf (stderr, "FILLERTREE %d\n", i);
	    lextree_dump (kb->fillertree[i], dict, stderr);
	}
	fflush (stderr);
    }
    
    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
				kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
			  cmd_ln_float64("-beam"),
			  cmd_ln_float64("-pbeam"),
			  cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
	   kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);
    
    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
	E_FATAL("feat_array_alloc() failed\n");
    
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));
    
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;
    
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */
    
    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef WIN32
	if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
/* RAH 4.16.01 This code has several leaks that must be fixed */
dict2pid_t *dict2pid_build (mdef_t *mdef, dict_t *dict)
{
    dict2pid_t *dict2pid;
    s3ssid_t *internal, **ldiph, **rdiph, *single;
    int32 pronlen;
    hash_table_t *hs, *hp;
    glist_t g;
    gnode_t *gn;
    s3senid_t *sen;
    hash_entry_t *he;
    int32 *cslen;
    int32 i, j, b, l, r, w, n, p;
    
    E_INFO("Building PID tables for dictionary\n");

    dict2pid = (dict2pid_t *) ckd_calloc (1, sizeof(dict2pid_t));
    dict2pid->internal = (s3ssid_t **) ckd_calloc (dict_size(dict), sizeof(s3ssid_t *));
    dict2pid->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d (mdef->n_ciphone,
						       mdef->n_ciphone,
						       mdef->n_ciphone,
						       sizeof(s3ssid_t));
    dict2pid->single_lc = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone,
						       mdef->n_ciphone,
						       sizeof(s3ssid_t));
    dict2pid->n_comstate = 0;
    dict2pid->n_comsseq = 0;
    
    hs = hash_new (mdef->n_ciphone * mdef->n_ciphone * mdef->n_emit_state, HASH_CASE_YES);
    hp = hash_new (mdef->n_ciphone * mdef->n_ciphone, HASH_CASE_YES);
    
    for (w = 0, n = 0; w < dict_size(dict); w++) {
	pronlen = dict_pronlen(dict, w);
	if (pronlen < 0)
	    E_FATAL("Pronunciation-length(%s)= %d\n", dict_wordstr(dict, w), pronlen);
	n += pronlen;
    }

    internal = (s3ssid_t *) ckd_calloc (n, sizeof(s3ssid_t));
    
    /* Temporary */
    ldiph = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t));
    rdiph = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t));
    single = (s3ssid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3ssid_t));
    for (b = 0; b < mdef->n_ciphone; b++) {
	for (l = 0; l < mdef->n_ciphone; l++) {
	    for (r = 0; r < mdef->n_ciphone; r++)
		dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
	    
	    dict2pid->single_lc[b][l] = BAD_S3SSID;
	    
	    ldiph[b][l] = BAD_S3SSID;
	    rdiph[b][l] = BAD_S3SSID;
	}
	single[b] = BAD_S3SSID;
    }
    
    for (w = 0; w < dict_size(dict); w++) {
	dict2pid->internal[w] = internal;
	pronlen = dict_pronlen(dict,w);
	
	if (pronlen >= 2) {
	    b = dict_pron(dict, w, 0);
	    r = dict_pron(dict, w, 1);
	    if (NOT_S3SSID(ldiph[b][r])) {
		g = ldiph_comsseq(mdef, b, r);
		ldiph[b][r] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
		
		for (l = 0; l < mdef_n_ciphone(mdef); l++) {
		    p = mdef_phone_id_nearest (mdef, (s3cipid_t)b, (s3cipid_t)l, (s3cipid_t)r, WORD_POSN_BEGIN);
		    dict2pid->ldiph_lc[b][r][l] = mdef_pid2ssid(mdef, p);
		}
	    }
	    internal[0] = ldiph[b][r];
	    
	    for (i = 1; i < pronlen-1; i++) {
		l = b;
		b = r;
		r = dict_pron(dict, w, i+1);
		
		p = mdef_phone_id_nearest(mdef, (s3cipid_t)b, (s3cipid_t)l, (s3cipid_t)r, WORD_POSN_INTERNAL);
		internal[i] = mdef_pid2ssid(mdef, p);
	    }
	    
	    l = b;
	    b = r;
	    if (NOT_S3SSID(rdiph[b][l])) {
		g = rdiph_comsseq(mdef, b, l);
		rdiph[b][l] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
	    }
	    internal[pronlen-1] = rdiph[b][l];
	} else if (pronlen == 1) {
	    b = dict_pron(dict, w, 0);
	    if (NOT_S3SSID(single[b])) {
		g = single_comsseq(mdef, b);
		single[b] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
		
		for (l = 0; l < mdef_n_ciphone(mdef); l++) {
		    g = single_lc_comsseq(mdef, b, l);
		    dict2pid->single_lc[b][l] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		    glist_free (g);
		}
	    }
	    internal[0] = single[b];
	}
	
	internal += pronlen;
    }
    
    ckd_free_2d ((void **) ldiph);
    ckd_free_2d ((void **) rdiph);
    ckd_free ((void *) single);
    
    /* Allocate space for composite state table */
    cslen = (int32 *) ckd_calloc (dict2pid->n_comstate, sizeof(int32));
    g = hash_tolist(hs, &n);
    assert (n == dict2pid->n_comstate);
    n = 0;
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	sen = (s3senid_t *) hash_entry_key(he);
	for (i = 0; IS_S3SENID(sen[i]); i++);
	
	cslen[hash_entry_val(he)] = i+1;	/* +1 for terminating sentinel */
	
	n += (i+1);
    }
    dict2pid->comstate = (s3senid_t **) ckd_calloc (dict2pid->n_comstate, sizeof(s3senid_t *));
    sen = (s3senid_t *) ckd_calloc (n, sizeof(s3senid_t));
    for (i = 0; i < dict2pid->n_comstate; i++) {
	dict2pid->comstate[i] = sen;
	sen += cslen[i];
    }
    
    /* Build composite state table from hash table hs */
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	sen = (s3senid_t *) hash_entry_key(he);
	i = hash_entry_val(he);
	
	for (j = 0; j < cslen[i]; j++)
	    dict2pid->comstate[i][j] = sen[j];
	assert (sen[j-1] == BAD_S3SENID);

	ckd_free ((void *)sen);
    }
    ckd_free (cslen);
    glist_free (g);
    hash_free (hs);
    
    /* Allocate space for composite sseq table */
    dict2pid->comsseq = (s3senid_t **) ckd_calloc (dict2pid->n_comsseq, sizeof(s3senid_t *));
    g = hash_tolist (hp, &n);
    assert (n == dict2pid->n_comsseq);
    
    /* Build composite sseq table */
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	i = hash_entry_val(he);
	dict2pid->comsseq[i] = (s3senid_t *) hash_entry_key(he);
    }
    glist_free (g);
    hash_free (hp);
    
    /* Weight for each composite state */
    dict2pid->comwt = (int32 *) ckd_calloc (dict2pid->n_comstate, sizeof(int32));
    for (i = 0; i < dict2pid->n_comstate; i++) {
	sen = dict2pid->comstate[i];
	
	for (j = 0; IS_S3SENID(sen[j]); j++);
#if 0
	/* if comstate i has N states, its weight= (1/N^2) (Major Hack!!) */
	dict2pid->comwt[i] = - (logs3 ((float64)j) << 1);
#else
	/* if comstate i has N states, its weight= 1/N */
	dict2pid->comwt[i] = - logs3 ((float64)j);
#endif
    }
    
    E_INFO("%d composite states; %d composite sseq\n",
	   dict2pid->n_comstate, dict2pid->n_comsseq);
    
    return dict2pid;
}
Ejemplo n.º 14
0
/*
 * Compute the left and right context CIphone sets for each state.
 * (Needed for building the phone HMM net using cross-word triphones.  Invoke
 * after computing null transitions closure.)
 */
static void
word_fsg_lc_rc(word_fsg_t * fsg)
{
    int32 s, d, i, j;
    int32 n_ci;
    gnode_t *gn;
    word_fsglink_t *l;
    int32 silcipid;
    int32 endwid;
    int32 len;
    dict_t *dict;
    mdef_t *mdef;

    dict = fsg->dict;
    mdef = fsg->mdef;

    assert(fsg);
    assert(dict);
    assert(mdef);
    endwid = dict_basewid(dict, dict_finishwid(dict));

    silcipid = mdef_silphone(mdef);
    assert(silcipid >= 0);
    E_INFO("Value of silcipid %d\n", silcipid);
    n_ci = fsg->n_ciphone;
    E_INFO("No of CI phones %d\n", n_ci);
    if (n_ci > 127) {
        E_FATAL
            ("#phones(%d) > 127; cannot use int8** for word_fsg_t.{lc,rc}\n",
             n_ci);
    }

    /*
     * fsg->lc[s] = set of left context CIphones for state s.  Similarly, rc[s]
     * for right context CIphones.
     */
    fsg->lc =
        (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8));
    fsg->rc =
        (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8));

    for (s = 0; s < fsg->n_state; s++) {
        for (d = 0; d < fsg->n_state; d++) {
            for (gn = fsg->trans[s][d]; gn; gn = gnode_next(gn)) {
                l = (word_fsglink_t *) gnode_ptr(gn);
                assert(l->wid >= 0);

                /*
                 * Add the first CIphone of l->wid to the rclist of state s, and
                 * the last CIphone to lclist of state d.
                 * (Filler phones are a pain to deal with.  There is no direct
                 * marking of a filler phone; but only filler words are supposed to
                 * use such phones, so we use that fact.  HACK!!  FRAGILE!!)
                 */
                if (dict_filler_word(dict, l->wid) || (l->wid == endwid)) {
                    /* Filler phone; use silence phone as context */
                    fsg->rc[s][silcipid] = 1;
                    fsg->lc[d][silcipid] = 1;
                }
                else {
                    len = dict_pronlen(dict, l->wid);
                    fsg->rc[s][dict_pron(dict, l->wid, 0)] = 1;
                    fsg->lc[d][dict_pron(dict, l->wid, len - 1)] = 1;
                }
            }
        }

        /*
         * Add SIL phone to the lclist and rclist of each state.  Strictly
         * speaking, only needed at start and final states, respectively, but
         * all states considered since the user may change the start and final
         * states.  In any case, most applications would have a silence self
         * loop at each state, hence these would be needed anyway.
         */
        fsg->lc[s][silcipid] = 1;
        fsg->rc[s][silcipid] = 1;
    }

    /*
     * Propagate lc and rc lists past null transitions.  (Since FSG contains
     * null transitions closure, no need to worry about a chain of successive
     * null transitions.  Right??)
     */
    for (s = 0; s < fsg->n_state; s++) {
        for (d = 0; d < fsg->n_state; d++) {
            l = fsg->null_trans[s][d];
            if (l) {
                /*
                 * lclist(d) |= lclist(s), because all the words ending up at s, can
                 * now also end at d, becoming the left context for words leaving d.
                 */
                for (i = 0; i < n_ci; i++)
                    fsg->lc[d][i] |= fsg->lc[s][i];
                /*
                 * Similarly, rclist(s) |= rclist(d), because all the words leaving d
                 * can equivalently leave s, becoming the right context for words
                 * ending up at s.
                 */
                for (i = 0; i < n_ci; i++)
                    fsg->rc[s][i] |= fsg->rc[d][i];
            }
        }
    }

    /* Convert the bit-vector representation into a list */
    for (s = 0; s < fsg->n_state; s++) {
        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (fsg->lc[s][i]) {
                fsg->lc[s][j] = i;
                j++;
            }
        }
        fsg->lc[s][j] = -1;     /* Terminate the list */

        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (fsg->rc[s][i]) {
                fsg->rc[s][j] = i;
                j++;
            }
        }
        fsg->rc[s][j] = -1;     /* Terminate the list */
    }
}
Ejemplo n.º 15
0
/* FIXME: Somewhat the same as the above function, needs refactoring */
int
ps_alignment_populate_ci(ps_alignment_t *al)
{
    dict2pid_t *d2p;
    dict_t *dict;
    bin_mdef_t *mdef;
    int i;

    /* Clear phone and state sequences. */
    ps_alignment_vector_empty(&al->sseq);
    ps_alignment_vector_empty(&al->state);

    /* For each word, expand to phones/senone sequences. */
    d2p = al->d2p;
    dict = d2p->dict;
    mdef = d2p->mdef;
    for (i = 0; i < al->word.n_ent; ++i) {
        ps_alignment_entry_t *went = al->word.seq + i;
        ps_alignment_entry_t *sent;
        int wid = went->id.wid;
        int len = dict_pronlen(dict, wid);
        int j;

        for (j = 0; j < len; ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            sent->id.pid.cipid = dict_pron(dict, wid, j);
            sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
            sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
            oe_assert(sent->id.pid.ssid != BAD_SSID);
            sent->start = went->start;
            sent->duration = went->duration;
            sent->parent = i;
        }
    }

    /* For each senone sequence, expand to senones.  (we could do this
     * nested above but this makes it more clear and easier to
     * refactor) */
    for (i = 0; i < al->sseq.n_ent; ++i) {
        ps_alignment_entry_t *pent = al->sseq.seq + i;
        ps_alignment_entry_t *sent;
        int j;

        for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
                E_ERROR("Failed to add state entry!\n");
                return -1;
            }
            sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
            oe_assert(sent->id.senid != BAD_SENID);
            sent->start = pent->start;
            sent->duration = pent->duration;
            sent->parent = i;
            if (j == 0)
                pent->child = (uint16)(sent - al->state.seq);
        }
    }

    return 0;
}
Ejemplo n.º 16
0
int
ps_alignment_populate(ps_alignment_t *al)
{
    dict2pid_t *d2p;
    dict_t *dict;
    bin_mdef_t *mdef;
    int i, lc;

    /* Clear phone and state sequences. */
    ps_alignment_vector_empty(&al->sseq);
    ps_alignment_vector_empty(&al->state);

    /* For each word, expand to phones/senone sequences. */
    d2p = al->d2p;
    dict = d2p->dict;
    mdef = d2p->mdef;
    lc = bin_mdef_silphone(mdef);
    for (i = 0; i < al->word.n_ent; ++i) {
        ps_alignment_entry_t *went = al->word.seq + i;
        ps_alignment_entry_t *sent;
        int wid = went->id.wid;
        int len = dict_pronlen(dict, wid);
        int j, rc;

        if (i < al->word.n_ent - 1)
            rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
        else
            rc = bin_mdef_silphone(mdef);

        /* First phone. */
        if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
            E_ERROR("Failed to add phone entry!\n");
            return -1;
        }
        sent->id.pid.cipid = dict_first_phone(dict, wid);
        sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
        sent->start = went->start;
        sent->duration = went->duration;
        sent->parent = i;
        went->child = (uint16)(sent - al->sseq.seq);
        if (len == 1)
            sent->id.pid.ssid
                = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
        else
            sent->id.pid.ssid
                = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
                                    dict_second_phone(dict, wid), lc);
        oe_assert(sent->id.pid.ssid != BAD_SSID);

        /* Internal phones. */
        for (j = 1; j < len - 1; ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            sent->id.pid.cipid = dict_pron(dict, wid, j);
            sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
            sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
            oe_assert(sent->id.pid.ssid != BAD_SSID);
            sent->start = went->start;
            sent->duration = went->duration;
            sent->parent = i;
        }

        /* Last phone. */
        if (j < len) {
            xwdssid_t *rssid;
            oe_assert(j == len - 1);
            if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            sent->id.pid.cipid = dict_last_phone(dict, wid);
            sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
            rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
                                   dict_second_last_phone(dict, wid));
            sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
            oe_assert(sent->id.pid.ssid != BAD_SSID);
            sent->start = went->start;
            sent->duration = went->duration;
            sent->parent = i;
        }
        /* Update lc.  Could just use sent->id.pid.cipid here but that
         * seems needlessly obscure. */
        lc = dict_last_phone(dict, wid);
    }

    /* For each senone sequence, expand to senones.  (we could do this
     * nested above but this makes it more clear and easier to
     * refactor) */
    for (i = 0; i < al->sseq.n_ent; ++i) {
        ps_alignment_entry_t *pent = al->sseq.seq + i;
        ps_alignment_entry_t *sent;
        int j;

        for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
            if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
                E_ERROR("Failed to add state entry!\n");
                return -1;
            }
            sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
            oe_assert(sent->id.senid != BAD_SENID);
            sent->start = pent->start;
            sent->duration = pent->duration;
            sent->parent = i;
            if (j == 0)
                pent->child = (uint16)(sent - al->state.seq);
        }
    }

    return 0;
}
/**
 * Build HMM network for one utterance of fwdflat search.
 */
static void
build_fwdflat_chan(ngram_search_t *ngs)
{
    int32 i, wid, p;
    root_chan_t *rhmm;
    chan_t *hmm, *prevhmm;
    dict_t *dict;
    dict2pid_t *d2p;

    dict = ps_search_dict(ngs);
    d2p = ps_search_dict2pid(ngs);

    /* Build word HMMs for each word in the lattice. */
    for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
        wid = ngs->fwdflat_wordlist[i];

        /* Single-phone words are permanently allocated */
        if (dict_is_single_phone(dict, wid))
            continue;

        assert(ngs->word_chan[wid] == NULL);

        /* Multiplex root HMM for first phone (one root per word, flat
         * lexicon).  diphone is irrelevant here, for the time being,
         * at least. */
        rhmm = listelem_malloc(ngs->root_chan_alloc);
        rhmm->ci2phone = dict_second_phone(dict, wid);
        rhmm->ciphone = dict_first_phone(dict, wid);
        rhmm->next = NULL;
        hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE,
                 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone),
                 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone));

        /* HMMs for word-internal phones */
        prevhmm = NULL;
        for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) {
            hmm = listelem_malloc(ngs->chan_alloc);
            hmm->ciphone = dict_pron(dict, wid, p);
            hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1;
            hmm->next = NULL;
            hmm_init(ngs->hmmctx, &hmm->hmm, FALSE,
                     dict2pid_internal(d2p,wid,p), 
		     bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone));

            if (prevhmm)
                prevhmm->next = hmm;
            else
                rhmm->next = hmm;

            prevhmm = hmm;
        }

        /* Right-context phones */
        ngram_search_alloc_all_rc(ngs, wid);

        /* Link in just allocated right-context phones */
        if (prevhmm)
            prevhmm->next = ngs->word_chan[wid];
        else
            rhmm->next = ngs->word_chan[wid];
        ngs->word_chan[wid] = (chan_t *) rhmm;
    }

}
Ejemplo n.º 18
0
Archivo: kb.c Proyecto: wdebeaum/cabot
/* Update kb w/ new dictionary and new LM.
 * assumes: single-LM kbcore (before & after)
 * requires: updating kbcore
 * Lucian Galescu, 08/11/2005
 */
void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile)
{
  kbcore_t *kbcore;
  mdef_t *mdef;
  dict_t *dict;
  dict2pid_t *d2p;
  lm_t *lm;
  s3cipid_t ci;
  s3wid_t w;
  int32 i, n, n_lc;
  wordprob_t *wp;
  s3cipid_t *lc;
  bitvec_t lc_active;
  
  /*** clean up ***/
  vithist_t *vithist = kb->vithist;

  if (kb->fillertree) 
    ckd_free ((void *)kb->fillertree);
  if (kb->hmm_hist) 
    ckd_free ((void *)kb->hmm_hist);
  
  
  /* vithist */
  if (vithist) {
    ckd_free ((void *) vithist->entry);
    ckd_free ((void *) vithist->frame_start);
    ckd_free ((void *) vithist->bestscore);
    ckd_free ((void *) vithist->bestvh);
    ckd_free ((void *) vithist->lms2vh_root);    
    ckd_free ((void *) kb->vithist);
  }
  
  /*** re-initialize ***/
  
  kb->kbcore = kbcore_update_lm(kb->kbcore, 
                                dictfile, 
                                cmd_ln_str("-fdict"),
                                "",	/* Hack!! Hardwired constant for -compsep argument */
                                lmfile,
                                cmd_ln_str("-fillpen"),
                                cmd_ln_float32("-silprob"),
                                cmd_ln_float32("-fillprob"),
                                cmd_ln_float32("-lw"),
                                cmd_ln_float32("-wip"),
                                cmd_ln_float32("-uw"));
  if(kb->kbcore==NULL){
    E_FATAL("Updating kbcore failed\n");
  }
  
  kbcore = kb->kbcore;
  
  mdef = kbcore_mdef(kbcore);
  dict = kbcore_dict(kbcore);
  lm = kbcore_lm(kbcore);
  d2p = kbcore_dict2pid(kbcore);
  
  if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
    E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
  
  if(lm){
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
      E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
  }
  
  /*
   * Unlink <s> and </s> between dictionary and LM, to prevent their 
   * recognition.  They are merely dummy words (anchors) at the beginning 
   * and end of each utterance.
   */
  if(lm){
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
      kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
      kbcore->dict2lmwid[w] = BAD_S3LMWID;
  }
  
  /* Build set of all possible left contexts */
  lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
  lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
  for (w = 0; w < dict_size (dict); w++) {
    ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
    if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
  }
  ci = mdef_silphone(mdef);
  bitvec_set (lc_active, ci);
  for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
    if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
  }
  lc[n_lc] = BAD_S3CIPID;
  
  E_INFO("Building lextrees\n");
  /* Get the number of lexical tree*/
  kb->n_lextree = cmd_ln_int32 ("-Nlextree");
  if (kb->n_lextree < 1) {
    E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
            kb->n_lextree);
    kb->n_lextree = 1;
  }
  
  /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
  /* Build active word list */
  wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
  
  
  if (lm) {
    E_INFO("Creating Unigram Table\n");
    n=0;
    n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
    E_INFO("Size of word table after unigram + words in class: %d\n",n);
    if (n < 1)
      E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
    
    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
      for (i = 0; i < n; i++)
        wp[i].prob = -1;    	/* Flatten all initial probabilities */
    }
    
    /* Create the desired no. of unigram lextrees */
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
      kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
      lextree_type (kb->ugtree[i]) = 0;
    }
    E_INFO("Lextrees(%d), %d nodes(ug)\n",
           kb->n_lextree, lextree_n_node(kb->ugtree[0]));
  }

  /* Create filler lextrees */
  /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
  n = 0;
  for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
    if (dict_filler_word(dict, i)) {
      wp[n].wid = i;
      wp[n].prob = fillpen (kbcore->fillpen, i);
      n++;
    }
  }


  kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
  for (i = 0; i < kb->n_lextree; i++) {
    kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
    lextree_type (kb->fillertree[i]) = -1;
  }
  ckd_free ((void *) wp);
  ckd_free ((void *) lc);
  bitvec_free (lc_active);


  E_INFO("Lextrees(%d), %d nodes(filler)\n",
         kb->n_lextree, 
         lextree_n_node(kb->fillertree[0]));

  if (cmd_ln_int32("-lextreedump")) {
    for (i = 0; i < kb->n_lextree; i++) {
      fprintf (stderr, "UGTREE %d\n", i);
      lextree_dump (kb->ugtree[i], dict, stderr);
    }
    for (i = 0; i < kb->n_lextree; i++) {
      fprintf (stderr, "FILLERTREE %d\n", i);
      lextree_dump (kb->fillertree[i], dict, stderr);
    }
    fflush (stderr);
  }

  kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
                        kbcore->dict2pid->n_comstate);

  kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

  kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

  n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

  n /= kb->hmm_hist_binsize;
  kb->hmm_hist_bins = n+1;
  kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */

}