int fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, char const *altword) { int i, basewid, altwid; int ntrans; /* FIXME: This will get slow, eventually... */ for (basewid = 0; basewid < fsg->n_word; ++basewid) if (0 == strcmp(fsg->vocab[basewid], baseword)) break; if (basewid == fsg->n_word) { E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword); return -1; } altwid = fsg_model_word_add(fsg, altword); if (fsg->altwords == NULL) fsg->altwords = bitvec_alloc(fsg->n_word_alloc); bitvec_set(fsg->altwords, altwid); if (fsg_model_is_filler(fsg, basewid)) { if (fsg->silwords == NULL) fsg->silwords = bitvec_alloc(fsg->n_word_alloc); bitvec_set(fsg->silwords, altwid); } E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n", baseword, altword)); /* Look for all transitions involving baseword and duplicate them. */ /* FIXME: This will also get slow, eventually... */ ntrans = 0; for (i = 0; i < fsg->n_state; ++i) { hash_iter_t *itor; if (fsg->trans[i].trans == NULL) continue; for (itor = hash_table_iter(fsg->trans[i].trans); itor; itor = hash_table_iter_next(itor)) { glist_t trans; gnode_t *gn; trans = hash_entry_val(itor->ent); for (gn = trans; gn; gn = gnode_next(gn)) { fsg_link_t *fl = gnode_ptr(gn); if (fl->wid == basewid) { fsg_link_t *link; /* Create transition object */ link = listelem_malloc(fsg->link_alloc); link->from_state = fl->from_state; link->to_state = fl->to_state; link->logs2prob = fl->logs2prob; /* FIXME!!!??? */ link->wid = altwid; trans = glist_add_ptr(trans, (void *) link); ++ntrans; } } hash_entry_val(itor->ent) = trans; } } E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans)); return ntrans; }
/** * Compute the left and right context CIphone sets for each state. */ static void fsg_lextree_lc_rc(fsg_lextree_t *lextree) { int32 s, i, j; int32 n_ci; fsg_model_t *fsg; int32 silcipid; int32 len; silcipid = bin_mdef_silphone(lextree->mdef); assert(silcipid >= 0); n_ci = bin_mdef_n_ciphone(lextree->mdef); fsg = lextree->fsg; /* * lextree->lc[s] = set of left context CIphones for state s. Similarly, rc[s] * for right context CIphones. */ lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc)); lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc)); E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n", fsg->n_state * (n_ci + 1) * 2, fsg->n_state * (n_ci + 1) * 2 / 1024); for (s = 0; s < fsg->n_state; s++) { fsg_arciter_t *itor; for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { fsg_link_t *l = fsg_arciter_get(itor); int32 dictwid; /**< Dictionary (not FSG) word ID!! */ if (fsg_link_wid(l) >= 0) { dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, l->wid)); /* * Add the first CIphone of l->wid to the rclist of state s, and * the last CIphone to lclist of state d. * (Filler phones are a pain to deal with. There is no direct * marking of a filler phone; but only filler words are supposed to * use such phones, so we use that fact. HACK!! FRAGILE!!) */ if (fsg_model_is_filler(fsg, fsg_link_wid(l))) { /* Filler phone; use silence phone as context */ lextree->rc[fsg_link_from_state(l)][silcipid] = 1; lextree->lc[fsg_link_to_state(l)][silcipid] = 1; } else { len = dict_pronlen(lextree->dict, dictwid); lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1; lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1; } } } } for (s = 0; s < fsg->n_state; s++) { /* * Add SIL phone to the lclist and rclist of each state. Strictly * speaking, only needed at start and final states, respectively, but * all states considered since the user may change the start and final * states. In any case, most applications would have a silence self * loop at each state, hence these would be needed anyway. */ lextree->lc[s][silcipid] = 1; lextree->rc[s][silcipid] = 1; } /* * Propagate lc and rc lists past null transitions. (Since FSG contains * null transitions closure, no need to worry about a chain of successive * null transitions. Right??) * * This can't be joined with the previous loop because we first calculate * contexts and only then we can propagate them. */ for (s = 0; s < fsg->n_state; s++) { fsg_arciter_t *itor; for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { fsg_link_t *l = fsg_arciter_get(itor); if (fsg_link_wid(l) < 0) { /* * lclist(d) |= lclist(s), because all the words ending up at s, can * now also end at d, becoming the left context for words leaving d. */ for (i = 0; i < n_ci; i++) lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i]; /* * Similarly, rclist(s) |= rclist(d), because all the words leaving d * can equivalently leave s, becoming the right context for words * ending up at s. */ for (i = 0; i < n_ci; i++) lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i]; } } } /* Convert the bit-vector representation into a list */ for (s = 0; s < fsg->n_state; s++) { j = 0; for (i = 0; i < n_ci; i++) { if (lextree->lc[s][i]) { lextree->lc[s][j] = i; j++; } } lextree->lc[s][j] = -1; /* Terminate the list */ j = 0; for (i = 0; i < n_ci; i++) { if (lextree->rc[s][i]) { lextree->rc[s][j] = i; j++; } } lextree->rc[s][j] = -1; /* Terminate the list */ } }