static void ngram_fwdflat_allocate_1ph(ngram_search_t *ngs) { dict_t *dict = ps_search_dict(ngs); int n_words = ps_search_n_words(ngs); int i, w; /* Allocate single-phone words, since they won't have * been allocated for us by fwdtree initialization. */ ngs->n_1ph_words = 0; for (w = 0; w < n_words; w++) { if (dict_is_single_phone(dict, w)) ++ngs->n_1ph_words; } ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->single_phone_wid)); ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph)); i = 0; for (w = 0; w < n_words; w++) { if (!dict_is_single_phone(dict, w)) continue; /* DICT2PID location */ ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w); ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef); hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE, /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone), /* tmatid */ bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone)); ngs->rhmm_1ph[i].next = NULL; ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]); ngs->single_phone_wid[i] = w; i++; } }
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ int32 tmatid; gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. */ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; ++n_lc_alloc; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } }
dict2pid_t * dict2pid_build(bin_mdef_t * mdef, dict_t * dict) { dict2pid_t *dict2pid; s3ssid_t ***rdiph_rc; bitvec_t *ldiph, *rdiph, *single; int32 pronlen; int32 b, l, r, w, p; E_INFO("Building PID tables for dictionary\n"); assert(mdef); assert(dict); dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t)); dict2pid->refcount = 1; dict2pid->mdef = bin_mdef_retain(mdef); dict2pid->dict = dict_retain(dict); E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n", mdef->n_ciphone, sizeof(s3ssid_t), mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024); dict2pid->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); /* Only used internally to generate rssid */ rdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof (s3ssid_t)); /* Actually could use memset for this, if BAD_S3SSID is guaranteed * to be 65535... */ for (b = 0; b < mdef->n_ciphone; ++b) { for (r = 0; r < mdef->n_ciphone; ++r) { for (l = 0; l < mdef->n_ciphone; ++l) { dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID; dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID; rdiph_rc[b][l][r] = BAD_S3SSID; } } } /* Track which diphones / ciphones have been seen. */ ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); single = bitvec_alloc(mdef->n_ciphone); for (w = 0; w < dict_size(dict2pid->dict); w++) { pronlen = dict_pronlen(dict, w); if (pronlen >= 2) { b = dict_first_phone(dict, w); r = dict_second_phone(dict, w); /* Populate ldiph_lc */ if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) { /* Mark this diphone as done */ bitvec_set(ldiph, b * mdef->n_ciphone + r); /* Record all possible ssids for b(?,r) */ for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_BEGIN); dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); } } /* Populate rdiph_rc */ l = dict_second_last_phone(dict, w); b = dict_last_phone(dict, w); if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) { /* Mark this diphone as done */ bitvec_set(rdiph, b * mdef->n_ciphone + l); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_END); rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); } } } else if (pronlen == 1) { b = dict_pron(dict, w, 0); E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n", dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b))); /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */ if (bitvec_is_clear(single, b)) { populate_lrdiph(dict2pid, rdiph_rc, b); bitvec_set(single, b); } } } bitvec_free(ldiph); bitvec_free(rdiph); bitvec_free(single); /* Try to compress rdiph_rc into rdiph_rc_compressed */ compress_right_context_tree(dict2pid, rdiph_rc); compress_left_right_context_tree(dict2pid); ckd_free_3d(rdiph_rc); dict2pid_report(dict2pid); return dict2pid; }
int dict2pid_add_word(dict2pid_t *d2p, int32 wid) { bin_mdef_t *mdef = d2p->mdef; dict_t *d = d2p->dict; if (dict_pronlen(d, wid) > 1) { s3cipid_t l; /* Make sure we have left and right context diphones for this * word. */ if (d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][0] == BAD_S3SSID) { E_INFO("Filling in left-context diphones for %s(?,%s)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_phone(d, wid))); for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { s3ssid_t p = bin_mdef_phone_id_nearest(mdef, dict_first_phone(d, wid), l, dict_second_phone(d, wid), WORD_POSN_BEGIN); d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][l] = bin_mdef_pid2ssid(mdef, p); } } if (d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid == 0) { s3ssid_t *rmap; s3ssid_t *tmpssid; s3cipid_t *tmpcimap; s3cipid_t r; E_INFO("Filling in right-context diphones for %s(%s,?)\n", bin_mdef_ciphone_str(mdef, dict_last_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_last_phone(d, wid))); rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap)); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { s3ssid_t p = bin_mdef_phone_id_nearest(mdef, dict_last_phone(d, wid), dict_second_last_phone(d, wid), r, WORD_POSN_END); rmap[r] = bin_mdef_pid2ssid(mdef, p); } tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid)); tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap)); compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef)); for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++) ; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].ssid = tmpssid; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].cimap = tmpcimap; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid = r; ckd_free(rmap); } } else { /* Make sure we have a left-right context triphone entry for * this word. */ E_INFO("Filling in context triphones for %s(?,?)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid))); if (d2p->lrdiph_rc[dict_first_phone(d, wid)][0][0] == BAD_S3SSID) { populate_lrdiph(d2p, NULL, dict_first_phone(d, wid)); } } return 0; }
int ps_alignment_populate(ps_alignment_t *al) { dict2pid_t *d2p; dict_t *dict; bin_mdef_t *mdef; int i, lc; /* Clear phone and state sequences. */ ps_alignment_vector_empty(&al->sseq); ps_alignment_vector_empty(&al->state); /* For each word, expand to phones/senone sequences. */ d2p = al->d2p; dict = d2p->dict; mdef = d2p->mdef; lc = bin_mdef_silphone(mdef); for (i = 0; i < al->word.n_ent; ++i) { ps_alignment_entry_t *went = al->word.seq + i; ps_alignment_entry_t *sent; int wid = went->id.wid; int len = dict_pronlen(dict, wid); int j, rc; if (i < al->word.n_ent - 1) rc = dict_first_phone(dict, al->word.seq[i+1].id.wid); else rc = bin_mdef_silphone(mdef); /* First phone. */ if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { E_ERROR("Failed to add phone entry!\n"); return -1; } sent->id.pid.cipid = dict_first_phone(dict, wid); sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); sent->start = went->start; sent->duration = went->duration; sent->parent = i; went->child = (uint16)(sent - al->sseq.seq); if (len == 1) sent->id.pid.ssid = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc); else sent->id.pid.ssid = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid, dict_second_phone(dict, wid), lc); oe_assert(sent->id.pid.ssid != BAD_SSID); /* Internal phones. */ for (j = 1; j < len - 1; ++j) { if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { E_ERROR("Failed to add phone entry!\n"); return -1; } sent->id.pid.cipid = dict_pron(dict, wid, j); sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); sent->id.pid.ssid = dict2pid_internal(d2p, wid, j); oe_assert(sent->id.pid.ssid != BAD_SSID); sent->start = went->start; sent->duration = went->duration; sent->parent = i; } /* Last phone. */ if (j < len) { xwdssid_t *rssid; oe_assert(j == len - 1); if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { E_ERROR("Failed to add phone entry!\n"); return -1; } sent->id.pid.cipid = dict_last_phone(dict, wid); sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); rssid = dict2pid_rssid(d2p, sent->id.pid.cipid, dict_second_last_phone(dict, wid)); sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]]; oe_assert(sent->id.pid.ssid != BAD_SSID); sent->start = went->start; sent->duration = went->duration; sent->parent = i; } /* Update lc. Could just use sent->id.pid.cipid here but that * seems needlessly obscure. */ lc = dict_last_phone(dict, wid); } /* For each senone sequence, expand to senones. (we could do this * nested above but this makes it more clear and easier to * refactor) */ for (i = 0; i < al->sseq.n_ent; ++i) { ps_alignment_entry_t *pent = al->sseq.seq + i; ps_alignment_entry_t *sent; int j; for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) { if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) { E_ERROR("Failed to add state entry!\n"); return -1; } sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j); oe_assert(sent->id.senid != BAD_SENID); sent->start = pent->start; sent->duration = pent->duration; sent->parent = i; if (j == 0) pent->child = (uint16)(sent - al->state.seq); } } return 0; }
static void fwdflat_word_transition(ngram_search_t *ngs, int frame_idx) { int32 cf, nf, b, thresh, pip, i, w, newscore; int32 best_silrc_score = 0, best_silrc_bp = 0; /* FIXME: good defaults? */ bptbl_t *bp; int32 *rcss; root_chan_t *rhmm; int32 *awl; float32 lwf; dict_t *dict = ps_search_dict(ngs); dict2pid_t *d2p = ps_search_dict2pid(ngs); cf = frame_idx; nf = cf + 1; thresh = ngs->best_score + ngs->fwdflatbeam; pip = ngs->pip; best_silrc_score = WORST_SCORE; lwf = ngs->fwdflat_fwdtree_lw_ratio; /* Search for all words starting within a window of this frame. * These are the successors for words exiting now. */ get_expand_wordlist(ngs, cf, ngs->max_sf_win); /* Scan words exited in current frame */ for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) { xwdssid_t *rssid; int32 silscore; bp = ngs->bp_table + b; ngs->word_lat_idx[bp->wid] = NO_BP; if (bp->wid == ps_search_finish_wid(ngs)) continue; /* DICT2PID location */ /* Get the mapping from right context phone ID to index in the * right context table and the bscore_stack. */ rcss = ngs->bscore_stack + bp->s_idx; if (bp->last2_phone == -1) rssid = NULL; else rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone); /* Transition to all successor words. */ for (i = 0; ngs->expand_word_list[i] >= 0; i++) { int32 n_used; w = ngs->expand_word_list[i]; /* Get the exit score we recorded in save_bwd_ptr(), or * something approximating it. */ if (rssid) newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]]; else newscore = bp->score; if (newscore == WORST_SCORE) continue; /* FIXME: Floating point... */ newscore += lwf * (ngram_tg_score(ngs->lmset, dict_basewid(dict, w), bp->real_wid, bp->prev_real_wid, &n_used) >> SENSCR_SHIFT); newscore += pip; /* Enter the next word */ if (newscore BETTER_THAN thresh) { rhmm = (root_chan_t *) ngs->word_chan[w]; if ((hmm_frame(&rhmm->hmm) < cf) || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { hmm_enter(&rhmm->hmm, newscore, b, nf); /* DICT2PID: This is where mpx ssids get introduced. */ /* Look up the ssid to use when entering this mpx triphone. */ hmm_mpx_ssid(&rhmm->hmm, 0) = dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, dict_last_phone(dict, bp->wid)); assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0))); E_DEBUG(6,("ssid %d(%d,%d) = %d\n", rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone, hmm_mpx_ssid(&rhmm->hmm, 0))); bitvec_set(ngs->word_active, w); } } } /* Get the best exit into silence. */ if (rssid) silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]]; else silscore = bp->score; if (silscore BETTER_THAN best_silrc_score) { best_silrc_score = silscore; best_silrc_bp = b; } } /* Transition to <sil> */ newscore = best_silrc_score + ngs->silpen + pip; if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) { w = ps_search_silence_wid(ngs); rhmm = (root_chan_t *) ngs->word_chan[w]; if ((hmm_frame(&rhmm->hmm) < cf) || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { hmm_enter(&rhmm->hmm, newscore, best_silrc_bp, nf); bitvec_set(ngs->word_active, w); } } /* Transition to noise words */ newscore = best_silrc_score + ngs->fillpen + pip; if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) { for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) { rhmm = (root_chan_t *) ngs->word_chan[w]; /* Noise words that aren't a single phone will have NULL here. */ if (rhmm == NULL) continue; if ((hmm_frame(&rhmm->hmm) < cf) || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { hmm_enter(&rhmm->hmm, newscore, best_silrc_bp, nf); bitvec_set(ngs->word_active, w); } } } /* Reset initial channels of words that have become inactive even after word trans. */ i = ngs->n_active_word[cf & 0x1]; awl = ngs->active_word_list[cf & 0x1]; for (w = *(awl++); i > 0; --i, w = *(awl++)) { rhmm = (root_chan_t *) ngs->word_chan[w]; if (hmm_frame(&rhmm->hmm) == cf) { hmm_clear_scores(&rhmm->hmm); } } }
/** * Build HMM network for one utterance of fwdflat search. */ static void build_fwdflat_chan(ngram_search_t *ngs) { int32 i, wid, p; root_chan_t *rhmm; chan_t *hmm, *prevhmm; dict_t *dict; dict2pid_t *d2p; dict = ps_search_dict(ngs); d2p = ps_search_dict2pid(ngs); /* Build word HMMs for each word in the lattice. */ for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { wid = ngs->fwdflat_wordlist[i]; /* Single-phone words are permanently allocated */ if (dict_is_single_phone(dict, wid)) continue; assert(ngs->word_chan[wid] == NULL); /* Multiplex root HMM for first phone (one root per word, flat * lexicon). diphone is irrelevant here, for the time being, * at least. */ rhmm = listelem_malloc(ngs->root_chan_alloc); rhmm->ci2phone = dict_second_phone(dict, wid); rhmm->ciphone = dict_first_phone(dict, wid); rhmm->next = NULL; hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE, bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone), bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone)); /* HMMs for word-internal phones */ prevhmm = NULL; for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) { hmm = listelem_malloc(ngs->chan_alloc); hmm->ciphone = dict_pron(dict, wid, p); hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1; hmm->next = NULL; hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, dict2pid_internal(d2p,wid,p), bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone)); if (prevhmm) prevhmm->next = hmm; else rhmm->next = hmm; prevhmm = hmm; } /* Right-context phones */ ngram_search_alloc_all_rc(ngs, wid); /* Link in just allocated right-context phones */ if (prevhmm) prevhmm->next = ngs->word_chan[wid]; else rhmm->next = ngs->word_chan[wid]; ngs->word_chan[wid] = (chan_t *) rhmm; } }
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. */ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } } glist_free(lc_pnodelist); } else { /* Filler word; no context modelled */ ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; fsg_pnode_add_all_ctxt(&(pnode->ctxt)); pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); } } else { /* Multi-phone word */ fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ ssid_pnode_map = (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); lc_pnodelist = NULL; rc_pnodelist = NULL; for (p = 0; p < pronlen; p++) { int ci = dict_pron(lextree->dict, dictwid, p); if (p == 0) { /* Root phone, handle required left contexts */ /* Find if we already have an lc_pnodelist for the first phone of this word */ fsg_glist_linklist_t *predglist=*curglist; fsg_glist_linklist_t *glist=*curglist; rc = dict_pron(lextree->dict, dictwid, 1); while (glist && glist->glist && glist->ci != ci && glist->rc != rc){ glist = glist->next; } if (glist && glist->ci == ci && glist->rc == rc && glist->glist) { /* We've found a valid glist. Hook to it and move to next phoneme */ lc_pnodelist = glist->glist; /* Set the predecessor node for the future tree first */ pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); continue; } else { /* Two cases that can bring us here * a. glist == NULL, i.e. end of current list. Create new entry. * b. glist->glist == NULL, i.e. first entry into list. */ if (!glist) { /* Case a; reduce it to case b by allocing glist */ glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); glist->next = predglist; *curglist = glist; } glist->ci = ci; glist->rc = rc; glist->lc = -1; lc_pnodelist = glist->glist = NULL; /* Gets created below */ } for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); /* Compression is not done by d2p, so we do it * here. This might be slow, but it might not * be... we'll see. */ pnode = ssid_pnode_map[0]; for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { pnode = ssid_pnode_map[j]; if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) break; } assert(j < n_ci); if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* This bit is tricky! For now we'll put the prob in the final link only */ /* pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; */ pnode->logs2prob = lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = FALSE; pnode->sibling = root; /* All root nodes linked together */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } fsg_pnode_add_ctxt(pnode, lc); } /* Put the lc_pnodelist back into glist */ glist->glist = lc_pnodelist; /* The predecessor node for the future tree is the root */ pred = root; } else if (p != pronlen - 1) { /* Word internal phone */ fsg_pnode_t *pnodeyoungest; ssid = dict2pid_internal(lextree->d2p, dictwid, p); /* First check if we already have this ssid in our tree */ pnode = pred->next.succ; pnodeyoungest = pnode; /* The youngest sibling */ while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { pnode = pnode->sibling; } if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { /* Found the ssid; go to next phoneme */ pred = pnode; continue; } /* pnode not found, allocate it */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->logs2prob = lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = FALSE; pnode->sibling = pnodeyoungest; /* May be NULL */ if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); pred->next.succ = pnode; } } else { /* Predecessor = word internal node */ pred->next.succ = pnode; } pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); pred = pnode; } else { /* Leaf phone, handle required right contexts */ /* Note, leaf phones are not part of the tree */ xwdssid_t *rssid; memset((void *) ssid_pnode_map, 0, n_ci * sizeof(fsg_pnode_t *)); lc = dict_pron(lextree->dict, dictwid, p-1); rssid = dict2pid_rssid(lextree->d2p, ci, lc); for (i = 0; rclist[i] >= 0; i++) { rc = rclist[i]; j = rssid->cimap[rc]; ssid = rssid->ssid[j]; pnode = ssid_pnode_map[j]; if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* We are plugging the word prob here. Ugly */ /* pnode->logs2prob = lextree->pip; */ pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = TRUE; pnode->sibling = rc_pnodelist ? (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; pnode->next.fsglink = fsglink; pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); rc_pnodelist = glist_add_ptr(rc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } else { assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); } fsg_pnode_add_ctxt(pnode, rc); } if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); /* Since all entries of lc_pnodelist point to the same array, sufficient to update it once */ break; } } } else { /* Predecessor = word internal node */ if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); } } } } ckd_free((void *) ssid_pnode_map); /* glist_free(lc_pnodelist); Nope; this gets freed outside */ glist_free(rc_pnodelist); } *alloc_head = head; return root; }