/**
 * Build the dictionary-to-phone-ID tables for a model definition and
 * dictionary.
 *
 * Allocates a new dict2pid_t with a reference count of 1, retaining
 * both mdef and dict.  Three n_ciphone^3 tables of s3ssid_t are
 * allocated and initialized to BAD_S3SSID: ldiph_lc (word-initial
 * phones), lrdiph_rc (single-phone words) and a temporary rdiph_rc
 * (word-final phones), which is compressed and then freed before
 * returning.
 *
 * @param mdef Model definition; must not be NULL (asserted).
 * @param dict Dictionary; must not be NULL (asserted).
 * @return The newly allocated dict2pid_t.
 */
dict2pid_t *
dict2pid_build(bin_mdef_t * mdef, dict_t * dict)
{
    dict2pid_t *dict2pid;
    s3ssid_t ***rdiph_rc;
    bitvec_t *ldiph, *rdiph, *single;
    int32 pronlen;
    int32 b, l, r, w, p;

    E_INFO("Building PID tables for dictionary\n");
    assert(mdef);
    assert(dict);

    dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t));
    dict2pid->refcount = 1;
    dict2pid->mdef = bin_mdef_retain(mdef);
    dict2pid->dict = dict_retain(dict);

    /* sizeof yields size_t, which does not match %d; cast explicitly so
     * the arguments agree with the conversion specifiers. */
    E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n",
           mdef->n_ciphone, (int) sizeof(s3ssid_t),
           (int) (mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone
                  * sizeof(s3ssid_t) / 1024));
    dict2pid->ldiph_lc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));
    /* Only used internally to generate rssid */
    rdiph_rc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));

    dict2pid->lrdiph_rc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));
    /* Actually could use memset for this, if BAD_S3SSID is guaranteed
     * to be 65535... */
    for (b = 0; b < mdef->n_ciphone; ++b) {
        for (r = 0; r < mdef->n_ciphone; ++r) {
            for (l = 0; l < mdef->n_ciphone; ++l) {
                dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
                dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID;
                rdiph_rc[b][l][r] = BAD_S3SSID;
            }
        }
    }

    /* Track which diphones / ciphones have been seen. */
    ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    single = bitvec_alloc(mdef->n_ciphone);

    for (w = 0; w < dict_size(dict2pid->dict); w++) {
        pronlen = dict_pronlen(dict, w);

        if (pronlen >= 2) {
            b = dict_first_phone(dict, w);
            r = dict_second_phone(dict, w);
            /* Populate ldiph_lc */
            if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) {
                /* Mark this diphone as done */
                bitvec_set(ldiph, b * mdef->n_ciphone + r);

                /* Record all possible ssids for b(?,r) */
                for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                                  (s3cipid_t) l,
                                                  (s3cipid_t) r,
                                                  WORD_POSN_BEGIN);
                    dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p);
                }
            }

            /* Populate rdiph_rc */
            l = dict_second_last_phone(dict, w);
            b = dict_last_phone(dict, w);
            if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) {
                /* Mark this diphone as done */
                bitvec_set(rdiph, b * mdef->n_ciphone + l);

                /* Record all possible ssids for b(l,?) */
                for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                                  (s3cipid_t) l,
                                                  (s3cipid_t) r,
                                                  WORD_POSN_END);
                    rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p);
                }
            }
        }
        else if (pronlen == 1) {
            b = dict_pron(dict, w, 0);
            E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n",
                       dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b)));
            /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */
            if (bitvec_is_clear(single, b)) {
                populate_lrdiph(dict2pid, rdiph_rc, b);
                bitvec_set(single, b);
            }
        }
    }

    bitvec_free(ldiph);
    bitvec_free(rdiph);
    bitvec_free(single);

    /* Try to compress rdiph_rc into rdiph_rc_compressed */
    compress_right_context_tree(dict2pid, rdiph_rc);
    compress_left_right_context_tree(dict2pid);

    /* The uncompressed right-context table is only a build-time scratch
     * structure. */
    ckd_free_3d(rdiph_rc);

    dict2pid_report(dict2pid);
    return dict2pid;
}
/**
 * Expand the word entries of an alignment into phone (senone-sequence)
 * entries and then into state (senone) entries.
 *
 * The existing phone and state vectors are emptied first.  Cross-word
 * context for the first and last word is the silence phone of the
 * model definition.
 *
 * @param al Alignment whose word vector is already filled in.
 * @return 0 on success, -1 if a phone or state entry could not be added.
 */
int
ps_alignment_populate(ps_alignment_t *al)
{
    dict2pid_t *d2p = al->d2p;
    dict_t *dict = d2p->dict;
    bin_mdef_t *mdef = d2p->mdef;
    int w, p, left;

    /* Start over with empty phone and state sequences. */
    ps_alignment_vector_empty(&al->sseq);
    ps_alignment_vector_empty(&al->state);

    /* Pass 1: words -> phones.  The left context of the first word is
     * silence. */
    left = bin_mdef_silphone(mdef);
    for (w = 0; w < al->word.n_ent; ++w) {
        ps_alignment_entry_t *word_ent = al->word.seq + w;
        ps_alignment_entry_t *phone_ent;
        int wid = word_ent->id.wid;
        int n_phones = dict_pronlen(dict, wid);
        int pos;
        /* Right context is the next word's first phone, or silence
         * after the final word. */
        int right = (w < al->word.n_ent - 1)
            ? dict_first_phone(dict, al->word.seq[w + 1].id.wid)
            : bin_mdef_silphone(mdef);

        /* Word-initial phone. */
        phone_ent = ps_alignment_vector_grow_one(&al->sseq);
        if (phone_ent == NULL) {
            E_ERROR("Failed to add phone entry!\n");
            return -1;
        }
        phone_ent->id.pid.cipid = dict_first_phone(dict, wid);
        phone_ent->id.pid.tmatid =
            bin_mdef_pid2tmatid(mdef, phone_ent->id.pid.cipid);
        phone_ent->start = word_ent->start;
        phone_ent->duration = word_ent->duration;
        phone_ent->parent = w;
        /* The word's child is the index of its first phone entry. */
        word_ent->child = (uint16)(phone_ent - al->sseq.seq);
        if (n_phones != 1) {
            phone_ent->id.pid.ssid =
                dict2pid_ldiph_lc(d2p, phone_ent->id.pid.cipid,
                                  dict_second_phone(dict, wid), left);
        }
        else {
            /* Single-phone word: needs both cross-word contexts. */
            phone_ent->id.pid.ssid =
                dict2pid_lrdiph_rc(d2p, phone_ent->id.pid.cipid,
                                   left, right);
        }
        oe_assert(phone_ent->id.pid.ssid != BAD_SSID);

        /* Word-internal phones. */
        pos = 1;
        while (pos < n_phones - 1) {
            phone_ent = ps_alignment_vector_grow_one(&al->sseq);
            if (phone_ent == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            phone_ent->id.pid.cipid = dict_pron(dict, wid, pos);
            phone_ent->id.pid.tmatid =
                bin_mdef_pid2tmatid(mdef, phone_ent->id.pid.cipid);
            phone_ent->id.pid.ssid = dict2pid_internal(d2p, wid, pos);
            oe_assert(phone_ent->id.pid.ssid != BAD_SSID);
            phone_ent->start = word_ent->start;
            phone_ent->duration = word_ent->duration;
            phone_ent->parent = w;
            ++pos;
        }

        /* Word-final phone (only present when it is distinct from the
         * word-initial phone). */
        if (pos < n_phones) {
            xwdssid_t *rssid;

            oe_assert(pos == n_phones - 1);
            phone_ent = ps_alignment_vector_grow_one(&al->sseq);
            if (phone_ent == NULL) {
                E_ERROR("Failed to add phone entry!\n");
                return -1;
            }
            phone_ent->id.pid.cipid = dict_last_phone(dict, wid);
            phone_ent->id.pid.tmatid =
                bin_mdef_pid2tmatid(mdef, phone_ent->id.pid.cipid);
            rssid = dict2pid_rssid(d2p, phone_ent->id.pid.cipid,
                                   dict_second_last_phone(dict, wid));
            /* cimap maps the right-context phone to an index in ssid. */
            phone_ent->id.pid.ssid = rssid->ssid[rssid->cimap[right]];
            oe_assert(phone_ent->id.pid.ssid != BAD_SSID);
            phone_ent->start = word_ent->start;
            phone_ent->duration = word_ent->duration;
            phone_ent->parent = w;
        }

        /* This word's last phone is the next word's left context. */
        left = dict_last_phone(dict, wid);
    }

    /* Pass 2: phones -> states.  Kept as a separate loop rather than
     * nested in the one above for clarity. */
    for (p = 0; p < al->sseq.n_ent; ++p) {
        ps_alignment_entry_t *phone_ent = al->sseq.seq + p;
        ps_alignment_entry_t *state_ent;
        int st;

        for (st = 0; st < bin_mdef_n_emit_state(mdef); ++st) {
            state_ent = ps_alignment_vector_grow_one(&al->state);
            if (state_ent == NULL) {
                E_ERROR("Failed to add state entry!\n");
                return -1;
            }
            state_ent->id.senid =
                bin_mdef_sseq2sen(mdef, phone_ent->id.pid.ssid, st);
            oe_assert(state_ent->id.senid != BAD_SENID);
            state_ent->start = phone_ent->start;
            state_ent->duration = phone_ent->duration;
            state_ent->parent = p;
            /* The phone's child is the index of its first state entry. */
            if (st == 0)
                phone_ent->child = (uint16)(state_ent - al->state.seq);
        }
    }

    return 0;
}
/**
 * Make sure the context tables contain the entries needed by a word.
 *
 * For a word of more than one phone this fills in the left-context
 * entries for its first two phones and the compressed right-context
 * entry for its last two phones, if they are not already present.
 * Otherwise it fills in the left-right context entries for the word's
 * first phone, if missing.
 *
 * @param d2p dict2pid object to update.
 * @param wid ID of the word in d2p's dictionary.
 * @return Always 0.
 */
int
dict2pid_add_word(dict2pid_t *d2p, int32 wid)
{
    bin_mdef_t *mdef = d2p->mdef;
    dict_t *d = d2p->dict;

    if (dict_pronlen(d, wid) > 1) {
        int32 first = dict_first_phone(d, wid);
        int32 second = dict_second_phone(d, wid);
        int32 last = dict_last_phone(d, wid);
        int32 penult = dict_second_last_phone(d, wid);
        s3cipid_t l;

        /* Make sure we have left and right context diphones for this
         * word. */
        if (d2p->ldiph_lc[first][second][0] == BAD_S3SSID) {
            E_INFO("Filling in left-context diphones for %s(?,%s)\n",
                   bin_mdef_ciphone_str(mdef, first),
                   bin_mdef_ciphone_str(mdef, second));
            for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
                /* Phone IDs are held in an int32, as in
                 * dict2pid_build(); s3ssid_t is too narrow for them. */
                int32 p = bin_mdef_phone_id_nearest(mdef,
                                                    (s3cipid_t) first, l,
                                                    (s3cipid_t) second,
                                                    WORD_POSN_BEGIN);
                d2p->ldiph_lc[first][second][l] = bin_mdef_pid2ssid(mdef, p);
            }
        }
        if (d2p->rssid[last][penult].n_ssid == 0) {
            s3ssid_t *rmap;
            s3ssid_t *tmpssid;
            s3cipid_t *tmpcimap;
            s3cipid_t r;

            E_INFO("Filling in right-context diphones for %s(%s,?)\n",
                   bin_mdef_ciphone_str(mdef, last),
                   bin_mdef_ciphone_str(mdef, penult));
            /* Uncompressed map: one ssid per possible right context. */
            rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap));
            for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
                int32 p = bin_mdef_phone_id_nearest(mdef,
                                                    (s3cipid_t) last,
                                                    (s3cipid_t) penult, r,
                                                    WORD_POSN_END);
                rmap[r] = bin_mdef_pid2ssid(mdef, p);
            }
            tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid));
            tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap));
            compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef));
            /* Count the leading entries of tmpssid that are in use. */
            for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++)
                ;
            /* The table takes ownership of tmpssid and tmpcimap. */
            d2p->rssid[last][penult].ssid = tmpssid;
            d2p->rssid[last][penult].cimap = tmpcimap;
            d2p->rssid[last][penult].n_ssid = r;
            ckd_free(rmap);
        }
    }
    else {
        int32 only = dict_first_phone(d, wid);

        /* Make sure we have a left-right context triphone entry for
         * this word. */
        if (d2p->lrdiph_rc[only][0][0] == BAD_S3SSID) {
            /* Log only when something is actually filled in, matching
             * the multi-phone branches above. */
            E_INFO("Filling in context triphones for %s(?,?)\n",
                   bin_mdef_ciphone_str(mdef, only));
            populate_lrdiph(d2p, NULL, only);
        }
    }

    return 0;
}
/**
 * Build HMM network for one utterance of fwdflat search.
 *
 * For every multi-phone word in ngs->fwdflat_wordlist (a list
 * terminated by a negative entry) this allocates a root channel for
 * the first phone, a chain of channels for the word-internal phones,
 * and then appends the right-context channels created by
 * ngram_search_alloc_all_rc().  The finished chain is stored in
 * ngs->word_chan[wid].
 *
 * @param ngs N-gram search object whose word channels are built.
 */
static void
build_fwdflat_chan(ngram_search_t *ngs)
{
    int32 i, wid, p;
    root_chan_t *rhmm;
    chan_t *hmm, *prevhmm;
    dict_t *dict;
    dict2pid_t *d2p;

    dict = ps_search_dict(ngs);
    d2p = ps_search_dict2pid(ngs);

    /* Build word HMMs for each word in the lattice. */
    for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
        int32 pronlen;

        wid = ngs->fwdflat_wordlist[i];

        /* Single-phone words are permanently allocated */
        if (dict_is_single_phone(dict, wid))
            continue;

        assert(ngs->word_chan[wid] == NULL);

        /* Multiplex root HMM for first phone (one root per word, flat
         * lexicon).  diphone is irrelevant here, for the time being,
         * at least. */
        rhmm = listelem_malloc(ngs->root_chan_alloc);
        rhmm->ci2phone = dict_second_phone(dict, wid);
        rhmm->ciphone = dict_first_phone(dict, wid);
        rhmm->next = NULL;
        hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE,
                 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone),
                 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone));

        /* HMMs for word-internal phones.  The pronunciation length does
         * not change inside the loop, so look it up once. */
        pronlen = dict_pronlen(dict, wid);
        prevhmm = NULL;
        for (p = 1; p < pronlen - 1; p++) {
            hmm = listelem_malloc(ngs->chan_alloc);
            hmm->ciphone = dict_pron(dict, wid, p);
            /* The loop stops before the last phone, so a word-internal
             * channel never gets a right-context ID. */
            hmm->info.rc_id = -1;
            hmm->next = NULL;
            hmm_init(ngs->hmmctx, &hmm->hmm, FALSE,
                     dict2pid_internal(d2p,wid,p),
                     bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone));

            if (prevhmm)
                prevhmm->next = hmm;
            else
                rhmm->next = hmm;

            prevhmm = hmm;
        }

        /* Right-context phones */
        ngram_search_alloc_all_rc(ngs, wid);

        /* Link in just allocated right-context phones */
        if (prevhmm)
            prevhmm->next = ngs->word_chan[wid];
        else
            rhmm->next = ngs->word_chan[wid];
        ngs->word_chan[wid] = (chan_t *) rhmm;
    }
}