int dict2pid_free(dict2pid_t * d2p) { if (d2p == NULL) return 0; if (--d2p->refcount > 0) return d2p->refcount; if (d2p->ldiph_lc) ckd_free_3d((void ***) d2p->ldiph_lc); if (d2p->lrdiph_rc) ckd_free_3d((void ***) d2p->lrdiph_rc); if (d2p->rssid) free_compress_map(d2p->rssid, bin_mdef_n_ciphone(d2p->mdef)); if (d2p->lrssid) free_compress_map(d2p->lrssid, bin_mdef_n_ciphone(d2p->mdef)); bin_mdef_free(d2p->mdef); dict_free(d2p->dict); ckd_free(d2p); return 0; }
static void populate_lrdiph(dict2pid_t *d2p, s3ssid_t ***rdiph_rc, s3cipid_t b) { bin_mdef_t *mdef = d2p->mdef; s3cipid_t l, r; for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { s3pid_t p; p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_SINGLE); d2p->lrdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); if (r == bin_mdef_silphone(mdef)) d2p->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); if (rdiph_rc && l == bin_mdef_silphone(mdef)) rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); assert(IS_S3SSID(bin_mdef_pid2ssid(mdef, p))); E_DEBUG(2,("%s(%s,%s) => %d / %d\n", bin_mdef_ciphone_str(mdef, b), bin_mdef_ciphone_str(mdef, l), bin_mdef_ciphone_str(mdef, r), p, bin_mdef_pid2ssid(mdef, p))); } } }
void dict2pid_dump(FILE * fp, dict2pid_t * d2p) { int32 w, p, pronlen; int32 i, j, b, l, r; bin_mdef_t *mdef = d2p->mdef; dict_t *dict = d2p->dict; fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n"); for (w = 0; w < dict_size(dict); w++) { fprintf(fp, "%30s ", dict_wordstr(dict, w)); pronlen = dict_pronlen(dict, w); for (p = 0; p < pronlen; p++) fprintf(fp, " %5d", dict2pid_internal(d2p, w, p)); fprintf(fp, "\n"); } fprintf(fp, "#\n"); fprintf(fp, "# LDIPH_LC (b r l ssid)\n"); for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) { for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { if (IS_S3SSID(d2p->ldiph_lc[b][r][l])) fprintf(fp, "%6s %6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) r), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]); /* RAH, ldiph_lc is returning an int32, %d expects an int16 */ } } } fprintf(fp, "#\n"); fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq); for (i = 0; i < mdef->n_sseq; i++) { fprintf(fp, "%5d ", i); for (j = 0; j < bin_mdef_n_emit_state(mdef); j++) fprintf(fp, " %5d", mdef->sseq[i][j]); fprintf(fp, "\n"); } fprintf(fp, "#\n"); fprintf(fp, "# END\n"); fflush(fp); }
void fsg_history_set_fsg(fsg_history_t *h, fsg_model_t *fsg, dict_t *dict) { if (blkarray_list_n_valid(h->entries) != 0) { E_WARN("Switching FSG while history not empty; history cleared\n"); blkarray_list_reset(h->entries); } if (h->frame_entries) ckd_free_2d((void **) h->frame_entries); h->frame_entries = NULL; h->fsg = fsg; if (fsg && dict) { h->n_ciphone = bin_mdef_n_ciphone(dict->mdef); h->frame_entries = (glist_t **) ckd_calloc_2d(fsg_model_n_state(fsg), bin_mdef_n_ciphone(dict->mdef), sizeof(glist_t)); } }
fsg_history_t * fsg_history_init(fsg_model_t * fsg, dict_t *dict) { fsg_history_t *h; h = (fsg_history_t *) ckd_calloc(1, sizeof(fsg_history_t)); h->fsg = fsg; h->entries = blkarray_list_init(); if (fsg && dict) { h->n_ciphone = bin_mdef_n_ciphone(dict->mdef); h->frame_entries = (glist_t **) ckd_calloc_2d(fsg_model_n_state(fsg), bin_mdef_n_ciphone(dict->mdef), sizeof(**h->frame_entries)); } else { h->frame_entries = NULL; } return h; }
static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) { phone_loop_search_t *pls = (phone_loop_search_t *)search; cmd_ln_t *config = ps_search_config(search); acmod_t *acmod = ps_search_acmod(search); int i; /* Free old dict2pid, dict, if necessary. */ ps_search_base_reinit(search, dict, d2p); /* Initialize HMM context. */ if (pls->hmmctx) hmm_context_free(pls->hmmctx); pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), acmod->tmat->tp, NULL, acmod->mdef->sseq); if (pls->hmmctx == NULL) return -1; /* Initialize penalty storage */ pls->n_phones = bin_mdef_n_ciphone(acmod->mdef); pls->window = cmd_ln_int32_r(config, "-pl_window"); if (pls->penalties) ckd_free(pls->penalties); pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties)); if (pls->pen_buf) ckd_free_2d(pls->pen_buf); pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf)); /* Initialize phone HMMs. */ if (pls->hmms) { for (i = 0; i < pls->n_phones; ++i) hmm_deinit((hmm_t *)&pls->hmms[i]); ckd_free(pls->hmms); } pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms)); for (i = 0; i < pls->n_phones; ++i) { hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i], FALSE, bin_mdef_pid2ssid(acmod->mdef, i), bin_mdef_pid2tmatid(acmod->mdef, i)); } pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight"); pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT; pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT; pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT; E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n", pls->beam, pls->pbeam, pls->pip); return 0; }
static fsg_pnode_t * fsg_psubtree_init(fsg_lextree_t *lextree, fsg_model_t * fsg, int32 from_state, fsg_pnode_t ** alloc_head) { int32 dst; gnode_t *gn; fsg_link_t *fsglink; fsg_pnode_t *root; int32 n_ci; fsg_glist_linklist_t *glist = NULL; root = NULL; assert(*alloc_head == NULL); n_ci = bin_mdef_n_ciphone(lextree->mdef); if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) { E_FATAL ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n", FSG_PNODE_CTXT_BVSZ * 32); } for (dst = 0; dst < fsg_model_n_state(fsg); dst++) { /* Add all links from from_state to dst */ for (gn = fsg_model_trans(fsg, from_state, dst); gn; gn = gnode_next(gn)) { /* Add word emitted by this transition (fsglink) to lextree */ fsglink = (fsg_link_t *) gnode_ptr(gn); assert(fsg_link_wid(fsglink) >= 0); /* Cannot be a null trans */ root = psubtree_add_trans(lextree, root, &glist, fsglink, lextree->lc[from_state], lextree->rc[dst], alloc_head); } } fsg_glist_linklist_free(glist); return root; }
/** * Compute the left and right context CIphone sets for each state. */ static void fsg_lextree_lc_rc(fsg_lextree_t *lextree) { int32 s, i, j; int32 n_ci; fsg_model_t *fsg; int32 silcipid; int32 len; silcipid = bin_mdef_silphone(lextree->mdef); assert(silcipid >= 0); n_ci = bin_mdef_n_ciphone(lextree->mdef); fsg = lextree->fsg; /* * lextree->lc[s] = set of left context CIphones for state s. Similarly, rc[s] * for right context CIphones. */ lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc)); lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc)); E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n", fsg->n_state * (n_ci + 1) * 2, fsg->n_state * (n_ci + 1) * 2 / 1024); for (s = 0; s < fsg->n_state; s++) { fsg_arciter_t *itor; for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { fsg_link_t *l = fsg_arciter_get(itor); int32 dictwid; /**< Dictionary (not FSG) word ID!! */ if (fsg_link_wid(l) >= 0) { dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, l->wid)); /* * Add the first CIphone of l->wid to the rclist of state s, and * the last CIphone to lclist of state d. * (Filler phones are a pain to deal with. There is no direct * marking of a filler phone; but only filler words are supposed to * use such phones, so we use that fact. HACK!! FRAGILE!!) */ if (fsg_model_is_filler(fsg, fsg_link_wid(l))) { /* Filler phone; use silence phone as context */ lextree->rc[fsg_link_from_state(l)][silcipid] = 1; lextree->lc[fsg_link_to_state(l)][silcipid] = 1; } else { len = dict_pronlen(lextree->dict, dictwid); lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1; lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1; } } } } for (s = 0; s < fsg->n_state; s++) { /* * Add SIL phone to the lclist and rclist of each state. Strictly * speaking, only needed at start and final states, respectively, but * all states considered since the user may change the start and final * states. In any case, most applications would have a silence self * loop at each state, hence these would be needed anyway. */ lextree->lc[s][silcipid] = 1; lextree->rc[s][silcipid] = 1; } /* * Propagate lc and rc lists past null transitions. (Since FSG contains * null transitions closure, no need to worry about a chain of successive * null transitions. Right??) * * This can't be joined with the previous loop because we first calculate * contexts and only then we can propagate them. */ for (s = 0; s < fsg->n_state; s++) { fsg_arciter_t *itor; for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { fsg_link_t *l = fsg_arciter_get(itor); if (fsg_link_wid(l) < 0) { /* * lclist(d) |= lclist(s), because all the words ending up at s, can * now also end at d, becoming the left context for words leaving d. */ for (i = 0; i < n_ci; i++) lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i]; /* * Similarly, rclist(s) |= rclist(d), because all the words leaving d * can equivalently leave s, becoming the right context for words * ending up at s. */ for (i = 0; i < n_ci; i++) lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i]; } } } /* Convert the bit-vector representation into a list */ for (s = 0; s < fsg->n_state; s++) { j = 0; for (i = 0; i < n_ci; i++) { if (lextree->lc[s][i]) { lextree->lc[s][j] = i; j++; } } lextree->lc[s][j] = -1; /* Terminate the list */ j = 0; for (i = 0; i < n_ci; i++) { if (lextree->rc[s][i]) { lextree->rc[s][j] = i; j++; } } lextree->rc[s][j] = -1; /* Terminate the list */ } }
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ int32 tmatid; gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. */ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; ++n_lc_alloc; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } }
dict2pid_t * dict2pid_build(bin_mdef_t * mdef, dict_t * dict) { dict2pid_t *dict2pid; s3ssid_t ***rdiph_rc; bitvec_t *ldiph, *rdiph, *single; int32 pronlen; int32 b, l, r, w, p; E_INFO("Building PID tables for dictionary\n"); assert(mdef); assert(dict); dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t)); dict2pid->refcount = 1; dict2pid->mdef = bin_mdef_retain(mdef); dict2pid->dict = dict_retain(dict); E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n", mdef->n_ciphone, sizeof(s3ssid_t), mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024); dict2pid->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); /* Only used internally to generate rssid */ rdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof (s3ssid_t)); /* Actually could use memset for this, if BAD_S3SSID is guaranteed * to be 65535... */ for (b = 0; b < mdef->n_ciphone; ++b) { for (r = 0; r < mdef->n_ciphone; ++r) { for (l = 0; l < mdef->n_ciphone; ++l) { dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID; dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID; rdiph_rc[b][l][r] = BAD_S3SSID; } } } /* Track which diphones / ciphones have been seen. */ ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); single = bitvec_alloc(mdef->n_ciphone); for (w = 0; w < dict_size(dict2pid->dict); w++) { pronlen = dict_pronlen(dict, w); if (pronlen >= 2) { b = dict_first_phone(dict, w); r = dict_second_phone(dict, w); /* Populate ldiph_lc */ if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) { /* Mark this diphone as done */ bitvec_set(ldiph, b * mdef->n_ciphone + r); /* Record all possible ssids for b(?,r) */ for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_BEGIN); dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); } } /* Populate rdiph_rc */ l = dict_second_last_phone(dict, w); b = dict_last_phone(dict, w); if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) { /* Mark this diphone as done */ bitvec_set(rdiph, b * mdef->n_ciphone + l); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_END); rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); } } } else if (pronlen == 1) { b = dict_pron(dict, w, 0); E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n", dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b))); /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */ if (bitvec_is_clear(single, b)) { populate_lrdiph(dict2pid, rdiph_rc, b); bitvec_set(single, b); } } } bitvec_free(ldiph); bitvec_free(rdiph); bitvec_free(single); /* Try to compress rdiph_rc into rdiph_rc_compressed */ compress_right_context_tree(dict2pid, rdiph_rc); compress_left_right_context_tree(dict2pid); ckd_free_3d(rdiph_rc); dict2pid_report(dict2pid); return dict2pid; }
int dict2pid_add_word(dict2pid_t *d2p, int32 wid) { bin_mdef_t *mdef = d2p->mdef; dict_t *d = d2p->dict; if (dict_pronlen(d, wid) > 1) { s3cipid_t l; /* Make sure we have left and right context diphones for this * word. */ if (d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][0] == BAD_S3SSID) { E_INFO("Filling in left-context diphones for %s(?,%s)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_phone(d, wid))); for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { s3ssid_t p = bin_mdef_phone_id_nearest(mdef, dict_first_phone(d, wid), l, dict_second_phone(d, wid), WORD_POSN_BEGIN); d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][l] = bin_mdef_pid2ssid(mdef, p); } } if (d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid == 0) { s3ssid_t *rmap; s3ssid_t *tmpssid; s3cipid_t *tmpcimap; s3cipid_t r; E_INFO("Filling in right-context diphones for %s(%s,?)\n", bin_mdef_ciphone_str(mdef, dict_last_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_last_phone(d, wid))); rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap)); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { s3ssid_t p = bin_mdef_phone_id_nearest(mdef, dict_last_phone(d, wid), dict_second_last_phone(d, wid), r, WORD_POSN_END); rmap[r] = bin_mdef_pid2ssid(mdef, p); } tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid)); tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap)); compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef)); for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++) ; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].ssid = tmpssid; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].cimap = tmpcimap; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid = r; ckd_free(rmap); } } else { /* Make sure we have a left-right context triphone entry for * this word. */ E_INFO("Filling in context triphones for %s(?,?)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid))); if (d2p->lrdiph_rc[dict_first_phone(d, wid)][0][0] == BAD_S3SSID) { populate_lrdiph(d2p, NULL, dict_first_phone(d, wid)); } } return 0; }
/** * Build net from phone HMMs */ static int phmm_build(allphone_search_t * allphs) { phmm_t *p, **pid2phmm; bin_mdef_t *mdef; int32 lrc_size; uint32 *lc, *rc; s3pid_t pid; s3cipid_t ci; s3cipid_t *filler; int n_phmm, n_link; int i, nphone; mdef = ((ps_search_t *) allphs)->acmod->mdef; allphs->ci_phmm = (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *)); pid2phmm = (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *)); /* For each unique ciphone/triphone entry in mdef, create a PHMM node */ n_phmm = 0; nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef); E_INFO("Building PHMM net of %d phones\n", nphone); for (pid = 0; pid < nphone; pid++) { if ((p = phmm_lookup(allphs, pid)) == NULL) { //not found, should be created p = (phmm_t *) ckd_calloc(1, sizeof(*p)); hmm_init(allphs->hmmctx, &(p->hmm), FALSE, mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat); p->pid = pid; p->ci = bin_mdef_pid2ci(mdef, pid); p->succlist = NULL; p->next = allphs->ci_phmm[p->ci]; allphs->ci_phmm[p->ci] = p; n_phmm++; } pid2phmm[pid] = p; } /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */ lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef)); lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t)); rc = lc + (n_phmm * lrc_size); for (ci = 0; ci < mdef->n_ciphone; ci++) { for (p = allphs->ci_phmm[ci]; p; p = p->next) { p->lc = lc; lc += lrc_size; p->rc = rc; rc += lrc_size; } } /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */ filler = (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t)); /* Connect fillers */ i = 0; for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) { p = pid2phmm[ci]; bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef)); bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef)); if (mdef->phone[ci].info.ci.filler) { filler[i++] = ci; } } filler[i] = BAD_S3CIPID; /* Loop over cdphones only if ci_only is not set */ for (pid = bin_mdef_n_ciphone(mdef); pid < nphone; pid++) { p = pid2phmm[pid]; if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) { for (i = 0; IS_S3CIPID(filler[i]); i++) bitvec_set(p->lc, filler[i]); } else bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]); if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) { for (i = 0; IS_S3CIPID(filler[i]); i++) bitvec_set(p->rc, filler[i]); } else bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]); } ckd_free(pid2phmm); ckd_free(filler); /* Create links between PHMM nodes */ n_link = phmm_link(allphs); E_INFO("%d nodes, %d links\n", n_phmm, n_link); return 0; }
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. */ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } } glist_free(lc_pnodelist); } else { /* Filler word; no context modelled */ ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; fsg_pnode_add_all_ctxt(&(pnode->ctxt)); pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); } } else { /* Multi-phone word */ fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ ssid_pnode_map = (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); lc_pnodelist = NULL; rc_pnodelist = NULL; for (p = 0; p < pronlen; p++) { int ci = dict_pron(lextree->dict, dictwid, p); if (p == 0) { /* Root phone, handle required left contexts */ /* Find if we already have an lc_pnodelist for the first phone of this word */ fsg_glist_linklist_t *predglist=*curglist; fsg_glist_linklist_t *glist=*curglist; rc = dict_pron(lextree->dict, dictwid, 1); while (glist && glist->glist && glist->ci != ci && glist->rc != rc){ glist = glist->next; } if (glist && glist->ci == ci && glist->rc == rc && glist->glist) { /* We've found a valid glist. Hook to it and move to next phoneme */ lc_pnodelist = glist->glist; /* Set the predecessor node for the future tree first */ pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); continue; } else { /* Two cases that can bring us here * a. glist == NULL, i.e. end of current list. Create new entry. * b. glist->glist == NULL, i.e. first entry into list. */ if (!glist) { /* Case a; reduce it to case b by allocing glist */ glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); glist->next = predglist; *curglist = glist; } glist->ci = ci; glist->rc = rc; glist->lc = -1; lc_pnodelist = glist->glist = NULL; /* Gets created below */ } for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); /* Compression is not done by d2p, so we do it * here. This might be slow, but it might not * be... we'll see. */ pnode = ssid_pnode_map[0]; for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { pnode = ssid_pnode_map[j]; if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) break; } assert(j < n_ci); if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* This bit is tricky! For now we'll put the prob in the final link only */ /* pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; */ pnode->logs2prob = lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = FALSE; pnode->sibling = root; /* All root nodes linked together */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } fsg_pnode_add_ctxt(pnode, lc); } /* Put the lc_pnodelist back into glist */ glist->glist = lc_pnodelist; /* The predecessor node for the future tree is the root */ pred = root; } else if (p != pronlen - 1) { /* Word internal phone */ fsg_pnode_t *pnodeyoungest; ssid = dict2pid_internal(lextree->d2p, dictwid, p); /* First check if we already have this ssid in our tree */ pnode = pred->next.succ; pnodeyoungest = pnode; /* The youngest sibling */ while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { pnode = pnode->sibling; } if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { /* Found the ssid; go to next phoneme */ pred = pnode; continue; } /* pnode not found, allocate it */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->logs2prob = lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = FALSE; pnode->sibling = pnodeyoungest; /* May be NULL */ if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); pred->next.succ = pnode; } } else { /* Predecessor = word internal node */ pred->next.succ = pnode; } pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); pred = pnode; } else { /* Leaf phone, handle required right contexts */ /* Note, leaf phones are not part of the tree */ xwdssid_t *rssid; memset((void *) ssid_pnode_map, 0, n_ci * sizeof(fsg_pnode_t *)); lc = dict_pron(lextree->dict, dictwid, p-1); rssid = dict2pid_rssid(lextree->d2p, ci, lc); for (i = 0; rclist[i] >= 0; i++) { rc = rclist[i]; j = rssid->cimap[rc]; ssid = rssid->ssid[j]; pnode = ssid_pnode_map[j]; if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* We are plugging the word prob here. Ugly */ /* pnode->logs2prob = lextree->pip; */ pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = TRUE; pnode->sibling = rc_pnodelist ? (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; pnode->next.fsglink = fsglink; pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); rc_pnodelist = glist_add_ptr(rc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } else { assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); } fsg_pnode_add_ctxt(pnode, rc); } if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); /* Since all entries of lc_pnodelist point to the same array, sufficient to update it once */ break; } } } else { /* Predecessor = word internal node */ if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); } } } } ckd_free((void *) ssid_pnode_map); /* glist_free(lc_pnodelist); Nope; this gets freed outside */ glist_free(rc_pnodelist); } *alloc_head = head; return root; }