static void populate_lrdiph(dict2pid_t *d2p, s3ssid_t ***rdiph_rc, s3cipid_t b) { bin_mdef_t *mdef = d2p->mdef; s3cipid_t l, r; for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { s3pid_t p; p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_SINGLE); d2p->lrdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); if (r == bin_mdef_silphone(mdef)) d2p->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); if (rdiph_rc && l == bin_mdef_silphone(mdef)) rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); assert(IS_S3SSID(bin_mdef_pid2ssid(mdef, p))); E_DEBUG(2,("%s(%s,%s) => %d / %d\n", bin_mdef_ciphone_str(mdef, b), bin_mdef_ciphone_str(mdef, l), bin_mdef_ciphone_str(mdef, r), p, bin_mdef_pid2ssid(mdef, p))); } } }
void fsg_psubtree_dump_node(fsg_lextree_t *tree, fsg_pnode_t *node, FILE *fp) { int32 i; fsg_link_t *tl; /* Indentation */ for (i = 0; i <= node->ppos; i++) fprintf(fp, " "); fprintf(fp, "%p.@", node); /* Pointer used as node * ID */ fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&node->hmm)); fprintf(fp, " %10d.LP", node->logs2prob); fprintf(fp, " %p.SIB", node->sibling); fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, node->ci_ext), node->ppos); if ((node->ppos == 0) || node->leaf) { fprintf(fp, " ["); for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) fprintf(fp, "%08x", node->ctxt.bv[i]); fprintf(fp, "]"); } if (node->leaf) { tl = node->next.fsglink; fprintf(fp, " {%s[%d->%d](%d)}", fsg_model_word_str(tree->fsg, tl->wid), tl->from_state, tl->to_state, tl->logs2prob); } else { fprintf(fp, " %p.NXT", node->next.succ); } fprintf(fp, "\n"); return; }
const char * dict_ciphone_str(dict_t * d, s3wid_t wid, int32 pos) { assert(d != NULL); assert((wid >= 0) && (wid < d->n_word)); assert((pos >= 0) && (pos < d->word[wid].pronlen)); return bin_mdef_ciphone_str(d->mdef, d->word[wid].ciphone[pos]); }
static void allphone_search_fill_iter(ps_seg_t *seg, phseg_t *phseg) { seg->sf = phseg->sf; seg->ef = phseg->ef; seg->ascr = phseg->score; seg->lscr = phseg->tscore; seg->word = bin_mdef_ciphone_str(ps_search_acmod(seg->search)->mdef, phseg->ci); }
int bin_mdef_phone_str(bin_mdef_t * m, int pid, char *buf) { char *wpos_name; assert(m); assert((pid >= 0) && (pid < m->n_phone)); wpos_name = WPOS_NAME; buf[0] = '\0'; if (pid < m->n_ciphone) sprintf(buf, "%s", bin_mdef_ciphone_str(m, pid)); else { sprintf(buf, "%s %s %s %c", bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0]), bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1]), bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2]), wpos_name[m->phone[pid].info.cd.wpos]); } return 0; }
void dict2pid_dump(FILE * fp, dict2pid_t * d2p) { int32 w, p, pronlen; int32 i, j, b, l, r; bin_mdef_t *mdef = d2p->mdef; dict_t *dict = d2p->dict; fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n"); for (w = 0; w < dict_size(dict); w++) { fprintf(fp, "%30s ", dict_wordstr(dict, w)); pronlen = dict_pronlen(dict, w); for (p = 0; p < pronlen; p++) fprintf(fp, " %5d", dict2pid_internal(d2p, w, p)); fprintf(fp, "\n"); } fprintf(fp, "#\n"); fprintf(fp, "# LDIPH_LC (b r l ssid)\n"); for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) { for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { if (IS_S3SSID(d2p->ldiph_lc[b][r][l])) fprintf(fp, "%6s %6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) r), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]); /* RAH, ldiph_lc is returning an int32, %d expects an int16 */ } } } fprintf(fp, "#\n"); fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq); for (i = 0; i < mdef->n_sseq; i++) { fprintf(fp, "%5d ", i); for (j = 0; j < bin_mdef_n_emit_state(mdef); j++) fprintf(fp, " %5d", mdef->sseq[i][j]); fprintf(fp, "\n"); } fprintf(fp, "#\n"); fprintf(fp, "# END\n"); fflush(fp); }
int bin_mdef_phone_str(bin_mdef_t * m, int pid, char *buf) { char *wpos_name; assert(m); assert((pid >= 0) && (pid < m->n_phone)); wpos_name = WPOS_NAME; buf[0] = '\0'; if (pid < m->n_ciphone) { #ifndef POCKETSPHINX_NET sprintf(buf, "%s", bin_mdef_ciphone_str(m, pid)); #else strcpy(buf, bin_mdef_ciphone_str(m, pid)); #endif } else { #ifndef POCKETSPHINX_NET sprintf(buf, "%s %s %s %c", bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0]), bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1]), bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2]), wpos_name[m->phone[pid].info.cd.wpos]); #else strcpy(buf, bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0])); strcat(buf, bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1])); strcat(buf, bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2])); char tbf [2] = {wpos_name[m->phone[pid].info.cd.wpos]}; strcat(buf, tbf); #endif } return 0; }
dict2pid_t * dict2pid_build(bin_mdef_t * mdef, dict_t * dict) { dict2pid_t *dict2pid; s3ssid_t ***rdiph_rc; bitvec_t *ldiph, *rdiph, *single; int32 pronlen; int32 b, l, r, w, p; E_INFO("Building PID tables for dictionary\n"); assert(mdef); assert(dict); dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t)); dict2pid->refcount = 1; dict2pid->mdef = bin_mdef_retain(mdef); dict2pid->dict = dict_retain(dict); E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n", mdef->n_ciphone, sizeof(s3ssid_t), mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024); dict2pid->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); /* Only used internally to generate rssid */ rdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t)); dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, mdef->n_ciphone, sizeof (s3ssid_t)); /* Actually could use memset for this, if BAD_S3SSID is guaranteed * to be 65535... */ for (b = 0; b < mdef->n_ciphone; ++b) { for (r = 0; r < mdef->n_ciphone; ++r) { for (l = 0; l < mdef->n_ciphone; ++l) { dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID; dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID; rdiph_rc[b][l][r] = BAD_S3SSID; } } } /* Track which diphones / ciphones have been seen. */ ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); single = bitvec_alloc(mdef->n_ciphone); for (w = 0; w < dict_size(dict2pid->dict); w++) { pronlen = dict_pronlen(dict, w); if (pronlen >= 2) { b = dict_first_phone(dict, w); r = dict_second_phone(dict, w); /* Populate ldiph_lc */ if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) { /* Mark this diphone as done */ bitvec_set(ldiph, b * mdef->n_ciphone + r); /* Record all possible ssids for b(?,r) */ for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_BEGIN); dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); } } /* Populate rdiph_rc */ l = dict_second_last_phone(dict, w); b = dict_last_phone(dict, w); if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) { /* Mark this diphone as done */ bitvec_set(rdiph, b * mdef->n_ciphone + l); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, (s3cipid_t) l, (s3cipid_t) r, WORD_POSN_END); rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); } } } else if (pronlen == 1) { b = dict_pron(dict, w, 0); E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n", dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b))); /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */ if (bitvec_is_clear(single, b)) { populate_lrdiph(dict2pid, rdiph_rc, b); bitvec_set(single, b); } } } bitvec_free(ldiph); bitvec_free(rdiph); bitvec_free(single); /* Try to compress rdiph_rc into rdiph_rc_compressed */ compress_right_context_tree(dict2pid, rdiph_rc); compress_left_right_context_tree(dict2pid); ckd_free_3d(rdiph_rc); dict2pid_report(dict2pid); return dict2pid; }
int dict2pid_add_word(dict2pid_t *d2p, int32 wid) { bin_mdef_t *mdef = d2p->mdef; dict_t *d = d2p->dict; if (dict_pronlen(d, wid) > 1) { s3cipid_t l; /* Make sure we have left and right context diphones for this * word. */ if (d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][0] == BAD_S3SSID) { E_INFO("Filling in left-context diphones for %s(?,%s)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_phone(d, wid))); for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { s3ssid_t p = bin_mdef_phone_id_nearest(mdef, dict_first_phone(d, wid), l, dict_second_phone(d, wid), WORD_POSN_BEGIN); d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][l] = bin_mdef_pid2ssid(mdef, p); } } if (d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid == 0) { s3ssid_t *rmap; s3ssid_t *tmpssid; s3cipid_t *tmpcimap; s3cipid_t r; E_INFO("Filling in right-context diphones for %s(%s,?)\n", bin_mdef_ciphone_str(mdef, dict_last_phone(d, wid)), bin_mdef_ciphone_str(mdef, dict_second_last_phone(d, wid))); rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap)); for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { s3ssid_t p = bin_mdef_phone_id_nearest(mdef, dict_last_phone(d, wid), dict_second_last_phone(d, wid), r, WORD_POSN_END); rmap[r] = bin_mdef_pid2ssid(mdef, p); } tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid)); tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap)); compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef)); for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++) ; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].ssid = tmpssid; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].cimap = tmpcimap; d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid = r; ckd_free(rmap); } } else { /* Make sure we have a left-right context triphone entry for * this word. */ E_INFO("Filling in context triphones for %s(?,?)\n", bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid))); if (d2p->lrdiph_rc[dict_first_phone(d, wid)][0][0] == BAD_S3SSID) { populate_lrdiph(d2p, NULL, dict_first_phone(d, wid)); } } return 0; }