/*
 * Print a short description of phone node p to stdout (no newline).
 *
 * A node without a word id is printed as "<head>" (when its id is -1) or
 * "<tail>"; any other node is printed as "word.pos.".  This is followed by
 * the left context phone, the CI phone in parentheses and the right context
 * phone; "-" stands in for any phone id that is not a valid CI phone id.
 */
static void dump_pnode_info (pnode_t *p)
{
    if (NOT_WID(p->wid))
        printf ("%s", (p->id == -1) ? "<head>" : "<tail>");
    else
        /* Only word and position belong here; the CI phone is printed below. */
        printf ("%s.%d.", dict_wordstr(p->wid), p->pos);

    printf ("%s", IS_CIPID(p->lc) ? mdef_ciphone_str (mdef, p->lc) : "-");
    printf ("(%s)", IS_CIPID(p->ci) ? mdef_ciphone_str (mdef, p->ci) : "-");
    printf ("%s", IS_CIPID(p->rc) ? mdef_ciphone_str (mdef, p->rc) : "-");
}
/*
 * Write the LM-state table of vh to fp, bracketed by "LMSTATE" and
 * "END_LMSTATE" lines.
 *
 * For every word id in vh->lwidlist, each entry of the corresponding
 * lmstate_root list is printed as "<word>.<ciphone> -> <vhid>", followed by
 * the dump of its subtree (vithist_lmstate_subtree_dump, starting level 1).
 * The stream is flushed before returning.
 */
static void vithist_lmstate_dump (vithist_t *vh, kbcore_t *kbc, FILE *fp)
{
    mdef_t *mdef = kbcore_mdef (kbc);
    lm_t *lm = kbcore_lm (kbc);
    gnode_t *wnode;

    fprintf (fp, "LMSTATE\n");

    wnode = vh->lwidlist;
    while (wnode) {
        int32 lwid = (int32) gnode_int32 (wnode);
        glist_t gl = vh->lmstate_root[lwid];
        gnode_t *snode;

        /* Every word id on the list is expected to own a non-empty state list */
        assert (gl);

        for (snode = gl; snode; snode = gnode_next(snode)) {
            vh_lmstate2vithist_t *entry = (vh_lmstate2vithist_t *) gnode_ptr (snode);

            fprintf (fp, "\t%s.%s -> %d\n",
                     lm_wordstr(lm, lwid),
                     mdef_ciphone_str (mdef, entry->state),
                     entry->vhid);
            vithist_lmstate_subtree_dump (vh, kbc, entry, 1, fp);
        }

        wnode = gnode_next(wnode);
    }

    fprintf (fp, "END_LMSTATE\n");
    fflush (fp);
}
/*
 * Print one pronunciation-error record to stdout, in the form
 *
 *   <words ws..we>  =>  (<left ctx>) <phones ps..pe>  (<right ctx>) ( <id> )
 *
 * id      utterance id printed at the end of the record
 * ref     reference word-id sequence
 * nref    not used by this function
 * wseg    not used by this function
 * ap      CI phone ids; entries ps..pe are printed
 * ap_err  per-phone flags; a non-zero flag passes the phone name through
 *         ucase(), otherwise through lcase()
 * ws, we  first and last index into ref of the region in error
 * ps, pe  first and last index into ap of the region in error
 *
 * The left context is the last phone of ref[ws-1] (empty when ws == 0 or the
 * word id is invalid); the right context is the first phone of ref[we+1].
 * All formatting into the local buffer is bounded; an over-long word sequence
 * is truncated rather than overflowing the buffer.
 */
static void pronerr_output (char *id, s3wid_t *ref, int32 nref, wseg_t *wseg,
                            s3cipid_t *ap, int8 *ap_err,
                            int32 ws, int32 we, int32 ps, int32 pe)
{
    int32 j;
    s3wid_t rcwid, lcwid;
    char str[4096];
    size_t len;
    int n;

    /* Word sequence for region in error */
    n = snprintf (str, sizeof(str), "%s",
                  dict_wordstr (dict, dict_basewid(dict, ref[ws])));
    if (n < 0) {
        str[0] = '\0';
        len = 0;
    }
    else if ((size_t) n >= sizeof(str))
        len = sizeof(str) - 1;          /* First word alone was truncated */
    else
        len = (size_t) n;

    for (j = ws+1; (j <= we) && (len < sizeof(str) - 1); j++) {
        n = snprintf (str + len, sizeof(str) - len, " %s",
                      dict_wordstr (dict, dict_basewid(dict, ref[j])));
        if (n < 0) {
            str[len] = '\0';            /* Discard whatever the failed call left behind */
            break;
        }
        if ((size_t) n >= sizeof(str) - len)
            break;                      /* Buffer full; keep the truncated text */
        len += (size_t) n;
    }
    printf ("%-22s\t=>\t", str);

    /* Print left context phone */
    lcwid = (ws > 0) ? ref[ws-1] : BAD_WID;
    if (IS_WID(lcwid)) {
        j = dict->word[lcwid].pronlen - 1;
        snprintf (str, sizeof(str), "(%s)",
                  mdef_ciphone_str (mdef, dict->word[lcwid].ciphone[j]));
    }
    else
        snprintf (str, sizeof(str), "%s", "()");
    printf ("%-5s", str);

    /* Phone sequence for region in error */
    for (j = ps; j <= pe; j++) {
        snprintf (str, sizeof(str), "%s", mdef_ciphone_str (mdef, ap[j]));
        if (ap_err[j])
            ucase (str);
        else
            lcase (str);
        printf (" %s", str);
    }

    /* Right context if ending in error */
    /* NOTE(review): ref[we+1] is read without consulting nref; this presumably
     * relies on ref carrying a terminating entry past the last word -- confirm
     * against the caller. */
    rcwid = ref[we+1];
    if (IS_WID(rcwid))
        printf ("\t(%s)", mdef_ciphone_str (mdef, dict->word[rcwid].ciphone[0]));
    else
        printf ("\t()");

    printf (" ( %s )\n", id);
}
/*
 * Write a printable name for phone pid of model definition m into buf.
 *
 * A CI phone (pid < m->n_ciphone) is written as its CI phone name; any other
 * phone is written as "<base> <left> <right> <word-position char>".
 * The caller must supply a buffer large enough for the result; no length
 * check is performed here.  Always returns 0.
 *
 * All range checks use the model passed in as m, so the function works for
 * any model definition and not only for a file-level default one.
 */
int32 mdef_phone_str (mdef_t *m, s3pid_t pid, char *buf)
{
    assert (m);
    assert ((pid >= 0) && (pid < m->n_phone));

    buf[0] = '\0';
    if (pid < m->n_ciphone)
        sprintf (buf, "%s", mdef_ciphone_str (m, (s3cipid_t) pid));
    else {
        sprintf (buf, "%s %s %s %c",
                 mdef_ciphone_str(m, m->phone[pid].ci),
                 mdef_ciphone_str(m, m->phone[pid].lc),
                 mdef_ciphone_str(m, m->phone[pid].rc),
                 wpos_name[m->phone[pid].wpos]);
    }

    return 0;
}
/*
 * Return the name of the CI phone at position pos in the pronunciation of
 * word wid.  The name comes from the model definition when the dictionary
 * has one attached (d->mdef), otherwise from the dictionary's own
 * ciphone_str table.
 */
const char *dict_ciphone_str (dict_t *d, s3wid_t wid, int32 pos)
{
    assert (d != NULL);
    assert ((wid >= 0) && (wid < d->n_word));
    assert ((pos >= 0) && (pos < d->word[wid].pronlen));

    /* No model definition attached: use the dictionary's private name table */
    if (!d->mdef)
        return (d->ciphone_str[d->word[wid].ciphone[pos]]);

    return mdef_ciphone_str (d->mdef, d->word[wid].ciphone[pos]);
}
/*
 * Format the name of phone pid from model definition m into buf.
 *
 * CI phones are written as their plain name; all other phones as
 * "<base> <left> <right> <c>", where <c> is the character of WPOS_NAME
 * indexed by the phone's word position.  buf must be large enough for the
 * result (no bound is checked).  Always returns 0.
 */
int mdef_phone_str(mdef_t * m, int pid, char *buf)
{
    char *pos_chars;

    assert(m);
    assert((pid >= 0) && (pid < m->n_phone));

    pos_chars = WPOS_NAME;
    buf[0] = '\0';

    if (pid < m->n_ciphone) {
        /* Context-independent phone: the name alone */
        sprintf(buf, "%s", mdef_ciphone_str(m, pid));
        return 0;
    }

    /* Context-dependent phone: base, left, right and word-position code */
    sprintf(buf, "%s %s %s %c",
            mdef_ciphone_str(m, m->phone[pid].ci),
            mdef_ciphone_str(m, m->phone[pid].lc),
            mdef_ciphone_str(m, m->phone[pid].rc),
            pos_chars[m->phone[pid].wpos]);
    return 0;
}
/*
 * Print one line to stdout describing phone node p and its successors:
 * node id, then word/position/pid/CI phone (or a "<phead>" placeholder when
 * the node has no valid word id), the left and right context phones ("-" if
 * not a valid CI phone id), a tab, and the ids of all nodes on p->succlist.
 */
static void dump_pnode_succ (pnode_t *p)
{
    plink_t *link;
    const char *lc_name;
    const char *rc_name;

    printf (" %5d", p->id);

    if (!IS_WID(p->wid))
        printf (" %20s %02d %6d %4s", "<phead>", 0, BAD_PID, "");
    else
        printf (" %20s %02d %6d %4s",
                dict_wordstr(p->wid), p->pos, p->pid,
                mdef_ciphone_str (mdef, p->ci));

    /* Context phones, with "-" standing in for an invalid id */
    lc_name = IS_CIPID(p->lc) ? mdef_ciphone_str (mdef, p->lc) : "-";
    rc_name = IS_CIPID(p->rc) ? mdef_ciphone_str (mdef, p->rc) : "-";
    printf (" %4s %4s", lc_name, rc_name);

    printf ("\t");
    link = p->succlist;
    while (link) {
        printf (" %5d", link->node->id);
        link = link->next;
    }
    printf ("\n");
}
/*
 * Write the phone segmentation phseg as xlabel-style labels.
 *
 * The output file name is built by build_output_uttfile() from dir, uttid
 * and ctlspec, with ".lab" appended.  If that file cannot be opened, or if
 * dir was NULL to begin with, every line is prefixed with "PH:<uttid>>" /
 * "ph:<uttid>>"; on open failure the output goes to stdout instead.
 * Each segment is written as "<end time> 125 <CI phone name>", the end time
 * being the segment's end frame divided by fps.
 */
static void write_phlab(char *dir, align_phseg_t * phseg, char *uttid,
                        char *ctlspec, int32 fps)
{
    char path[1024];
    FILE *out;
    int tagged;                 /* Non-zero: prefix lines and do not close out */

    /* Attempt to write segmentation for this utt to a separate file */
    build_output_uttfile(path, dir, uttid, ctlspec);
    strcat(path, ".lab");
    E_INFO("Writing xlabel style phone labels to: %s\n", path);

    out = fopen(path, "w");
    if (out == NULL) {
        E_ERROR_SYSTEM("Failed to open file %s for writing", path);
        out = stdout;           /* Segmentations can be directed to stdout this way */
        E_INFO("Phone segmentation (%s):\n", uttid);
        dir = NULL;
    }
    tagged = (dir == NULL);

    if (tagged) {
        fprintf(out, "PH:%s>", uttid);
        fflush(out);
    }
    fprintf(out, "#\n");

    while (phseg) {
        const char *name =
            mdef_ciphone_str(kbc->mdef, kbc->mdef->phone[phseg->pid].ci);

        if (tagged) {
            fprintf(out, "ph:%s>", uttid);
            fflush(out);
        }
        fprintf(out, "%0.6f 125 %s\n", (double) phseg->ef / fps, name);
        fflush(out);

        phseg = phseg->next;
    }

    if (tagged) {
        fprintf(out, "\n");
        fflush(out);
    }
    else
        fclose(out);
}
/*
 * Write the per-frame state segmentation stseg to a binary ".stseg" file
 * whose name is built by build_output_uttfile() from dir, uttid and ctlspec.
 *
 * File layout, in order: version text "0.1\n"; every CI phone name followed
 * by a space; the WPOS_NAME string; a format description line; the
 * "*end_comment*\n" marker; BYTE_ORDER_MAGIC (int32); the number of frames
 * (int32); then for each frame three CI phone ids (base, left, right), one
 * byte holding the word position in the top 3 bits and the state in the low
 * 5 bits, and an int32 score (segment score plus senscale for that frame).
 *
 * An open failure is reported and the function returns; any write failure is
 * reported and the file is closed.
 */
static void write_stseg (char *dir, align_stseg_t *stseg, char *uttid, char *ctlspec)
{
    char filename[1024];
    FILE *fp;
    align_stseg_t *cur;
    const char *text;
    size_t len;
    int32 frame, val;
    s3cipid_t ci[3];
    uint8 pos;
    word_posn_t wpos;

    build_output_uttfile (filename, dir, uttid, ctlspec);
    strcat (filename, ".stseg");
    E_INFO("Writing state segmentation to: %s\n", filename);

    fp = fopen (filename, "wb");
    if (fp == NULL) {
        E_ERROR("fopen(%s,wb) failed\n", filename);
        return;
    }

    /* Version number */
    if (fwrite ("0.1\n", sizeof(char), 4, fp) != 4)
        goto write_error;

    /* CI phone names, each followed by a single space */
    for (val = 0; val < mdef->n_ciphone; val++) {
        text = mdef_ciphone_str (mdef, val);
        len = strlen(text);
        if (fwrite (text, sizeof(char), len, fp) != len)
            goto write_error;
        if (fwrite (" ", sizeof(char), 1, fp) != 1)
            goto write_error;
    }

    /* Word-position characters */
    text = WPOS_NAME;
    len = strlen(text);
    if (fwrite (text, sizeof(char), len, fp) != len)
        goto write_error;

    /* Format "description" */
    text = "\nCI.8 LC.8 RC.8 POS.3(HI)-ST.5(LO) SCR(32)\n";
    len = strlen(text);
    if (fwrite (text, sizeof(char), len, fp) != len)
        goto write_error;

    /* End of the textual header */
    if (fwrite ("*end_comment*\n", sizeof(char), 14, fp) != 14)
        goto write_error;

    /* Byte-ordering magic number */
    val = BYTE_ORDER_MAGIC;
    if (fwrite (&val, sizeof(int32), 1, fp) != 1)
        goto write_error;

    /* Number of frames = length of the segmentation list */
    val = 0;
    cur = stseg;
    while (cur) {
        val++;
        cur = cur->next;
    }
    if (fwrite (&val, sizeof(int32), 1, fp) != 1)
        goto write_error;

    /* One record per frame */
    frame = 0;
    while (stseg) {
        mdef_phone_components (mdef, stseg->pid, ci, &(ci[1]), &(ci[2]), &wpos);
        assert ((wpos >= 0) && (wpos < 8));
        assert ((stseg->state >= 0) && (stseg->state < 32));

        if (fwrite (ci, sizeof(s3cipid_t), 3, fp) != 3)
            goto write_error;

        /* Word position in the high 3 bits, state index in the low 5 */
        pos = (wpos << 5) | (stseg->state & 0x001f);
        if (fwrite (&pos, sizeof(uint8), 1, fp) != 1)
            goto write_error;

        val = stseg->score + senscale[frame];
        if (fwrite (&val, sizeof(int32), 1, fp) != 1)
            goto write_error;

        frame++;
        stseg = stseg->next;
    }

    fclose (fp);
    return;

write_error:
    E_ERROR("fwrite(%s) failed\n", filename);
    fclose (fp);
}
/*
 * Write the word FSG fsg to fp in textual form.
 *
 * Output consists of a comment header (with the current time when
 * available), the begin declaration, the number of states, the start and
 * final states, and every transition.  Each transition is preceded by a
 * comment line holding its raw integer fields; the declaration line carries
 * the probability EXP(logs2prob / lw) and, for non-null transitions, the
 * word string (empty when the word id is negative).  When both fsg->lc and
 * fsg->rc are present, the per-state left/right context phone lists are
 * added as comment lines.  The stream is flushed before returning.
 */
void word_fsg_write(word_fsg_t * fsg, FILE * fp)
{
    time_t now;
    int32 from, to, k;
    gnode_t *node;
    word_fsglink_t *link;

    assert(fsg);
    assert(fsg->dict);

    /* Header comment, time-stamped when the clock is available */
    time(&now);
    if (now > 0)
        fprintf(fp, "%c WORD-FSG; %s\n", WORD_FSG_COMMENT_CHAR, ctime(&now));
    else
        fprintf(fp, "%c WORD-FSG\n", WORD_FSG_COMMENT_CHAR);

    fprintf(fp, "%s\n", WORD_FSG_BEGIN_DECL);

    fprintf(fp, "%c #states\n", WORD_FSG_COMMENT_CHAR);
    fprintf(fp, "%s %d\n", WORD_FSG_NUM_STATES_DECL, fsg->n_state);

    fprintf(fp, "%c start-state\n", WORD_FSG_COMMENT_CHAR);
    fprintf(fp, "%s %d\n", WORD_FSG_START_STATE_DECL, fsg->start_state);

    fprintf(fp, "%c final-state\n", WORD_FSG_COMMENT_CHAR);
    fprintf(fp, "%s %d\n", WORD_FSG_FINAL_STATE_DECL, fsg->final_state);

    fprintf(fp, "%c transitions\n", WORD_FSG_COMMENT_CHAR);
    fprintf(fp, "%c from-state to-state logs2prob*lw word-ID\n",
            WORD_FSG_COMMENT_CHAR);

    for (from = 0; from < fsg->n_state; from++) {
        for (to = 0; to < fsg->n_state; to++) {
            /* Non-null transitions between this pair of states */
            node = fsg->trans[from][to];
            while (node) {
                link = (word_fsglink_t *) gnode_ptr(node);

                fprintf(fp, "%c %d %d %d %d\n", WORD_FSG_COMMENT_CHAR,
                        link->from_state, link->to_state,
                        link->logs2prob, link->wid);
                fprintf(fp, "%s %d %d %.3e %s\n", WORD_FSG_TRANSITION_DECL,
                        link->from_state, link->to_state,
                        EXP(link->logs2prob / fsg->lw),
                        (link->wid < 0) ? "" : dict_wordstr(fsg->dict, link->wid));

                node = gnode_next(node);
            }

            /* Null transition between this pair of states, if any */
            link = fsg->null_trans[from][to];
            if (link) {
                fprintf(fp, "%c %d %d %d\n", WORD_FSG_COMMENT_CHAR,
                        link->from_state, link->to_state, link->logs2prob);
                fprintf(fp, "%s %d %d %.3e\n", WORD_FSG_TRANSITION_DECL,
                        link->from_state, link->to_state,
                        EXP(link->logs2prob / fsg->lw));
            }
        }
    }

    /* Left/right context vectors; each list ends at the first negative id */
    if (fsg->lc && fsg->rc) {
        for (from = 0; from < fsg->n_state; from++) {
            fprintf(fp, "%c LC[%d]:", WORD_FSG_COMMENT_CHAR, from);
            for (k = 0; fsg->lc[from][k] >= 0; k++)
                fprintf(fp, " %s", mdef_ciphone_str(fsg->mdef, fsg->lc[from][k]));
            fprintf(fp, "\n");

            fprintf(fp, "%c RC[%d]:", WORD_FSG_COMMENT_CHAR, from);
            for (k = 0; fsg->rc[from][k] >= 0; k++)
                fprintf(fp, " %s", mdef_ciphone_str(fsg->mdef, fsg->rc[from][k]));
            fprintf(fp, "\n");
        }
    }

    fprintf(fp, "%c\n", WORD_FSG_COMMENT_CHAR);
    fprintf(fp, "%s\n", WORD_FSG_END_DECL);

    fflush(fp);
}
static int32 dict_read(FILE * fp, dict_t * d) { char line[16384], **wptr; s3cipid_t p[4096]; int32 lineno, nwd; s3wid_t w; int32 i, maxwd; s3cipid_t ci; int32 ph; maxwd = 4092; wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */ lineno = 0; while (fgets(line, sizeof(line), fp) != NULL) { lineno++; if (line[0] == '#') /* Comment line */ continue; if ((nwd = str2words(line, wptr, maxwd)) < 0) E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", line, maxwd); if (nwd == 0) /* Empty line */ continue; /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */ if (nwd == 1) { E_ERROR("Line %d: No pronunciation for word %s; ignored\n", lineno, wptr[0]); continue; } {char * fin; float proba=0.0; int deca=0; proba=strtod(wptr[1],&fin); if (fin !=wptr[1]) deca=1; else proba=0.0; /* Convert pronunciation string to CI-phone-ids */ for (i = 1; i < nwd-deca; i++) { p[i - 1] = dict_ciphone_id(d, wptr[i+deca]); if (NOT_S3CIPID(p[i - 1])) { E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n", lineno, wptr[i], wptr[0]); break; } } if (i == nwd-deca) { /* All CI-phones successfully converted to IDs */ w = dict_add_word(d, wptr[0], p, nwd - 1-deca); if (NOT_S3WID(w)) E_ERROR ("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n", lineno, wptr[0]); d->word[w].proba=proba; } } } if (d->lts_rules) { #if 1 /* Until we allow user to put in a mapping of the phoneset from LTS to the phoneset from mdef, The checking will intrusively stop the recognizer. */ for (ci = 0; ci < mdef_n_ciphone(d->mdef); ci++) { if (!mdef_is_fillerphone(d->mdef, ci)) { for (ph = 0; cmu6_lts_phone_table[ph] != NULL; ph++) { /* E_INFO("%s %s\n",cmu6_lts_phone_table[ph],mdef_ciphone_str(d->mdef,ci)); */ if (!strcmp (cmu6_lts_phone_table[ph], mdef_ciphone_str(d->mdef, ci))) break; } if (cmu6_lts_phone_table[ph] == NULL) { E_FATAL ("A phone in the model definition doesn't appear in the letter to sound ", "rules. 
\n This is case we don't recommend user to ", "use the built-in LTS. \n Please kindly turn off ", "-lts_mismatch\n"); } } } #endif }
/*
 * Find the phone id that best matches base phone b with left context l,
 * right context r and word position pos in model definition m.
 *
 * Search order:
 *   1. if either context is not a valid CI phone id, return b at once;
 *   2. the exact triphone (b, l, r, pos);
 *   3. the same triphone at every other word position;
 *   4. if m has a silence phone and l or r is a filler phone, the triphone
 *      with filler contexts replaced by silence, first at pos and then at
 *      every other word position;
 *   5. otherwise the base phone b itself.
 */
s3pid_t mdef_phone_id_nearest(mdef_t * m, s3cipid_t b, s3cipid_t l,
                              s3cipid_t r, word_posn_t pos)
{
    word_posn_t tmppos;
    s3pid_t p;
    s3cipid_t newl, newr;
    char *wpos_name;

    assert(m);
    assert((b >= 0) && (b < m->n_ciphone));
    assert((pos >= 0) && (pos < N_WORD_POSN));

    /* Without both contexts there is nothing to look up */
    if (NOT_S3CIPID(l) || NOT_S3CIPID(r))
        return ((s3pid_t) b);

    assert((l >= 0) && (l < m->n_ciphone));
    assert((r >= 0) && (r < m->n_ciphone));

    /* Exact triphone */
    p = mdef_phone_id(m, b, l, r, pos);
    if (IS_S3PID(p))
        return p;

    /* Exact triphone not found; backoff to other word positions */
    for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
        if (tmppos == pos)
            continue;
        p = mdef_phone_id(m, b, l, r, tmppos);
        if (IS_S3PID(p))
            return p;
    }

    /* Nothing yet; backoff to silence phone if non-silence filler context */
    if (IS_S3CIPID(m->sil)) {
        newl = l;
        if (m->ciphone[(int) l].filler)
            newl = m->sil;
        newr = r;
        if (m->ciphone[(int) r].filler)
            newr = m->sil;

        if ((newl != l) || (newr != r)) {
            p = mdef_phone_id(m, b, newl, newr, pos);
            if (IS_S3PID(p))
                return p;

            for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
                if (tmppos == pos)
                    continue;
                p = mdef_phone_id(m, b, newl, newr, tmppos);
                if (IS_S3PID(p))
                    return p;
            }
        }
    }

    /* Nothing yet; backoff to base phone (the warning is compiled out) */
    if ((m->n_phone > m->n_ciphone) && (!m->ciphone[(int) b].filler)) {
        wpos_name = WPOS_NAME;
#if 0
        E_WARN("Triphone(%s,%s,%s,%c) not found; backing off to CIphone\n",
               mdef_ciphone_str(m, b), mdef_ciphone_str(m, l),
               mdef_ciphone_str(m, r), wpos_name[pos]);
#endif
    }

    return ((s3pid_t) b);
}
/*
 * Write the contents of the dict2pid structure d2p to fp as text.
 *
 * Sections, each introduced by a "#" header line:
 *   INTERNAL   per word: the word string and d2p->internal[w][0..pronlen-1]
 *   LDIPH_LC   (base, right, left) phone names and ssid, valid entries only
 *   SINGLE_LC  (base, left) phone names and comssid, valid entries only
 *   SSEQ       mdef->sseq rows, one value per emitting state
 *   COMSSEQ    d2p->comsseq rows, one value per emitting state
 *   COMSTATE   d2p->comstate rows, each ending at the first invalid senone id
 * The dump ends with "# END" and the stream is flushed.
 */
void dict2pid_dump (FILE *fp, dict2pid_t *d2p, mdef_t *mdef, dict_t *dict)
{
    int32 wid, ppos, plen;
    int32 row, col, base, left, right;
    int32 n_ci, n_emit;

    n_ci = mdef_n_ciphone(mdef);
    n_emit = mdef_n_emit_state(mdef);

    /* Word-internal ids */
    fprintf (fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n");
    for (wid = 0; wid < dict_size(dict); wid++) {
        fprintf (fp, "%30s ", dict_wordstr(dict, wid));

        plen = dict_pronlen(dict, wid);
        for (ppos = 0; ppos < plen; ppos++)
            fprintf (fp, " %5d", d2p->internal[wid][ppos]);
        fprintf (fp, "\n");
    }
    fprintf (fp, "#\n");

    /* Left-diphone table, indexed [base][right][left] */
    fprintf (fp, "# LDIPH_LC (b r l ssid)\n");
    for (base = 0; base < n_ci; base++) {
        for (right = 0; right < n_ci; right++) {
            for (left = 0; left < n_ci; left++) {
                if (!IS_S3SSID(d2p->ldiph_lc[base][right][left]))
                    continue;
                /* RAH, ldiph_lc is returning an int32, %d expects an int16 */
                fprintf (fp, "%6s %6s %6s %5d\n",
                         mdef_ciphone_str (mdef, (s3cipid_t)base),
                         mdef_ciphone_str (mdef, (s3cipid_t)right),
                         mdef_ciphone_str (mdef, (s3cipid_t)left),
                         d2p->ldiph_lc[base][right][left]);
            }
        }
    }
    fprintf (fp, "#\n");

    /* Single-phone-word table, indexed [base][left] */
    fprintf (fp, "# SINGLE_LC (b l comssid)\n");
    for (base = 0; base < n_ci; base++) {
        for (left = 0; left < n_ci; left++) {
            if (!IS_S3SSID(d2p->single_lc[base][left]))
                continue;
            /* RAH, single_lc is returning an int32, %d expects an int16 */
            fprintf (fp, "%6s %6s %5d\n",
                     mdef_ciphone_str (mdef, (s3cipid_t)base),
                     mdef_ciphone_str (mdef, (s3cipid_t)left),
                     d2p->single_lc[base][left]);
        }
    }
    fprintf (fp, "#\n");

    /* Senone sequences from the model definition */
    fprintf (fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq);
    for (row = 0; row < mdef->n_sseq; row++) {
        fprintf (fp, "%5d ", row);
        for (col = 0; col < n_emit; col++)
            fprintf (fp, " %5d", mdef->sseq[row][col]);
        fprintf (fp, "\n");
    }
    fprintf (fp, "#\n");

    /* Composite senone sequences */
    fprintf (fp, "# COMSSEQ %d (comstate comstate ...)\n", d2p->n_comsseq);
    for (row = 0; row < d2p->n_comsseq; row++) {
        fprintf (fp, "%5d ", row);
        for (col = 0; col < n_emit; col++)
            fprintf (fp, " %5d", d2p->comsseq[row][col]);
        fprintf (fp, "\n");
    }
    fprintf (fp, "#\n");

    /* Composite states: each row runs until the first invalid senone id */
    fprintf (fp, "# COMSTATE %d (senid senid ...)\n", d2p->n_comstate);
    for (row = 0; row < d2p->n_comstate; row++) {
        fprintf (fp, "%5d ", row);
        col = 0;
        while (IS_S3SENID(d2p->comstate[row][col])) {
            fprintf (fp, " %5d", d2p->comstate[row][col]);
            col++;
        }
        fprintf (fp, "\n");
    }
    fprintf (fp, "#\n");

    fprintf (fp, "# END\n");
    fflush (fp);
}