/* * Append a new word to partially build phone-level sentence HMM. (Handle alternative * pronunciations.) Link new word to end phones of previous words. * Append optional filler words before w, if indicated. * Also Link prev_end into the global node list. * Return value: list of end phone nodes for w. (NOTE: these are not yet linked into * the global node list.) */ static pnode_t *append_transcript_word (s3wid_t w, /* Transcript word to be appended */ pnode_t *prev_end, /* Previous end points to be attached to w */ s3wid_t nextw, /* Next word to follow w (ignoring optional fillers) */ int32 prefix_filler, /* Whether optional filler words to precede w */ int32 append_filler) /* Whether optional filler words to follow w */ { int32 i, p; pnode_t *new_end, *tmp_end, *node; s3cipid_t pred_ci[256], succ_ci[256]; s3wid_t fw; if (mdef->n_ciphone >= 256) E_FATAL("Increase pred_ci, succ_ci array sizes to > #CIphones (%d)\n", mdef->n_ciphone); assert (prev_end != NULL); /* Add optional silence/filler words before w, if indicated */ if (prefix_filler) { build_pred_ci (prev_end, pred_ci); /* Predecessor CI list for fillers */ build_succ_ci (w, 0, succ_ci); /* Successor CI list for fillers */ new_end = NULL; for (i = 0; IS_WID(fillwid[i]); i++) { for (fw = fillwid[i]; IS_WID(fw); fw = dict->word[fw].alt) { tmp_end = append_word (fw, prev_end, pred_ci, succ_ci); for (node = tmp_end; node->next; node = node->next); node->next = new_end; new_end = tmp_end; } } /* Augment prev_end with new_end for filler words added above */ for (node = prev_end; node->next; node = node->next); node->next = new_end; } /* Add w */ build_pred_ci (prev_end, pred_ci); /* Predecessor CI list for w */ build_succ_ci (nextw, append_filler, succ_ci); /* Successor CI list for w */ new_end = NULL; for (; IS_WID(w); w = dict->word[w].alt) { tmp_end = append_word (w, prev_end, pred_ci, succ_ci); for (node = tmp_end; node->next; node = node->next); node->next = new_end; new_end = tmp_end; } return (new_end); }
/*
 * Convert a reference transcript line into a sequence of DAG nodes.
 *
 * The line is parsed into word ids with line2wid() (which also fills in uttid),
 * and silwid is appended as the final entry.  Any filler word other than that
 * final entry is a fatal error.  Word ids at or above oovbegin are first mapped
 * through hom_lookup(); those that remain at or above oovbegin are counted in
 * *noov.  Each resulting word id is stored in ref[] via wid2dagnode().
 *
 * Return value: number of entries written to ref[], including the trailing
 * silwid entry.
 */
static int32 refline2wds (char *line, dagnode_t *ref, int32 *noov, char *uttid)
{
    s3wid_t wid[MAX_UTT_LEN];
    s3wid_t hom;
    int32 n_wd, pos;

    uttid[0] = '\0';
    *noov = 0;

    /* One slot is held back for the silence word appended below */
    n_wd = line2wid (dict, line, wid, MAX_UTT_LEN-1, 1, uttid);
    if (n_wd < 0)
        E_FATAL("Error in parsing ref line: %s\n", line);

    wid[n_wd] = silwid;
    n_wd++;

    for (pos = 0; pos < n_wd; pos++) {
        /* Only the final (appended) entry may be a filler word */
        if (dict_filler_word (dict, wid[pos]) && (pos < n_wd-1))
            E_FATAL("Filler word (%s) in ref: %s\n", dict_wordstr(dict, wid[pos]), line);

        if (wid[pos] >= oovbegin) {
            /* Perhaps one of a homophone pair; substitute if so */
            hom = hom_lookup (wid[pos]);
            if (IS_WID(hom))
                wid[pos] = hom;

            if (wid[pos] >= oovbegin)
                (*noov)++;
        }

        wid2dagnode (&ref[pos], pos, wid[pos]);
    }

    return n_wd;
}
/*
 * Interactive dictionary lookup: repeatedly prompt for a word on stdin and
 * print every pronunciation (base word and alternatives) found for it.
 *
 * Usage: prog {mdeffile | NULL} dict [fillerdict]
 *
 * The loop ends, and the program returns 0, when stdin reaches end-of-file or
 * a read error occurs.
 */
int main (int32 argc, char *argv[])
{
    mdef_t *m;
    dict_t *d;
    char wd[1024];
    s3wid_t wid;
    int32 p;

    if (argc < 3)
        E_FATAL("Usage: %s {mdeffile | NULL} dict [fillerdict]\n", argv[0]);

    /* The literal argument "NULL" means: run without a model definition */
    m = (strcmp (argv[1], "NULL") != 0) ? mdef_init (argv[1]) : NULL;
    d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), '_');

    for (;;) {
        printf ("word> ");
        fflush (stdout);

        /*
         * Bound the read to the size of wd (1023 chars + NUL) and stop on
         * EOF/error; otherwise a long token overflows wd and EOF spins forever.
         */
        if (scanf ("%1023s", wd) != 1)
            break;

        wid = dict_wordid (d, wd);
        if (NOT_WID(wid))
            E_ERROR("Unknown word\n");
        else {
            /* Walk the chain of alternative pronunciations starting at the base word */
            for (wid = dict_basewid(d, wid); IS_WID(wid); wid = d->word[wid].alt) {
                printf ("%s\t", dict_wordstr(d, wid));
                for (p = 0; p < d->word[wid].pronlen; p++)
                    printf (" %s", dict_ciphone_str (d, wid, p));
                printf ("\n");
            }
        }
    }

    return 0;
}
/*
 * Return the word id stored in the `alt` field of dictionary entry wid, i.e.
 * the next alternative pronunciation in that entry's chain.
 * d must be non-NULL and wid must be a valid id below d->n_word (asserted).
 */
s3wid_t _dict_nextalt (dict_t *d, s3wid_t wid)
{
    s3wid_t next;

    assert (d != NULL);
    assert (IS_WID(wid));
    assert (wid < d->n_word);

    next = d->word[wid].alt;
    return next;
}
/*
 * Return the word string stored in dictionary entry wid.  The pointer refers
 * to the dictionary's own storage (d->word[wid].word); nothing is copied.
 * d must be non-NULL and wid must be a valid id below d->n_word (asserted).
 */
char *_dict_wordstr (dict_t *d, s3wid_t wid)
{
    char *str;

    assert (d != NULL);
    assert (IS_WID(wid));
    assert (wid < d->n_word);

    str = d->word[wid].word;
    return str;
}
/*
 * Print one line describing a pronunciation-error region to stdout:
 *
 *   <ref words ws..we>  =>  (<left ctx phone>) <phones ps..pe> \t(<right ctx phone>) ( <id> )
 *
 * id      utterance id printed at the end of the line
 * ref     reference word ids; ref[we+1] is read, so the array must extend one
 *         past `we` (NOTE(review): presumably BAD_WID-terminated; confirm caller)
 * nref    not used by this function
 * wseg    not used by this function
 * ap      CI phone sequence; entries ps..pe are printed
 * ap_err  per-phone error flags; flagged phones are printed upper case, others
 *         lower case
 * ws, we  first and last reference word of the region (inclusive)
 * ps, pe  first and last phone of the region (inclusive)
 *
 * All string building is bounded by the size of the local buffer; an overly
 * long word sequence is truncated rather than overflowing the buffer.
 */
static void pronerr_output (char *id, s3wid_t *ref, int32 nref, wseg_t *wseg,
                            s3cipid_t *ap, int8 *ap_err,
                            int32 ws, int32 we, int32 ps, int32 pe)
{
    int32 j;
    s3wid_t rcwid, lcwid;
    char str[4096];
    size_t len;
    int n;

    /* Word sequence for region in error (the first word is always emitted) */
    str[0] = '\0';
    len = 0;
    j = ws;
    do {
        n = snprintf (str + len, sizeof(str) - len, "%s%s",
                      (j > ws) ? " " : "",
                      dict_wordstr (dict, dict_basewid(dict, ref[j])));
        if (n < 0) {
            /* Formatting failed; keep what has been built so far */
            str[len] = '\0';
            break;
        }
        if ((size_t) n >= sizeof(str) - len)
            break;      /* Truncated; snprintf has already NUL-terminated str */
        len += (size_t) n;
    } while (++j <= we);
    printf ("%-22s\t=>\t", str);

    /* Print left context phone: last phone of the word preceding the region */
    lcwid = (ws > 0) ? ref[ws-1] : BAD_WID;
    if (IS_WID(lcwid)) {
        j = dict->word[lcwid].pronlen - 1;
        snprintf (str, sizeof(str), "(%s)",
                  mdef_ciphone_str (mdef, dict->word[lcwid].ciphone[j]));
    } else
        snprintf (str, sizeof(str), "%s", "()");
    printf ("%-5s", str);

    /* Phone sequence for region in error; case marks whether the phone is in error */
    for (j = ps; j <= pe; j++) {
        snprintf (str, sizeof(str), "%s", mdef_ciphone_str (mdef, ap[j]));
        if (ap_err[j])
            ucase (str);
        else
            lcase (str);
        printf (" %s", str);
    }

    /* Right context: first phone of the word following the region, if any */
    rcwid = ref[we+1];
    if (IS_WID(rcwid))
        printf ("\t(%s)", mdef_ciphone_str (mdef, dict->word[rcwid].ciphone[0]));
    else
        printf ("\t()");

    printf (" ( %s )\n", id);
}
int32 align_init ( void ) { int32 k; s3wid_t w; float64 *f64arg; mdef = mdef_getmdef (); tmat = tmat_gettmat (); dict = dict_getdict (); assert (mdef && tmat && dict); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); silwid = dict_wordid (SILENCE_WORD); if ((NOT_WID(startwid)) || (NOT_WID(finishwid))) E_FATAL("%s or %s not in dictionary\n", START_WORD, FINISH_WORD); if (NOT_WID(silwid)) E_ERROR("%s not in dictionary; no optional silence inserted between words\n", SILENCE_WORD); /* Create list of optional filler words to be inserted between transcript words */ fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3), sizeof(s3wid_t)); k = 0; if (IS_WID(silwid)) fillwid[k++] = silwid; for (w = dict->filler_start; w <= dict->filler_end; w++) { if ((dict_basewid (w) == w) && (w != silwid) && (w != startwid) && (w != finishwid)) fillwid[k++] = w; } fillwid[k] = BAD_WID; f64arg = (float64 *) cmd_ln_access ("-beam"); beam = logs3 (*f64arg); E_INFO ("logs3(beam)= %d\n", beam); score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32)); hist_head = NULL; align_stseg = NULL; align_phseg = NULL; align_wdseg = NULL; ctr_nstate = counter_new ("NS"); return 0; }
/*
 * Build a pronunciation dictionary for a vocabulary read from stdin.
 *
 * Usage: prog dictfile [dictfile ...] < vocabfile
 *
 * Each non-empty vocabulary line holds one or two words.  With one word, that
 * word is both the output name and the lookup key; with two, the first is the
 * output name and the second is the key looked up in the dictionaries.  The
 * dictionaries are searched in command-line order and the first one that
 * contains the key supplies all pronunciations, printed as
 * "name\t phones" for the first and "name(k)\t phones" for the k-th.
 * Words found in no dictionary are reported with E_ERROR.
 *
 * Returns 0 after stdin is exhausted.
 */
int main (int32 argc, char *argv[])
{
    dict_t **d;
    int32 i, k, p;
    s3wid_t wid;
    char line[16384], *wp[1024];

    if (argc < 2) {
        E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
        exit(0);
    }

    d = (dict_t **) ckd_calloc (argc-1, sizeof(dict_t *));
    for (i = 1; i < argc; i++)
        d[i-1] = dict_init (NULL, argv[i], NULL, 0);

    while (fgets (line, sizeof(line), stdin) != NULL) {
        if ((k = str2words (line, wp, 1024)) < 0)
            E_FATAL("Line too long: %s\n", line);
        if (k > 2)
            E_FATAL("Vocab entry contains too many words\n");
        if (k == 0)
            continue;
        if (k == 1)
            wp[1] = wp[0];      /* Single word: output name doubles as lookup key */

        /* Look up word in each dictionary until found; k counts pronunciations printed */
        k = 0;
        for (i = 0; (i < argc-1) && (k == 0); i++) {
            wid = dict_wordid (d[i], wp[1]);
            if (NOT_WID(wid))
                continue;

            for (wid = dict_basewid(d[i], wid); IS_WID(wid); wid = dict_nextalt(d[i], wid)) {
                k++;
                if (k == 1)
                    printf ("%s\t", wp[0]);
                else
                    printf ("%s(%d)\t", wp[0], k);

                for (p = 0; p < dict_pronlen(d[i], wid); p++)
                    printf (" %s", dict_ciphone_str (d[i], wid, p));
                printf ("\n");
            }
        }

        if (k == 0)
            E_ERROR("No pronunciation for: '%s'\n", wp[0]);
    }

    return 0;
}
/*
 * Build the list of CI phones that may follow the word currently being added.
 *
 * The candidates are the first phone of every pronunciation of w (if w is a
 * valid word id) and, when append_filler is non-zero, the first phone of every
 * pronunciation of every filler word in fillwid[].
 *
 * succ_ci is first used as a flag array indexed by CI phone id (so it must hold
 * at least mdef->n_ciphone + 1 entries) and is then compacted in place into a
 * list of distinct phone ids in increasing order, terminated by BAD_CIPID.
 */
static void build_succ_ci (s3wid_t w, int32 append_filler, s3cipid_t *succ_ci)
{
    int32 ci, f, n_succ;
    s3wid_t alt;

    /* Phase 1: clear the per-phone flags */
    for (ci = 0; ci < mdef->n_ciphone; ci++)
        succ_ci[ci] = 0;

    /* Phase 2: flag the initial phone of each pronunciation of w ... */
    alt = w;
    while (IS_WID(alt)) {
        succ_ci[dict->word[alt].ciphone[0]] = 1;
        alt = dict->word[alt].alt;
    }

    /* ... and of each optional filler word, if fillers may follow */
    if (append_filler) {
        for (f = 0; IS_WID(fillwid[f]); f++) {
            alt = fillwid[f];
            while (IS_WID(alt)) {
                succ_ci[dict->word[alt].ciphone[0]] = 1;
                alt = dict->word[alt].alt;
            }
        }
    }

    /*
     * Phase 3: compact flags into a list.  The write index never runs ahead of
     * the read index, so no unread flag is overwritten.
     */
    n_succ = 0;
    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        if (succ_ci[ci]) {
            succ_ci[n_succ] = ci;
            n_succ++;
        }
    }
    succ_ci[n_succ] = BAD_CIPID;
}
/*
 * Debugging aid: print one line for phone node p to stdout.
 *
 * The line holds the node id; the word string, position, pid and CI phone name
 * (or a "<phead>" placeholder row when the node has no valid word id); the left
 * and right context phone names ("-" when not a valid CI phone id); and, after
 * a tab, the ids of all successor nodes.
 */
static void dump_pnode_succ (pnode_t *p)
{
    plink_t *link;
    const char *lc_name, *rc_name;

    printf (" %5d", p->id);

    if (! IS_WID(p->wid)) {
        /* Node without a word id: printed as the dummy head */
        printf (" %20s %02d %6d %4s", "<phead>", 0, BAD_PID, "");
    } else {
        printf (" %20s %02d %6d %4s",
                dict_wordstr(p->wid), p->pos, p->pid, mdef_ciphone_str (mdef, p->ci));
    }

    lc_name = IS_CIPID(p->lc) ? mdef_ciphone_str (mdef, p->lc) : "-";
    rc_name = IS_CIPID(p->rc) ? mdef_ciphone_str (mdef, p->rc) : "-";
    printf (" %4s %4s", lc_name, rc_name);

    putchar ('\t');
    link = p->succlist;
    while (link != NULL) {
        printf (" %5d", link->node->id);
        link = link->next;
    }
    putchar ('\n');
}
/*
 * Find the compound word made up of exactly the component sequence
 * wid[0..len-1].
 *
 * The search follows the d->comp_head chain starting at comp_head[wid[0]];
 * every word on that chain is asserted to be a compound whose first component
 * is wid[0].  The first chain entry whose components all match is returned as
 * its base word id.
 *
 * Returns BAD_WID if the dictionary has no compound-word table or if no
 * compound matches.  len must be greater than 1 (asserted when a table exists).
 */
s3wid_t dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len)
{
    s3wid_t cand;
    int32 n_match;

    if (d->comp_head == NULL)
        return BAD_WID;

    assert (len > 1);

    cand = d->comp_head[wid[0]];
    while (IS_WID(cand)) {
        /* cand is a compound word beginning with wid[0]; check if rest matches */
        assert (d->word[cand].n_comp > 1);
        assert (d->word[cand].comp[0] == wid[0]);

        if (d->word[cand].n_comp == len) {
            /* Count the leading components that agree with wid[] */
            n_match = 0;
            while ((n_match < len) && (d->word[cand].comp[n_match] == wid[n_match]))
                n_match++;

            if (n_match == len)
                return dict_basewid(d, cand);
        }

        cand = d->comp_head[cand];
    }

    return BAD_WID;
}
/*
 * Load the language-model context (two predecessor words and one successor
 * word) for utterance uttid from corpus corp.
 *
 * The corpus entry must consist of exactly three whitespace-separated tokens:
 *   pred0 pred1 succ
 * where "-" stands for "no word" (BAD_WID).  Every other token must be in the
 * dictionary and is reduced to its base word id.  Compound words are expanded:
 *   - a compound pred0 is replaced by its last component;
 *   - a compound pred1 is replaced by its last component and pred0 by the one
 *     before it.
 * A successor other than "-" must be FINISH_WORD.  A valid pred0 with a missing
 * pred1 is rejected, and every valid word must also be known to the LM.
 *
 * Any violation is fatal (E_FATAL).  On return pred[0], pred[1] and *succ hold
 * the resulting word ids (or BAD_WID).
 */
void lmcontext_load (corpus_t *corp, char *uttid, s3wid_t *pred, s3wid_t *succ)
{
    char *str, wd[4096], *strp;
    s3wid_t w[3];
    int32 i, n;
    dict_t *dict;
    s3lmwid_t lwid;

    if ((str = corpus_lookup (corp, uttid)) == NULL)
        E_FATAL("Couldn't find LM context for %s\n", uttid);

    dict = dict_getdict ();

    strp = str;
    for (i = 0; i < 3; i++) {
        /* Field width keeps an overlong token from overflowing wd */
        if (sscanf (strp, "%4095s%n", wd, &n) != 1)
            E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
        strp += n;

        if (strcmp (wd, "-") == 0)
            w[i] = BAD_WID;
        else {
            w[i] = dict_wordid (wd);
            if (NOT_WID(w[i]))
                E_FATAL("LM context word (%s) for %s not in dictionary\n", wd, uttid);
            w[i] = dict_basewid(w[i]);

            switch (i) {
            case 0:
                /* Compound first predecessor: only its last component matters */
                if ((n = dict->word[w[0]].n_comp) > 0)
                    w[0] = dict->word[w[0]].comp[n-1].wid;
                break;
            case 1:
                /* Compound second predecessor supplies both predecessors */
                if ((n = dict->word[w[1]].n_comp) > 0) {
                    w[0] = dict->word[w[1]].comp[n-2].wid;
                    w[1] = dict->word[w[1]].comp[n-1].wid;
                }
                break;
            case 2:
                if (w[2] != dict_wordid(FINISH_WORD))
                    E_FATAL("Illegal successor LM context for %s: %s\n", uttid, str);
                break;
            default:
                assert (0);     /* Should never get here */
                break;
            }
        }
    }

    /*
     * A fourth token is a malformed spec.  It is rejected here instead of being
     * stored in w[3], which would write past the end of w[].
     */
    if (sscanf (strp, "%4095s", wd) == 1)
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    if (IS_WID(w[0]) && NOT_WID(w[1]))
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    for (i = 0; i < 3; i++) {
        if (IS_WID(w[i])) {
            lwid = lm_lmwid (w[i]);
            /* Report the word actually being checked, not the last token scanned */
            if (NOT_LMWID(lwid))
                E_FATAL("LM context word (%s) for %s not in LM\n",
                        dict->word[w[i]].word, uttid);
        }
    }

    pred[0] = w[0];
    pred[1] = w[1];
    *succ = w[2];
}
/*
 * Parse one phone-alignment line and relate it to the reference word sequence.
 *
 * The line is a sequence of CI phone names in which error regions are marked as
 *     [[ <ref phones> => <hyp phones> ]]
 * and which ends with an utterance-id token (as recognised by is_uttid()).
 * Phones outside [[ ]] are correct and belong to both reference and hypothesis.
 *
 * line    text to parse
 * ref     reference word ids, terminated by an invalid word id; the reference
 *         phones on the line must spell out exactly these pronunciations
 * ap      output: hypothesis CI phones, terminated by BAD_CIPID
 * ap_err  output: 1 for hypothesis phones inside an error region, 0 otherwise
 *         (the terminator entry is set to 1)
 * aplen   maximum number of hypothesis phones accepted.
 *         NOTE(review): the terminator is written at index n_hypci, which can
 *         equal aplen, so ap/ap_err presumably need aplen+1 entries - confirm.
 * id      expected utterance id; a mismatch with the id on the line is fatal
 *
 * Returns a statically allocated array (reused on every call) with one entry
 * per reference word plus a final sentinel entry:
 *   s    index in ap[] of the word's first phone, or -1 if the word begins
 *        inside an error region
 *   e    index in ap[] of the word's last phone, or -1 if the word ends
 *        inside an error region
 *   err  1 if any part of the word lies in an error region
 * The sentinel has s == e == number of hypothesis phones and err == 0.
 *
 * Any format violation is fatal (E_FATAL).
 */
static wseg_t *line2wseg (char *line, s3wid_t *ref, s3cipid_t *ap, int8 *ap_err,
                          int32 aplen, char *id)
{
    char word[1024], uttid[1024], *lp;
    int32 i, k, n_hypci, n_refwd, n_refci, pronlen;
    s3cipid_t ci;
    typedef enum {CORR=0, REFERR=1, HYPERR=2} state_t;
    state_t state;
    static wseg_t *wseg = NULL;

    if (! wseg)
        wseg = (wseg_t *) ckd_calloc (MAX_UTT_LEN, sizeof(wseg_t));

    lp = line;
    n_hypci = n_refci = pronlen = 0;
    n_refwd = -1;       /* No reference word started yet */
    uttid[0] = '\0';
    state = CORR;

    /* Field width keeps an overlong token from overflowing word[] */
    while (sscanf (lp, "%1023s%n", word, &k) == 1) {
        lp += k;

        if (is_uttid (word, uttid))
            break;

        if (strcmp (word, "[[") == 0) {
            if (state != CORR)
                E_FATAL("%s: Illegal [[\n", id);
            state = REFERR;

            /* Error region opens in the middle of a word: that word is in error */
            if (n_refci < pronlen)
                wseg[n_refwd].err = 1;
        } else if (strcmp (word, "]]") == 0) {
            if (state != HYPERR)
                E_FATAL("%s: Illegal ]]\n", id);
            state = CORR;
        } else if (strcmp (word, "=>") == 0) {
            if (state != REFERR)
                E_FATAL("%s: Illegal =>\n", id);
            state = HYPERR;
        } else {
            ci = mdef_ciphone_id (mdef, word);
            if (NOT_CIPID(ci))
                E_FATAL("%s: Unknown CIphone %s\n", id, word);

            if (state != HYPERR) {
                /* Reference-side phone: check if matches next pron in ref word */
                if (n_refci >= pronlen) {
                    /* Previous word fully consumed; move on to the next one */
                    assert (n_refci == pronlen);
                    n_refwd++;

                    /*
                     * Validate the word id BEFORE using it to index the
                     * dictionary; the terminator is not a valid index.
                     */
                    if (NOT_WID(ref[n_refwd]))
                        E_FATAL("%s: Premature end of ref wid\n", id);

                    pronlen = dict->word[ref[n_refwd]].pronlen;
                    assert (pronlen > 0);

                    wseg[n_refwd].s = (state == CORR) ? n_hypci : -1;
                    wseg[n_refwd].e = -1;
                    wseg[n_refwd].err = 0;

                    n_refci = 0;
                }

                if (dict->word[ref[n_refwd]].ciphone[n_refci] != ci)
                    E_FATAL("%s: CIphone mismatch at word %d, ciphone %d\n",
                            id, n_refwd, n_refci);
                n_refci++;

                /* Word ends on a correct phone: record its index in ap[] */
                if ((n_refci == pronlen) && (state == CORR))
                    wseg[n_refwd].e = n_hypci;
                if (state != CORR)
                    wseg[n_refwd].err = 1;
            }

            if (state != REFERR) {
                /* Hypothesis-side phone */
                if (n_hypci >= aplen)
                    E_FATAL("%s: Too many CIphones: >%d\n", id, aplen);

                ap[n_hypci] = ci;
                ap_err[n_hypci] = (state == CORR) ? 0 : 1;
                n_hypci++;
            }
        }
    }

    /* The reference must be consumed exactly */
    assert (n_refci == pronlen);
    n_refwd++;
    assert (NOT_WID(ref[n_refwd]));

    /* Sentinel entries */
    wseg[n_refwd].s = wseg[n_refwd].e = n_hypci;
    wseg[n_refwd].err = 0;
    ap[n_hypci] = BAD_CIPID;
    ap_err[n_hypci] = 1;

    if (strcmp (uttid, id) != 0)
        E_FATAL("Uttid mismatch: %s expected, %s found\n", id, uttid);

#if 0
    for (i = 0; IS_WID(ref[i]); i++) {
        printf ("%s: %4d %4d %d %s\n", id, wseg[i].s, wseg[i].e, wseg[i].err,
                dict_wordstr (dict, ref[i]));
    }
#endif

    return wseg;
}
/* * Build a sentence HMM for the given transcription (wordstr). A two-level DAG is * built: phone-level and state-level. * - <s> and </s> always added at the beginning and end of sentence to form an * augmented transcription. * - Optional <sil> and noise words added between words in the augmented * transcription. * wordstr must contain only the transcript; no extraneous stuff such as utterance-id. * Phone-level HMM structure has replicated nodes to allow for different left and right * context CI phones; hence, each pnode corresponds to a unique triphone in the sentence * HMM. * Return 0 if successful, <0 if any error (eg, OOV word encountered). */ int32 align_build_sent_hmm (char *wordstr) { s3wid_t w, nextw; int32 k, oov; pnode_t *word_end, *node; char *wd, delim, *wdcopy; /* HACK HACKA HACK BHIKSHA */ int32 firsttime = 1; /* END HACK HACKA HACK */ /* Initialize dummy head and tail entries of sent hmm */ phead.wid = BAD_WID; phead.ci = BAD_CIPID; phead.lc = BAD_CIPID; /* No predecessor */ phead.rc = BAD_CIPID; /* Any phone can follow head */ phead.pid = BAD_PID; phead.succlist = NULL; phead.predlist = NULL; phead.next = NULL; /* Will ultimately be the head of list of all pnodes */ phead.id = -1; /* Hardwired */ phead.startstate = NULL; ptail.wid = BAD_WID; ptail.ci = BAD_CIPID; ptail.lc = BAD_CIPID; /* Any phone can precede tail */ ptail.rc = BAD_CIPID; /* No successor */ ptail.pid = BAD_PID; ptail.succlist = NULL; ptail.predlist = NULL; ptail.next = NULL; ptail.id = -2; /* Hardwired */ ptail.startstate = NULL; n_pnode = 0; pnode_list = NULL; oov = 0; /* State-level DAG initialization should be here in case the build is aborted */ shead.pnode = &phead; shead.succlist = NULL; shead.predlist = NULL; shead.sen = BAD_SENID; shead.state = mdef->n_emit_state; shead.hist = NULL; stail.pnode = &ptail; stail.succlist = NULL; stail.predlist = NULL; stail.sen = BAD_SENID; stail.state = 0; stail.hist = NULL; /* Obtain the first transcript word */ k = nextword (wordstr, " 
\t\n", &wd, &delim); if (k < 0) nextw = finishwid; else { wordstr = wd + k; wdcopy = ckd_salloc (wd); *wordstr = delim; nextw = dict_wordid (wdcopy); if (IS_WID(nextw)) nextw = dict_basewid (nextw); } /* Create node(s) for <s> before any transcript word */ /* HACK HACKA HACK BHIKSHA word_end = append_transcript_word (startwid, &phead, nextw, 0, 1); END HACK HACKA HACK BHIKSHA */ /* Append each word in transcription to partial sent HMM created so far */ while (k >= 0) { w = nextw; if (NOT_WID(w)) { E_ERROR("%s not in dictionary\n", wdcopy); oov = 1; /* Hack!! Temporarily set w to some dummy just to run through sentence */ w = finishwid; } ckd_free (wdcopy); k = nextword (wordstr, " \t\n", &wd, &delim); if (k < 0) nextw = finishwid; else { wordstr = wd + k; wdcopy = ckd_salloc (wd); *wordstr = delim; nextw = dict_wordid (wdcopy); if (IS_WID(nextw)) nextw = dict_basewid (nextw); } /* HACK HACKA HACK BHIKSHA */ if (firsttime){ word_end = append_transcript_word (w, &phead, nextw, 0, 1); firsttime = 0; } else if (nextw == finishwid) word_end = append_transcript_word (w, word_end, BAD_WID, 1, 0); else word_end = append_transcript_word (w, word_end, nextw, 1, 1); /* END HACK HACKA HACK BHIKSHA */ } if (oov) return -1; /* Append phone HMMs for </s> at the end; link to tail node */ /* HACK HACKA HACK BHIKSHA word_end = append_transcript_word (finishwid, word_end, BAD_WID, 1, 0); END HACK HACKA HACK BHIKSHA */ for (node = word_end; node; node = node->next) link_pnodes (node, &ptail); /* Build state-level DAG from the phone-level one */ build_state_dag (); /* Dag must begin and end at shead and stail, respectively */ assert (shead.succlist); assert (stail.predlist); assert (! shead.predlist); assert (! 
stail.succlist); #if _DEBUG_ALIGN_ dump_sent_hmm (); /* For debugging */ #endif k = n_pnode * mdef->n_emit_state; if (k > active_list_size) { /* Need to grow active list arrays */ if (active_list_size > 0) { ckd_free (cur_active); ckd_free (next_active); } for (; active_list_size <= k; active_list_size += ACTIVE_LIST_SIZE_INCR); cur_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); next_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); } return 0; }