/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* LM */ lm_read ((char *) cmd_ln_access("-lmfn"), ""); /* Filler penalties */ fillpen_init ((char *) cmd_ln_access("-fillpenfn"), dict->filler_start, dict->filler_end); }
int32 align_init ( void ) { int32 k; s3wid_t w; float64 *f64arg; mdef = mdef_getmdef (); tmat = tmat_gettmat (); dict = dict_getdict (); assert (mdef && tmat && dict); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); silwid = dict_wordid (SILENCE_WORD); if ((NOT_WID(startwid)) || (NOT_WID(finishwid))) E_FATAL("%s or %s not in dictionary\n", START_WORD, FINISH_WORD); if (NOT_WID(silwid)) E_ERROR("%s not in dictionary; no optional silence inserted between words\n", SILENCE_WORD); /* Create list of optional filler words to be inserted between transcript words */ fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3), sizeof(s3wid_t)); k = 0; if (IS_WID(silwid)) fillwid[k++] = silwid; for (w = dict->filler_start; w <= dict->filler_end; w++) { if ((dict_basewid (w) == w) && (w != silwid) && (w != startwid) && (w != finishwid)) fillwid[k++] = w; } fillwid[k] = BAD_WID; f64arg = (float64 *) cmd_ln_access ("-beam"); beam = logs3 (*f64arg); E_INFO ("logs3(beam)= %d\n", beam); score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32)); hist_head = NULL; align_stseg = NULL; align_phseg = NULL; align_wdseg = NULL; ctr_nstate = counter_new ("NS"); return 0; }
/*
 * Interactive dictionary lookup tool.
 * Usage: prog {mdeffile | NULL} dict [fillerdict]
 * Loads the dictionary (with '_' as the compound-word separator), then
 * repeatedly prompts for a word and prints every pronunciation (base and
 * alternates) as a sequence of CI phone names.  Terminates at end of input.
 */
int main (int32 argc, char *argv[])
{
    mdef_t *m;
    dict_t *d;
    char wd[1024];
    s3wid_t wid;
    int32 p;

    if (argc < 3)
        E_FATAL("Usage: %s {mdeffile | NULL} dict [fillerdict]\n", argv[0]);

    /* The literal string "NULL" means: run without a model definition */
    m = (strcmp (argv[1], "NULL") != 0) ? mdef_init (argv[1]) : NULL;
    d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), '_');

    for (;;) {
        printf ("word> ");

        /*
         * Bound the read to the buffer size and stop at EOF or read error;
         * an unchecked scanf would loop forever on stale data once the
         * input is exhausted.
         */
        if (scanf ("%1023s", wd) != 1)
            break;

        wid = dict_wordid (d, wd);
        if (NOT_WID(wid))
            E_ERROR("Unknown word\n");
        else {
            /* Walk the chain of alternative pronunciations from the base word */
            for (wid = dict_basewid(d, wid); IS_WID(wid); wid = d->word[wid].alt) {
                printf ("%s\t", dict_wordstr(d, wid));
                for (p = 0; p < d->word[wid].pronlen; p++)
                    printf (" %s", dict_ciphone_str (d, wid, p));
                printf ("\n");
            }
        }
    }

    return 0;
}
static void homfile_load (char *file) { FILE *fp; char line[16380], w1[4096], w2[4096]; int32 k, n; s3wid_t wid1, wid2; s3cipid_t ci[1]; hom_t *h; E_INFO("Reading homophones file %s\n", file); if ((fp = fopen(file, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", file); ci[0] = (s3cipid_t) 0; /* Dummy */ n = 0; while (fgets (line, sizeof(line), fp) != NULL) { if ((k = sscanf (line, "%s %s", w1, w2)) == 2) { wid1 = dict_wordid (dict, w1); if (NOT_WID(wid1)) { E_INFO("Adding %s to dictionary\n", w1); wid1 = dict_add_word (dict, w1, ci, 1); if (NOT_WID(wid1)) E_FATAL("dict_add_word(%s) failed\n", w1); } wid2 = dict_wordid (dict, w2); if ((NOT_WID(wid2)) || (wid2 >= oovbegin)) E_FATAL("%s not in dictionary\n", w2); h = (hom_t *) listelem_alloc (sizeof(hom_t)); h->w1 = wid1; h->w2 = wid2; h->next = homlist; homlist = h; n++; } else E_FATAL("Bad homophones line: %s\n", line); } E_INFO("%d homophone pairs read\n", n); fclose (fp); }
/* * Create a degenerate DAG (linear sequence of nodes) for the given hyp line. * The DAG contains a terminal sentinel silwid node. */ static dag_t *hypline2dag (char *ref_uttid, char *line) { char junk1[4096], junk2[4096], uttid[4096]; s3wid_t wid[MAX_UTT_LEN]; int32 i, n; dag_t *dag; dagnode_t *d; if ((n = line2wid (dict, line, wid, MAX_UTT_LEN-1, 0, uttid)) < 0) E_FATAL("Error in parsing hyp line: %s\n", line); /* Verify uttid with ref_uttid */ if (strcmp (uttid, ref_uttid) != 0) { strcpy (junk1, uttid); ucase (junk1); strcpy (junk2, ref_uttid); ucase (junk2); if (strcmp (junk1, junk2) != 0) E_FATAL("Uttid mismatch: %s(ref), %s(hyp)\n", ref_uttid, uttid); } /* Build DAG from word sequence */ dag = ckd_calloc (1, sizeof(dag_t)); dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *)); dag->nnode = 0; dag->nfrm = 0; dag->nlink = 0; for (i = 0; i < n; i++) { if ((NOT_WID(wid[i])) || (wid[i] >= oovbegin)) E_FATAL("%s: Unknown word in line: %s\n", uttid, line); /* Create DAG node for word */ d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t)); wid2dagnode (d, i, wid[i]); dag->node_sf[i] = d; if (i > 0) { dag_link (dag->node_sf[i-1], d); dag->nlink++; } dag->nnode++; } dag->nfrm = dag->nnode; dag->entry.src = NULL; dag->entry.dst = dag->node_sf[0]; dag->entry.next = NULL; dag->exit.src = NULL; dag->exit.dst = dag->node_sf[dag->nnode - 1]; dag->exit.next = NULL; return dag; }
/*
 * Convert a whitespace-separated line of words into dictionary word ids.
 *   dict       dictionary used for lookup (and for insertion if add_oov)
 *   line       input text; parsing stops at the first token that is_uttid()
 *              recognises, which is_uttid() is given uttid to fill in
 *   wid        output array of word ids
 *   max_n_wid  capacity of wid[]
 *   add_oov    if non-zero, unknown words are added to the dictionary (with no
 *              pronunciation); otherwise an unknown word is fatal
 *   uttid      output buffer; set to the empty string before parsing
 * Returns the number of ids stored, or -n (n = ids stored so far) if another
 * token is found once max_n_wid ids are already stored.  Note that -n is 0
 * when max_n_wid is 0.  Text after the uttid only triggers a warning.
 */
int32 line2wid (dict_t *dict, char *line, s3wid_t *wid, int32 max_n_wid, int32 add_oov,
		char *uttid)
{
    char *lp, word[1024];
    int32 n, k;

    uttid[0] = '\0';

    lp = line;
    n = 0;
    /* The field width keeps an over-long token from overrunning word[] */
    while (sscanf (lp, "%1023s%n", word, &k) == 1) {
        lp += k;
        if (n >= max_n_wid)
            return -n;

        if (is_uttid (word, uttid))
            break;

        wid[n] = dict_wordid (dict, word);
        if (NOT_WID(wid[n])) {
            /* OOV word */
            if (add_oov) {
                E_INFO("Adding %s to dictionary\n", word);
                wid[n] = dict_add_word (dict, word, NULL, 0);
                if (NOT_WID(wid[n]))
                    E_FATAL("dict_add_word(%s) failed for line: %s\n", word, line);
            } else
                E_FATAL("Unknown word (%s) in line: %s\n", word, line);
        }

        n++;
    }

    /* Check that the line really ended */
    if (sscanf (lp, "%1023s", word) == 1)
        E_WARN("Nonempty data ignored after uttid(%s) in line: %s\n", uttid, line);

    return n;
}
/*
 * Scan the dictionary for compound words.  This function should be called just after
 * loading the dictionary.  For the moment, component words in a compound word are
 * assumed to be separated by the given sep character (underscore in the CMU dict).
 * For every compound base word, n_comp and comp[] are filled in with the word ids of
 * its components; every component must itself be in the dictionary.
 * Only base entries are examined: an alternative pronunciation maps back to the same
 * base entry, so visiting it again would re-allocate (and leak) comp[] and count the
 * word more than once.
 * Fatal errors: leading, trailing or successive separators; a compound start, finish
 * or filler word; an unknown component; a word string too long for the local buffer.
 * Return value: #compound words found in dictionary.
 */
static int32 dict_build_comp (dict_t *d,
			      char sep)	/* Separator character */
{
    char wd[4096];
    int32 w, cwid;
    dictword_t *wordp;
    int32 nc;		/* # compound words in dictionary */
    int32 i, j, l, n;

    nc = 0;
    for (w = 0; w < d->n_word; w++) {
        /* Alternates share their base entry; handle each base word exactly once */
        if (dict_basewid(d, w) != w)
            continue;
        wordp = d->word + w;

        l = strlen(wordp->word);
        if (l == 0)
            continue;		/* Empty string: nothing to split, and wd[l-1] would be wd[-1] */
        if (l >= (int32) sizeof(wd))
            E_FATAL("Word too long (%d chars) for compound-word scan\n", l);
        strcpy (wd, wordp->word);

        if ((wd[0] == sep) || (wd[l-1] == sep))
            E_FATAL("Bad compound word %s: leading or trailing separator\n", wordp->word);

        /* Count no. of components in this word */
        n = 1;
        for (i = 1; i < l-1; i++)	/* 0 and l-1 already checked above */
            if (wd[i] == sep)
                n++;
        if (n == 1)
            continue;		/* Not a compound word */
        nc++;

        if ((w == d->startwid) || (w == d->finishwid) || dict_filler_word (d, w))
            E_FATAL("Compound special/filler word (%s) not allowed\n", wordp->word);

        /* Allocate and fill in component word info */
        wordp->n_comp = n;
        wordp->comp = (s3wid_t *) ckd_calloc (n, sizeof(s3wid_t));

        /* Parse word string into components; separators are overwritten with NUL */
        n = 0;
        for (i = 0; i < l; i++) {
            for (j = i; (i < l) && (wd[i] != sep); i++);
            if (j == i)
                E_FATAL("Bad compound word %s: successive separators\n", wordp->word);

            wd[i] = '\0';
            cwid = dict_wordid (d, wd+j);
            if (NOT_WID(cwid))
                E_FATAL("Component word %s of %s not in dictionary\n", wd+j, wordp->word);
            wordp->comp[n] = cwid;
            n++;
        }
    }

    if (nc > 0)
        d->comp_head = dict_comp_head (d);

    return nc;
}
/*
 * Debugging aid: print a one-item description of a phone node to stdout.
 * For the dummy head/tail nodes (no word id) prints "<head>" when the node id
 * is -1 and "<tail>" otherwise; for real nodes prints "word.position.".
 * This is followed by the left-context, CI and right-context phone names in
 * the form "lc(ci)rc", with "-" standing in for any undefined phone.
 * No newline is printed.
 */
static void dump_pnode_info (pnode_t *p)
{
    if (NOT_WID(p->wid))
        printf ("%s", (p->id == -1) ? "<head>" : "<tail>");
    else
        /* Exactly one argument per conversion; the format has only %s and %d */
        printf ("%s.%d.", dict_wordstr(p->wid), p->pos);

    printf ("%s", IS_CIPID(p->lc) ? mdef_ciphone_str (mdef, p->lc) : "-");
    printf ("(%s)", IS_CIPID(p->ci) ? mdef_ciphone_str (mdef, p->ci) : "-");
    printf ("%s", IS_CIPID(p->rc) ? mdef_ciphone_str (mdef, p->rc) : "-");
}
/*
 * Vocabulary-to-pronunciation tool.
 * Usage: prog dictfile [dictfile ...] < vocabfile
 * Each vocabulary line holds one or two words: the word to print and,
 * optionally, the word to look up (defaults to the first).  The dictionaries
 * are searched in command-line order and the search stops at the first one
 * that yields a pronunciation.  Every pronunciation (base and alternates) is
 * printed; alternates are labelled "word(k)".  A word with no pronunciation in
 * any dictionary is reported through E_ERROR.  Returns 0 at end of input.
 */
int main (int32 argc, char *argv[])
{
    dict_t **d;
    int32 i, k, p, wid;
    char line[16384], *wp[1024];

    if (argc < 2) {
        E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
        exit(0);
    }

    /* One dictionary per command-line argument; no mdef, no filler file, no compounds */
    d = (dict_t **) ckd_calloc (argc-1, sizeof(dict_t *));
    for (i = 1; i < argc; i++)
        d[i-1] = dict_init (NULL, argv[i], NULL, 0);

    while (fgets (line, sizeof(line), stdin) != NULL) {
        if ((k = str2words (line, wp, 1024)) < 0)
            E_FATAL("Line too long: %s\n", line);

        if (k > 2)
            E_FATAL("Vocab entry contains too many words\n");
        if (k == 0)
            continue;
        if (k == 1)
            wp[1] = wp[0];	/* Look up the same word that gets printed */

        /* Look up word in each dictionary until found; k counts pronunciations */
        k = 0;
        for (i = 0; (i < argc-1) && (k == 0); i++) {
            wid = dict_wordid (d[i], wp[1]);
            if (NOT_WID(wid))
                continue;

            for (wid = dict_basewid(d[i], wid); IS_WID(wid); wid = dict_nextalt(d[i], wid)) {
                k++;
                if (k == 1)
                    printf ("%s\t", wp[0]);
                else
                    printf ("%s(%d)\t", wp[0], k);

                for (p = 0; p < dict_pronlen(d[i], wid); p++)
                    printf (" %s", dict_ciphone_str (d[i], wid, p));
                printf ("\n");
            }
        }
        if (k == 0)
            E_ERROR("No pronunciation for: '%s'\n", wp[0]);
    }

    return 0;
}
/*
 * Load dictionary entries from an open file into d.
 * Line format: the word string followed by its pronunciation as a sequence of
 * CI phone names.  Lines starting with '#' and empty lines are skipped.
 * A line with no pronunciation, with an unknown phone, or whose word cannot be
 * added is reported through E_ERROR and ignored.  A line with more tokens than
 * the token table holds is fatal.
 * Always returns 0.
 */
static int32 dict_read(FILE *fp, dict_t *d)
{
    char buf[16384];
    char **tok;
    s3cipid_t pron[4096];
    s3wid_t wid;
    int32 lineno, ntok, pos, all_ok;
    int32 max_tok;

    max_tok = 4092;
    tok = (char **) ckd_calloc(max_tok, sizeof(char *));

    lineno = 0;
    while (fgets(buf, sizeof(buf), fp) != NULL) {
        lineno++;

        /* Comment line */
        if (buf[0] == '#')
            continue;

        ntok = str2words(buf, tok, max_tok);
        if (ntok < 0)
            E_FATAL("str2words(%s) failed; Increase maxwd from %d\n", buf, max_tok);

        /* Empty line */
        if (ntok == 0)
            continue;

        /* tok[0] is the word string; tok[1..ntok-1] is the pronunciation */
        if (ntok == 1) {
            E_ERROR("Line %d: No pronunciation for word %s; ignored\n", lineno, tok[0]);
            continue;
        }

        /* Map each phone name to its CI phone id; give up on the first bad one */
        all_ok = 1;
        for (pos = 0; pos < ntok - 1; pos++) {
            pron[pos] = dict_ciphone_id(d, tok[pos + 1]);
            if (NOT_CIPID(pron[pos])) {
                E_ERROR("Line %d: Bad ciphone: %s; word %s ignored\n",
                        lineno, tok[pos + 1], tok[0]);
                all_ok = 0;
                break;
            }
        }
        if (!all_ok)
            continue;

        /* Every phone converted: enter the word */
        wid = dict_add_word(d, tok[0], pron, ntok - 1);
        if (NOT_WID(wid))
            E_ERROR("Line %d: dict_add_word (%s) failed (duplicate?); ignored\n",
                    lineno, tok[0]);
    }

    ckd_free(tok);

    return 0;
}
static void process_reffile (char *reffile) { FILE *rfp, *afp; char line[16384], uttid[4096]; int32 i, k, nref; s3wid_t ref[MAX_UTT_LEN]; s3cipid_t ap[MAX_UTT_LEN]; int8 ap_err[MAX_UTT_LEN]; wseg_t *wseg; if ((rfp = fopen(reffile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", reffile); afp = stdin; /* DP file read in from stdin */ while (fgets(line, sizeof(line), rfp) != NULL) { if ((nref = line2wid (dict, line, ref, MAX_UTT_LEN-1, 0, uttid)) < 0) E_FATAL("Bad line in file %s: %s\n", reffile, line); /* Check for unknown words; remove filler words; terminate with BAD_WID */ k = 0; for (i = 0; i < nref; i++) { if (NOT_WID(ref[i])) E_FATAL("Unknown word at position %d in line: %s\n", i, line); if (! dict_filler_word (dict, ref[i])) ref[k++] = ref[i]; } ref[k++] = BAD_WID; nref = k; /* Build wseg map for DP line */ if (fgets (line, sizeof(line), afp) == NULL) E_FATAL("Unexpected EOF(DP-file)\n"); wseg = line2wseg (line, ref, ap, ap_err, MAX_UTT_LEN-1, uttid); pronerr (uttid, ref, nref, wseg, ap, ap_err); } fclose (rfp); }
dict_t *dict_init (mdef_t *mdef, char *dictfile, char *fillerfile, char comp_sep) { FILE *fp, *fp2; int32 n ; char line[1024]; dict_t *d; if (! dictfile) E_FATAL("No dictionary file\n"); /* * First obtain #words in dictionary (for hash table allocation). * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate * all the required memory in one go. */ if ((fp = fopen(dictfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", dictfile); n = 0; while (fgets (line, sizeof(line), fp) != NULL) { if (line[0] != '#') n++; } rewind (fp); if (fillerfile) { if ((fp2 = fopen(fillerfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", fillerfile); while (fgets (line, sizeof(line), fp2) != NULL) { if (line[0] != '#') n++; } rewind (fp2); } /* * Allocate dict entries. HACK!! Allow some extra entries for words not in file. * Also check for type size restrictions. */ d = (dict_t *) ckd_calloc (1, sizeof(dict_t)); d->max_words = (n+1024 < MAX_WID) ? n+1024 : MAX_WID; if (n >= MAX_WID) E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n, MAX_WID); d->word = (dictword_t *) ckd_calloc (d->max_words, sizeof(dictword_t)); d->n_word = 0; d->mdef = mdef; if (mdef) { d->pht = NULL; d->ciphone_str = NULL; } else { d->pht = hash_new (DEFAULT_NUM_PHONE, 1 /* No case */); d->ciphone_str = (char **) ckd_calloc (DEFAULT_NUM_PHONE, sizeof(char *)); } d->n_ciphone = 0; /* Create new hash table for word strings; case-insensitive word strings */ d->ht = hash_new (d->max_words, 1 /* no-case */); /* Initialize with no compound words */ d->comp_head = NULL; /* Digest main dictionary file */ E_INFO("Reading main dictionary: %s\n", dictfile); dict_read (fp, d); fclose (fp); E_INFO("%d words read\n", d->n_word); /* Now the filler dictionary file, if it exists */ d->filler_start = d->n_word; if (fillerfile) { E_INFO("Reading filler dictionary: %s\n", fillerfile); dict_read (fp2, d); fclose (fp2); E_INFO("%d words read\n", d->n_word - d->filler_start); } 
d->filler_end = d->n_word-1; /* Initialize distinguished word-ids */ d->startwid = dict_wordid (d, START_WORD); d->finishwid = dict_wordid (d, FINISH_WORD); d->silwid = dict_wordid (d, SILENCE_WORD); if (NOT_WID(d->startwid)) E_WARN("%s not in dictionary\n", START_WORD); if (NOT_WID(d->finishwid)) E_WARN("%s not in dictionary\n", FINISH_WORD); if (NOT_WID(d->silwid)) E_WARN("%s not in dictionary\n", SILENCE_WORD); /* Identify compound words if indicated */ if (comp_sep) { E_INFO("Building compound words (separator = '%c')\n", comp_sep); n = dict_build_comp (d, comp_sep); E_INFO("%d compound words\n", n); } return d; }
/* * Build a sentence HMM for the given transcription (wordstr). A two-level DAG is * built: phone-level and state-level. * - <s> and </s> always added at the beginning and end of sentence to form an * augmented transcription. * - Optional <sil> and noise words added between words in the augmented * transcription. * wordstr must contain only the transcript; no extraneous stuff such as utterance-id. * Phone-level HMM structure has replicated nodes to allow for different left and right * context CI phones; hence, each pnode corresponds to a unique triphone in the sentence * HMM. * Return 0 if successful, <0 if any error (eg, OOV word encountered). */ int32 align_build_sent_hmm (char *wordstr) { s3wid_t w, nextw; int32 k, oov; pnode_t *word_end, *node; char *wd, delim, *wdcopy; /* HACK HACKA HACK BHIKSHA */ int32 firsttime = 1; /* END HACK HACKA HACK */ /* Initialize dummy head and tail entries of sent hmm */ phead.wid = BAD_WID; phead.ci = BAD_CIPID; phead.lc = BAD_CIPID; /* No predecessor */ phead.rc = BAD_CIPID; /* Any phone can follow head */ phead.pid = BAD_PID; phead.succlist = NULL; phead.predlist = NULL; phead.next = NULL; /* Will ultimately be the head of list of all pnodes */ phead.id = -1; /* Hardwired */ phead.startstate = NULL; ptail.wid = BAD_WID; ptail.ci = BAD_CIPID; ptail.lc = BAD_CIPID; /* Any phone can precede tail */ ptail.rc = BAD_CIPID; /* No successor */ ptail.pid = BAD_PID; ptail.succlist = NULL; ptail.predlist = NULL; ptail.next = NULL; ptail.id = -2; /* Hardwired */ ptail.startstate = NULL; n_pnode = 0; pnode_list = NULL; oov = 0; /* State-level DAG initialization should be here in case the build is aborted */ shead.pnode = &phead; shead.succlist = NULL; shead.predlist = NULL; shead.sen = BAD_SENID; shead.state = mdef->n_emit_state; shead.hist = NULL; stail.pnode = &ptail; stail.succlist = NULL; stail.predlist = NULL; stail.sen = BAD_SENID; stail.state = 0; stail.hist = NULL; /* Obtain the first transcript word */ k = nextword (wordstr, " 
\t\n", &wd, &delim); if (k < 0) nextw = finishwid; else { wordstr = wd + k; wdcopy = ckd_salloc (wd); *wordstr = delim; nextw = dict_wordid (wdcopy); if (IS_WID(nextw)) nextw = dict_basewid (nextw); } /* Create node(s) for <s> before any transcript word */ /* HACK HACKA HACK BHIKSHA word_end = append_transcript_word (startwid, &phead, nextw, 0, 1); END HACK HACKA HACK BHIKSHA */ /* Append each word in transcription to partial sent HMM created so far */ while (k >= 0) { w = nextw; if (NOT_WID(w)) { E_ERROR("%s not in dictionary\n", wdcopy); oov = 1; /* Hack!! Temporarily set w to some dummy just to run through sentence */ w = finishwid; } ckd_free (wdcopy); k = nextword (wordstr, " \t\n", &wd, &delim); if (k < 0) nextw = finishwid; else { wordstr = wd + k; wdcopy = ckd_salloc (wd); *wordstr = delim; nextw = dict_wordid (wdcopy); if (IS_WID(nextw)) nextw = dict_basewid (nextw); } /* HACK HACKA HACK BHIKSHA */ if (firsttime){ word_end = append_transcript_word (w, &phead, nextw, 0, 1); firsttime = 0; } else if (nextw == finishwid) word_end = append_transcript_word (w, word_end, BAD_WID, 1, 0); else word_end = append_transcript_word (w, word_end, nextw, 1, 1); /* END HACK HACKA HACK BHIKSHA */ } if (oov) return -1; /* Append phone HMMs for </s> at the end; link to tail node */ /* HACK HACKA HACK BHIKSHA word_end = append_transcript_word (finishwid, word_end, BAD_WID, 1, 0); END HACK HACKA HACK BHIKSHA */ for (node = word_end; node; node = node->next) link_pnodes (node, &ptail); /* Build state-level DAG from the phone-level one */ build_state_dag (); /* Dag must begin and end at shead and stail, respectively */ assert (shead.succlist); assert (stail.predlist); assert (! shead.predlist); assert (! 
stail.succlist); #if _DEBUG_ALIGN_ dump_sent_hmm (); /* For debugging */ #endif k = n_pnode * mdef->n_emit_state; if (k > active_list_size) { /* Need to grow active list arrays */ if (active_list_size > 0) { ckd_free (cur_active); ckd_free (next_active); } for (; active_list_size <= k; active_list_size += ACTIVE_LIST_SIZE_INCR); cur_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); next_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); } return 0; }
/* * Load a DAG from a file: each unique <word-id,start-frame> is a node, i.e. with * a single start time but it can represent several end times. Links are created * whenever nodes are adjacent in time. * Return value: ptr to DAG structure if successful; NULL otherwise. */ dag_t *dag_load (char *file) { FILE *fp; dag_t *dag; int32 seqid, sf, fef, lef, ef; char line[16384], wd[4096]; int32 i, j, k; dagnode_t *d, *d2, **darray; s3wid_t w; int32 fudge, min_ef_range; E_INFO("Reading DAG file: %s\n", file); if ((fp = fopen (file, "r")) == NULL) { E_ERROR("fopen(%s,r) failed\n", file); return NULL; } dag = ckd_calloc (1, sizeof(dag_t)); dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *)); dag->nnode = 0; dag->nlink = 0; dag->nfrm = 0; /* Read Frames parameter */ if ((dag->nfrm = dag_param_read (fp, "Frames")) <= 0) E_FATAL("%s: Frames parameter missing or invalid\n", file); /* Read Nodes parameter */ if ((dag->nnode = dag_param_read (fp, "Nodes")) <= 0) E_FATAL("%s: Nodes parameter missing or invalid\n", file); /* Read nodes */ darray = (dagnode_t **) ckd_calloc (dag->nnode, sizeof(dagnode_t *)); for (i = 0; i < dag->nnode; i++) { if (fgets (line, sizeof(line), fp) == NULL) E_FATAL("%s: Premature EOF\n", file); if ((k = sscanf (line, "%d %s %d %d %d", &seqid, wd, &sf, &fef, &lef)) != 5) E_FATAL("%s: Bad line: %s\n", file, line); if ((sf < 0) || (sf >= dag->nfrm) || (fef < 0) || ( fef >= dag->nfrm) || (lef < 0) || ( lef >= dag->nfrm)) E_FATAL("%s: Bad frame info: %s\n", file, line); w = dict_wordid (dict, wd); if (NOT_WID(w)) E_FATAL("%s: Unknown word: %s\n", file, line); if (seqid != i) E_FATAL("%s: Seqno error: %s\n", file, line); d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t)); darray[i] = d; d->wid = w; d->seqid = seqid; d->reachable = 0; d->sf = sf; d->fef = fef; d->lef = lef; d->succlist = NULL; d->predlist = NULL; d->next = dag->node_sf[sf]; dag->node_sf[sf] = d; } /* Read initial node ID */ if (((k = dag_param_read (fp, "Initial")) < 
0) || (k >= dag->nnode)) E_FATAL("%s: Initial node parameter missing or invalid\n", file); dag->entry.src = NULL; dag->entry.dst = darray[k]; dag->entry.next = NULL; /* Read final node ID */ if (((k = dag_param_read (fp, "Final")) < 0) || (k >= dag->nnode)) E_FATAL("%s: Final node parameter missing or invalid\n", file); dag->exit.src = NULL; dag->exit.dst = darray[k]; dag->exit.next = NULL; ckd_free (darray); /* That's all I need darray for??? */ /* Read bestsegscore entries; just to make sure all nodes have been read */ if ((k = dag_param_read (fp, "BestSegAscr")) < 0) E_FATAL("%s: BestSegAscr parameter missing\n", file); fclose (fp); /* * Build edges based on time-adjacency. * min_ef_range = min. endframes that a node must persist for it to be not ignored. * fudge = #frames to be fudged around word begin times */ min_ef_range = *((int32 *) cmd_ln_access ("-min_endfr")); fudge = *((int32 *) cmd_ln_access ("-dagfudge")); if (min_ef_range <= 0) E_FATAL("Bad min_endfr argument: %d\n", min_ef_range); if ((fudge < 0) || (fudge > 2)) E_FATAL("Bad dagfudge argument: %d\n", fudge); dag->nlink = 0; for (sf = 0; sf < dag->nfrm; sf++) { for (d = dag->node_sf[sf]; d; d = d->next) { if ((d->lef - d->fef < min_ef_range - 1) && (d != dag->entry.dst)) continue; if (d->wid == finishwid) continue; for (ef = d->fef - fudge + 1; ef <= d->lef + 1; ef++) { for (d2 = dag->node_sf[ef]; d2; d2 = d2->next) { if ((d2->lef - d2->fef < min_ef_range - 1) && (d2 != dag->exit.dst)) continue; dag_link (d, d2); dag->nlink++; } } } } return dag; }
/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { float32 varfloor, mixwfloor, tpfloor; int32 i, s; s3cipid_t ci; s3wid_t w; char *arg; dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* Codebooks */ varfloor = *((float32 *) cmd_ln_access("-varfloor")); g = gauden_init ((char *) cmd_ln_access("-meanfn"), (char *) cmd_ln_access("-varfn"), varfloor); /* Verify codebook feature dimensions against libfeat */ n_feat = feat_featsize (&featlen); if (n_feat != g->n_feat) E_FATAL("#feature mismatch: s2= %d, mean/var= %d\n", n_feat, g->n_feat); for (i = 0; i < n_feat; i++) if (featlen[i] != g->featlen[i]) E_FATAL("featlen[%d] mismatch: s2= %d, mean/var= %d\n", i, featlen[i], g->featlen[i]); /* Senone mixture weights */ mixwfloor = *((float32 *) cmd_ln_access("-mwfloor")); sen = senone_init ((char *) cmd_ln_access("-mixwfn"), (char *) cmd_ln_access("-senmgaufn"), mixwfloor); /* Verify senone parameters against gauden parameters */ if (sen->n_feat != g->n_feat) E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat, sen->n_feat); if (sen->n_cw != g->n_density) E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n", g->n_density, sen->n_cw); if (sen->n_gauden > g->n_mgau) 
E_FATAL("Senones need more codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); if (sen->n_gauden < g->n_mgau) E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); /* Verify senone parameters against model definition parameters */ if (mdef->n_sen != sen->n_sen) E_FATAL("Model definition has %d senones; but #senone= %d\n", mdef->n_sen, sen->n_sen); /* CD/CI senone interpolation weights file, if present */ if ((arg = (char *) cmd_ln_access ("-lambdafn")) != NULL) { interp = interp_init (arg); /* Verify interpolation weights size with senones */ if (interp->n_sen != sen->n_sen) E_FATAL("Interpolation file has %d weights; but #senone= %d\n", interp->n_sen, sen->n_sen); } else interp = NULL; /* Transition matrices */ tpfloor = *((float32 *) cmd_ln_access("-tpfloor")); tmat = tmat_init ((char *) cmd_ln_access("-tmatfn"), tpfloor); /* Verify transition matrices parameters against model definition parameters */ if (mdef->n_tmat != tmat->n_tmat) E_FATAL("Model definition has %d tmat; but #tmat= %d\n", mdef->n_tmat, tmat->n_tmat); if (mdef->n_emit_state != tmat->n_state-1) E_FATAL("#Emitting states in model definition = %d, #states in tmat = %d\n", mdef->n_emit_state, tmat->n_state); arg = (char *) cmd_ln_access ("-agc"); if ((strcmp (arg, "max") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -agc argument: %s\n", arg); arg = (char *) cmd_ln_access ("-cmn"); if ((strcmp (arg, "current") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -cmn argument: %s\n", arg); }
/*
 * Look up the LM context of an utterance and return it as word ids.
 *   corp   corpus mapping utterance ids to context strings
 *   uttid  utterance to look up
 *   pred   output: pred[0], pred[1] = the two predecessor words
 *   succ   output: the successor word
 * The context string must hold exactly three whitespace-separated fields:
 * two predecessor words and one successor word, where "-" means "none"
 * (BAD_WID).  Words are mapped to their base word id.  A compound word in a
 * predecessor slot is replaced by its trailing component(s); a compound in the
 * second slot overrides the first slot as well.  The successor, if given, must
 * be FINISH_WORD.  A first predecessor without a second is rejected, and every
 * word given must be known to the LM.  All violations, including a missing or
 * surplus field, are fatal.
 */
void lmcontext_load (corpus_t *corp, char *uttid, s3wid_t *pred, s3wid_t *succ)
{
    char *str, wd[4096], *strp;
    char extra[2];
    s3wid_t w[3];
    int32 i, n;
    dict_t *dict;
    s3lmwid_t lwid;

    if ((str = corpus_lookup (corp, uttid)) == NULL)
        E_FATAL("Couldn't find LM context for %s\n", uttid);

    dict = dict_getdict ();

    strp = str;
    /* Exactly three fields fit in w[]; never index past w[2] */
    for (i = 0; i < 3; i++) {
        if (sscanf (strp, "%4095s%n", wd, &n) != 1)
            E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
        strp += n;

        if (strcmp (wd, "-") == 0)
            w[i] = BAD_WID;
        else {
            w[i] = dict_wordid (wd);
            if (NOT_WID(w[i]))
                E_FATAL("LM context word (%s) for %s not in dictionary\n", wd, uttid);
            w[i] = dict_basewid(w[i]);

            switch (i) {
            case 0:
                /* Compound first predecessor: keep only its last component */
                if ((n = dict->word[w[0]].n_comp) > 0)
                    w[0] = dict->word[w[0]].comp[n-1].wid;
                break;
            case 1:
                /* Compound second predecessor: its last two components fill both slots */
                if ((n = dict->word[w[1]].n_comp) > 0) {
                    w[0] = dict->word[w[1]].comp[n-2].wid;
                    w[1] = dict->word[w[1]].comp[n-1].wid;
                }
                break;
            case 2:
                if (w[2] != dict_wordid(FINISH_WORD))
                    E_FATAL("Illegal successor LM context for %s: %s\n", uttid, str);
                break;
            default:
                assert (0);	/* Should never get here */
                break;
            }
        }
    }

    /* A fourth field is not part of the spec; there is no slot to hold it */
    if (sscanf (strp, "%1s", extra) == 1)
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    if (IS_WID(w[0]) && NOT_WID(w[1]))
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    for (i = 0; i < 3; i++) {
        if (IS_WID(w[i])) {
            lwid = lm_lmwid (w[i]);
            /*
             * NOTE(review): wd holds the last field scanned, which is not
             * necessarily the word that failed this lookup.
             */
            if (NOT_LMWID(lwid))
                E_FATAL("LM context word (%s) for %s not in LM\n", wd, uttid);
        }
    }

    pred[0] = w[0];
    pred[1] = w[1];
    *succ = w[2];
}
/*
 * Parse one line of a DP (phone alignment) file into a word segmentation.
 *   line    sequence of CI phone names with error regions marked as
 *           "[[ ref-phones => hyp-phones ]]", followed by an utterance id
 *   ref     reference word ids, terminated by BAD_WID
 *   ap      output: hypothesis CI phone sequence, terminated by BAD_CIPID
 *   ap_err  output: per hypothesis phone, 1 if inside an error region, else 0
 *           (the terminator entry is set to 1)
 *   aplen   maximum number of hypothesis phones; ap[] and ap_err[] must have
 *           room for aplen+1 entries
 *   id      expected utterance id
 * Returns a statically allocated array (reused across calls) with one entry
 * per reference word plus a final sentinel entry: s and e are the first and
 * last hypothesis phone positions of the word, or -1 where that boundary falls
 * inside an error region; err is set if any part of the word is in error.
 * Malformed markup, unknown phones, a mismatch with the reference
 * pronunciations, too many phones, or a wrong utterance id are fatal.
 */
static wseg_t *line2wseg (char *line, s3wid_t *ref, s3cipid_t *ap, int8 *ap_err,
			  int32 aplen, char *id)
{
    char word[1024], uttid[1024], *lp;
    int32 i, k, n_hypci, n_refwd, n_refci, pronlen;
    s3cipid_t ci;
    typedef enum {CORR=0, REFERR=1, HYPERR=2} state_t;
    state_t state;
    static wseg_t *wseg = NULL;

    if (! wseg)
        wseg = (wseg_t *) ckd_calloc (MAX_UTT_LEN, sizeof(wseg_t));

    lp = line;
    n_hypci = n_refci = pronlen = 0;
    n_refwd = -1;
    uttid[0] = '\0';
    state = CORR;

    /* The field width keeps an over-long token from overrunning word[] */
    while (sscanf (lp, "%1023s%n", word, &k) == 1) {
        lp += k;

        if (is_uttid (word, uttid))
            break;

        if (strcmp (word, "[[") == 0) {
            if (state != CORR)
                E_FATAL("%s: Illegal [[\n", id);
            state = REFERR;

            /* Error region opens in the middle of the current ref word */
            if (n_refci < pronlen)
                wseg[n_refwd].err = 1;
        } else if (strcmp (word, "]]") == 0) {
            if (state != HYPERR)
                E_FATAL("%s: Illegal ]]\n", id);
            state = CORR;
        } else if (strcmp (word, "=>") == 0) {
            if (state != REFERR)
                E_FATAL("%s: Illegal =>\n", id);
            state = HYPERR;
        } else {
            ci = mdef_ciphone_id (mdef, word);
            if (NOT_CIPID(ci))
                E_FATAL("%s: Unknown CIphone %s\n", id, word);

            if (state != HYPERR) {	/* Phone belongs to the reference side */
                /* Check if matches next pron in ref word */
                if (n_refci >= pronlen) {
                    assert (n_refci == pronlen);
                    n_refwd++;

                    /*
                     * Test for the BAD_WID terminator before it is used to
                     * index the dictionary's word table.
                     */
                    if (NOT_WID(ref[n_refwd]))
                        E_FATAL("%s: Premature end of ref wid\n", id);

                    pronlen = dict->word[ref[n_refwd]].pronlen;
                    assert (pronlen > 0);

                    wseg[n_refwd].s = (state == CORR) ? n_hypci : -1;
                    wseg[n_refwd].e = -1;
                    wseg[n_refwd].err = 0;

                    n_refci = 0;
                }

                if (dict->word[ref[n_refwd]].ciphone[n_refci] != ci)
                    E_FATAL("%s: CIphone mismatch at word %d, ciphone %d\n",
                            id, n_refwd, n_refci);
                n_refci++;

                if ((n_refci == pronlen) && (state == CORR))
                    wseg[n_refwd].e = n_hypci;
                if (state != CORR)
                    wseg[n_refwd].err = 1;
            }

            if (state != REFERR) {	/* Phone belongs to the hypothesis side */
                if (n_hypci >= aplen)
                    E_FATAL("%s: Too many CIphones: >%d\n", id, aplen);

                ap[n_hypci] = ci;
                ap_err[n_hypci] = (state == CORR) ? 0 : 1;
                n_hypci++;
            }
        }
    }

    /* The whole reference must have been consumed */
    assert (n_refci == pronlen);
    n_refwd++;
    assert (NOT_WID(ref[n_refwd]));

    /* Sentinel entry and terminators */
    wseg[n_refwd].s = wseg[n_refwd].e = n_hypci;
    wseg[n_refwd].err = 0;
    ap[n_hypci] = BAD_CIPID;
    ap_err[n_hypci] = 1;

    if (strcmp (uttid, id) != 0)
        E_FATAL("Uttid mismatch: %s expected, %s found\n", id, uttid);

#if 0
    for (i = 0; IS_WID(ref[i]); i++) {
        printf ("%s: %4d %4d %d %s\n", id,
                wseg[i].s, wseg[i].e, wseg[i].err, dict_wordstr (dict, ref[i]));
    }
#endif

    return wseg;
}