/**
 * Start an N-best (A*) search over the decoder's current word lattice.
 *
 * @param ps    Decoder whose active search and lattice are used.
 * @param sf    Start frame, forwarded unchanged to ps_astar_start().
 * @param ef    End frame, forwarded unchanged to ps_astar_start().
 * @param ctx1  Optional first context word; NULL means "none" (word ID -1).
 * @param ctx2  Optional second context word; NULL means "none" (word ID -1).
 * @return Whatever ps_astar_start() returns, cast to ps_nbest_t *, or NULL
 *         when the decoder has no search or no lattice.
 */
ps_nbest_t *
ps_nbest(ps_decoder_t *ps, int sf, int ef,
         char const *ctx1, char const *ctx2)
{
    ps_lattice_t *lattice;
    ngram_model_t *lm = NULL;     /* No language model unless N-Gram search */
    float32 lw_ratio = 1.0f;      /* Neutral ratio unless N-Gram search */
    int32 ctx_wid1 = -1;
    int32 ctx_wid2 = -1;

    if (ps->search == NULL)
        return NULL;

    lattice = ps_get_lattice(ps);
    if (lattice == NULL)
        return NULL;

    /* FIXME: This is all quite specific to N-Gram search.  Either we
     * should make N-best a method for each search module or it needs
     * to be abstracted to work for N-Gram and FSG. */
    if (strcmp(ps_search_name(ps->search), PS_SEARCH_NGRAM) == 0) {
        ngram_search_t *ngs = (ngram_search_t *)ps->search;

        lm = ngs->lmset;
        lw_ratio = ngs->bestpath_fwdtree_lw_ratio;
    }

    /* Translate the optional context words into dictionary word IDs. */
    if (ctx1)
        ctx_wid1 = dict_wordid(ps_search_dict(ps->search), ctx1);
    if (ctx2)
        ctx_wid2 = dict_wordid(ps_search_dict(ps->search), ctx2);

    return (ps_nbest_t *)ps_astar_start(lattice, lm, lw_ratio, sf, ef,
                                        ctx_wid1, ctx_wid2);
}
/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* LM */ lm_read ((char *) cmd_ln_access("-lmfn"), ""); /* Filler penalties */ fillpen_init ((char *) cmd_ln_access("-fillpenfn"), dict->filler_start, dict->filler_end); }
int32 align_init ( void ) { int32 k; s3wid_t w; float64 *f64arg; mdef = mdef_getmdef (); tmat = tmat_gettmat (); dict = dict_getdict (); assert (mdef && tmat && dict); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); silwid = dict_wordid (SILENCE_WORD); if ((NOT_WID(startwid)) || (NOT_WID(finishwid))) E_FATAL("%s or %s not in dictionary\n", START_WORD, FINISH_WORD); if (NOT_WID(silwid)) E_ERROR("%s not in dictionary; no optional silence inserted between words\n", SILENCE_WORD); /* Create list of optional filler words to be inserted between transcript words */ fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3), sizeof(s3wid_t)); k = 0; if (IS_WID(silwid)) fillwid[k++] = silwid; for (w = dict->filler_start; w <= dict->filler_end; w++) { if ((dict_basewid (w) == w) && (w != silwid) && (w != startwid) && (w != finishwid)) fillwid[k++] = w; } fillwid[k] = BAD_WID; f64arg = (float64 *) cmd_ln_access ("-beam"); beam = logs3 (*f64arg); E_INFO ("logs3(beam)= %d\n", beam); score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32)); hist_head = NULL; align_stseg = NULL; align_phseg = NULL; align_wdseg = NULL; ctr_nstate = counter_new ("NS"); return 0; }
/*
 * Program entry point: parse the command line, load the model definition and
 * dictionaries, resolve the distinguished word IDs, optionally load a
 * homophones file (-hom) and then process the reference file (-ref).
 *
 * Prints the argument help and exits when run without arguments.  A missing
 * mandatory argument, or a -sil word that is not a filler word in the
 * dictionary, is fatal.  Returns 0 on success.
 */
int
main (int32 argc, char *argv[])
{
    char *reffile, *mdeffile, *dictfile, *fdictfile, *homfile;
    char *silword;

    if (argc == 1) {
        cmd_ln_print_help (stderr, arglist);
        exit(0);
    }

    cmd_ln_parse (arglist, argc, argv);

    if ((mdeffile = (char *) cmd_ln_access ("-mdef")) == NULL)
        E_FATAL("-mdef argument missing\n");
    if ((dictfile = (char *) cmd_ln_access ("-dict")) == NULL)
        E_FATAL("-dict argument missing\n");
    if ((fdictfile = (char *) cmd_ln_access ("-fdict")) == NULL)
        E_FATAL("-fdict argument missing\n");
    if ((reffile = (char *) cmd_ln_access ("-ref")) == NULL)
        E_FATAL("-ref argument missing\n");

    unlimit();

    mdef = mdef_init (mdeffile);
    if (mdef->n_ciphone <= 0)
        E_FATAL("0 CIphones in %s\n", mdeffile);

    dict = dict_init (mdef, dictfile, fdictfile);
    /* Words added after this point (see homfile_load) count as OOV. */
    oovbegin = dict->n_word;

    startwid = dict_wordid (dict, "<s>");
    finishwid = dict_wordid (dict, "</s>");

    /*
     * The silence word comes from user input, so validate it with a real
     * check rather than an assert (which vanishes under NDEBUG), and never
     * hand an unknown word ID to dict_filler_word().
     */
    if ((silword = (char *) cmd_ln_access ("-sil")) == NULL)
        E_FATAL("-sil argument missing\n");
    silwid = dict_wordid (dict, silword);
    if (NOT_WID(silwid) || (! dict_filler_word (dict, silwid)))
        E_FATAL("%s is not a filler word in the dictionary\n", silword);

    homlist = NULL;
    if ((homfile = (char *) cmd_ln_access ("-hom")) != NULL)
        homfile_load (homfile);

    process_reffile (reffile);

#if (0 && (! WIN32))
    fflush (stdout);
    fflush (stderr);
    system ("ps aguxwww | grep dpalign");
#endif

    return 0;
}
/**
 * Build an alignment from a line of "word:endtime" fields.
 *
 * Each field is split at its first ':'; the text before it is looked up in
 * d2p->dict and the number after it is taken as the word's end position.
 * A word's duration is the difference between its end position and the
 * previous word's, multiplied by the frame rate (fixed at 100 here).
 * Parsing stops at the first field without a ':'.
 *
 * @param line  Input line; modified in place (each ':' that is parsed is
 *              overwritten with a NUL).
 * @param d2p   Supplies the dictionary and is passed to alignment_init().
 * @return The new alignment, or NULL if str2words() reports an error.
 */
alignment_t *
parse_alignment(char *line, dict2pid_t *d2p)
{
    alignment_t *al;
    char **wptr;
    int nf, i;
    double spos;
    int32 frate = 100; /* FIXME */

    /* First pass only counts the fields so the pointer array can be sized. */
    nf = str2words(line, NULL, 0);
    if (nf < 0)
        return NULL;
    wptr = ckd_calloc(nf, sizeof(*wptr));
    nf = str2words(line, wptr, nf);
    if (nf < 0) {
        ckd_free(wptr);
        return NULL;
    }

    al = alignment_init(d2p);
    spos = 0.0;
    for (i = 0; i < nf; ++i) {
        char *c = strchr(wptr[i], ':');
        double epos;
        int duration;

        if (c == NULL) /* word ID */
            break;
        *c++ = '\0';
        epos = atof(c);
        duration = (int) ((epos - spos) * frate);
        alignment_add_word(al, dict_wordid(d2p->dict, wptr[i]), duration);
        spos = epos;
    }

    /*
     * The alignment only received word IDs and durations, so the field
     * pointer array is no longer needed; it used to be leaked here.
     */
    ckd_free(wptr);

    return al;
}
/*
 * Interactive dictionary lookup.  Loads the model definition (unless the
 * first argument is the literal string "NULL") and the dictionary, then
 * repeatedly prompts for a word and prints every pronunciation, starting
 * from the base word and following the alternative-pronunciation chain.
 *
 * The loop ends when no further word can be read from stdin (end of file or
 * read error).  Returns 0.
 */
int
main (int32 argc, char *argv[])
{
    mdef_t *m;
    dict_t *d;
    char wd[1024];
    s3wid_t wid;
    int32 p;

    if (argc < 3)
        E_FATAL("Usage: %s {mdeffile | NULL} dict [fillerdict]\n", argv[0]);

    m = (strcmp (argv[1], "NULL") != 0) ? mdef_init (argv[1]) : NULL;
    d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), '_');

    for (;;) {
        printf ("word> ");

        /*
         * The field width keeps the token inside wd[]; checking the result
         * stops the loop at EOF instead of spinning on stale data.
         */
        if (scanf ("%1023s", wd) != 1)
            break;

        wid = dict_wordid (d, wd);
        if (NOT_WID(wid))
            E_ERROR("Unknown word\n");
        else {
            for (wid = dict_basewid(d, wid); IS_WID(wid); wid = d->word[wid].alt) {
                printf ("%s\t", dict_wordstr(d, wid));
                for (p = 0; p < d->word[wid].pronlen; p++)
                    printf (" %s", dict_ciphone_str (d, wid, p));
                printf ("\n");
            }
        }
    }

    return 0;
}
static void homfile_load (char *file) { FILE *fp; char line[16380], w1[4096], w2[4096]; int32 k, n; s3wid_t wid1, wid2; s3cipid_t ci[1]; hom_t *h; E_INFO("Reading homophones file %s\n", file); if ((fp = fopen(file, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", file); ci[0] = (s3cipid_t) 0; /* Dummy */ n = 0; while (fgets (line, sizeof(line), fp) != NULL) { if ((k = sscanf (line, "%s %s", w1, w2)) == 2) { wid1 = dict_wordid (dict, w1); if (NOT_WID(wid1)) { E_INFO("Adding %s to dictionary\n", w1); wid1 = dict_add_word (dict, w1, ci, 1); if (NOT_WID(wid1)) E_FATAL("dict_add_word(%s) failed\n", w1); } wid2 = dict_wordid (dict, w2); if ((NOT_WID(wid2)) || (wid2 >= oovbegin)) E_FATAL("%s not in dictionary\n", w2); h = (hom_t *) listelem_alloc (sizeof(hom_t)); h->w1 = wid1; h->w2 = wid2; h->next = homlist; homlist = h; n++; } else E_FATAL("Bad homophones line: %s\n", line); } E_INFO("%d homophone pairs read\n", n); fclose (fp); }
int main(int argc, char *argv[]) { bin_mdef_t *mdef; dict_t *dict; cmd_ln_t *config; int i; char buf[100]; TEST_ASSERT(config = cmd_ln_init(NULL, NULL, FALSE, "-dict", MODELDIR "/en-us/cmudict-en-us.dict", "-fdict", MODELDIR "/en-us/en-us/noisedict", NULL)); /* Test dictionary in standard fashion. */ TEST_ASSERT(mdef = bin_mdef_read(NULL, MODELDIR "/en-us/en-us/mdef")); TEST_ASSERT(dict = dict_init(config, mdef, NULL)); printf("Word ID (CARNEGIE) = %d\n", dict_wordid(dict, "CARNEGIE")); printf("Word ID (ASDFASFASSD) = %d\n", dict_wordid(dict, "ASDFASFASSD")); TEST_EQUAL(0, dict_write(dict, "_cmu07a.dic", NULL)); TEST_EQUAL(0, system("diff -uw " MODELDIR "/en-us/cmudict-en-us.dict _cmu07a.dic")); dict_free(dict); bin_mdef_free(mdef); /* Now test an empty dictionary. */ TEST_ASSERT(dict = dict_init(NULL, NULL, NULL)); printf("Word ID(<s>) = %d\n", dict_wordid(dict, "<s>")); TEST_ASSERT(BAD_S3WID != dict_add_word(dict, "FOOBIE", NULL, 0)); TEST_ASSERT(BAD_S3WID != dict_add_word(dict, "BLETCH", NULL, 0)); printf("Word ID(FOOBIE) = %d\n", dict_wordid(dict, "FOOBIE")); printf("Word ID(BLETCH) = %d\n", dict_wordid(dict, "BLETCH")); TEST_ASSERT(dict_real_word(dict, dict_wordid(dict, "FOOBIE"))); TEST_ASSERT(dict_real_word(dict, dict_wordid(dict, "BLETCH"))); TEST_ASSERT(!dict_real_word(dict, dict_wordid(dict, "</s>"))); dict_free(dict); /* Test to add 500k words. */ TEST_ASSERT(dict = dict_init(NULL, NULL, NULL)); for (i = 0; i < 500000; i++) { sprintf(buf, "word_%d", i); TEST_ASSERT(BAD_S3WID != dict_add_word(dict, buf, NULL, 0)); } dict_free(dict); cmd_ln_free_r(config); return 0; }
/*
 * Scan the dictionary for compound words.  This function should be called just after
 * loading the dictionary.  For the moment, compound words in a compound word are
 * assumed to be separated by the given sep character, (underscore in the CMU dict).
 *
 * For every compound word found, the entry's n_comp and comp[] fields are filled in
 * with the word IDs of its components.  A word with a leading, trailing or doubled
 * separator, a compound start/finish/filler word, a component that is not itself in
 * the dictionary, or a word too long for the scratch buffer is fatal.
 *
 * Return value: #compound words found in dictionary.
 */
static int32
dict_build_comp (dict_t *d,
                 char sep)      /* Separator character */
{
    char wd[4096];
    int32 w, cwid;
    dictword_t *wordp;
    int32 nc;   /* # compound words in dictionary */
    int32 i, j, l, n;

    nc = 0;
    for (w = 0; w < d->n_word; w++) {
        /*
         * NOTE(review): every word ID is mapped to its base entry here, so a
         * base entry that has alternates is presumably processed once per
         * alternate as well -- confirm that this is intended.
         */
        wordp = d->word + dict_basewid(d, w);

        /* Refuse words that would overrun the scratch buffer. */
        if (strlen(wordp->word) >= sizeof(wd))
            E_FATAL("Word too long (limit %d characters): %s\n",
                    (int) (sizeof(wd) - 1), wordp->word);
        strcpy (wd, wordp->word);
        l = strlen(wd);

        /* An empty string has no separators; also keeps wd[l-1] in bounds. */
        if (l == 0)
            continue;

        if ((wd[0] == sep) || (wd[l-1] == sep))
            E_FATAL("Bad compound word %s: leading or trailing separator\n", wordp->word);

        /* Count no. of components in this word */
        n = 1;
        for (i = 1; i < l-1; i++)       /* 0 and l-1 already checked above */
            if (wd[i] == sep)
                n++;
        if (n == 1)
            continue;   /* Not a compound word */
        nc++;

        if ((w == d->startwid) || (w == d->finishwid) || dict_filler_word (d, w))
            E_FATAL("Compound special/filler word (%s) not allowed\n", wordp->word);

        /* Allocate and fill in component word info */
        wordp->n_comp = n;
        wordp->comp = (s3wid_t *) ckd_calloc (n, sizeof(s3wid_t));

        /*
         * Parse word string into components: j marks the start of a
         * component, i advances to the separator (or end of string) that
         * ends it, and that position is overwritten with a NUL so wd+j can
         * be looked up as a word of its own.
         */
        n = 0;
        for (i = 0; i < l; i++) {
            for (j = i; (i < l) && (wd[i] != sep); i++);
            if (j == i)
                E_FATAL("Bad compound word %s: successive separators\n", wordp->word);

            wd[i] = '\0';
            cwid = dict_wordid (d, wd+j);
            if (NOT_WID(cwid))
                E_FATAL("Component word %s of %s not in dictionary\n", wd+j, wordp->word);
            wordp->comp[n] = cwid;
            n++;
        }
    }

    if (nc > 0)
        d->comp_head = dict_comp_head (d);

    return nc;
}
/**
 * Write one line per "real" word of a segmentation to a file.
 *
 * Each line holds: show, channel, start time, duration, base word string and
 * the word's probability (its log probability passed through logmath_exp()).
 * Times are frame counts divided by frate, offset by the utterance start time
 * when the utterance ID carries one.
 *
 * @param fh     Output stream.
 * @param ps     Decoder supplying the dictionary and log-math object.
 * @param itor   Segment iterator; walked until it becomes NULL.
 * @param uttid  Utterance ID; NULL is written as "(null)".
 * @param frate  Frames per second.
 * @return Always 0.
 */
static int
write_ctm(FILE *fh, ps_decoder_t *ps, ps_seg_t *itor, char const *uttid, int32 frate)
{
    logmath_t *lmath = ps_get_logmath(ps);
    char *idcopy, *show, *comma;
    char *channel = NULL;
    double utt_start = 0.0;

    /* We have semi-standardized on comma-separated uttids which
     * correspond to the fields of the STM file.  So if there's a
     * comma in the uttid, take the first two fields as show and
     * channel, and also try to find the start time. */
    idcopy = ckd_salloc(uttid ? uttid : "(null)");
    show = idcopy;
    comma = strchr(idcopy, ',');
    if (comma != NULL) {
        *comma = '\0';                       /* Terminates the show field */
        channel = comma + 1;
        comma = strchr(channel, ',');
        if (comma != NULL) {
            *comma = '\0';                   /* Terminates the channel field */
            /* The start time is the field after the following comma. */
            comma = strchr(comma + 1, ',');
            if (comma != NULL)
                utt_start = atof_c(comma + 1);
        }
    }

    for (; itor; itor = ps_seg_next(itor)) {
        /* Skip things that aren't "real words" (FIXME: currently
         * requires s3kr3t h34d3rz...) */
        char const *word = ps_seg_word(itor);
        int32 wid = dict_wordid(ps->dict, word);
        int32 prob, sf, ef;

        if (wid < 0 || !dict_real_word(ps->dict, wid))
            continue;

        prob = ps_seg_prob(itor, NULL, NULL, NULL);
        ps_seg_frames(itor, &sf, &ef);

        fprintf(fh, "%s %s %.2f %.2f %s %.3f\n",
                show,
                channel ? channel : "1",
                utt_start + (double)sf / frate,
                (double)(ef - sf) / frate,
                /* FIXME: More s3kr3tz */
                dict_basestr(ps->dict, wid),
                logmath_exp(lmath, prob));
    }

    ckd_free(idcopy);

    return 0;
}
/*
 * Print pronunciations for a vocabulary read from stdin.
 *
 * Every command-line argument names a dictionary file.  Each vocabulary line
 * holds either one word, or two words where the first is the name to print
 * and the second is the word to look up.  Dictionaries are searched in
 * command-line order and the search stops at the first one that knows the
 * word; all of its pronunciations are printed, the second and later ones
 * tagged "(k)".  Words found in no dictionary are reported with E_ERROR.
 *
 * Returns 0.
 */
int
main (int32 argc, char *argv[])
{
    dict_t **d;
    int32 i, k, p, wid;
    char line[16384], *wp[1024];

    if (argc < 2) {
        E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
        exit(0);
    }

    d = (dict_t **) ckd_calloc (argc-1, sizeof(dict_t *));
    for (i = 1; i < argc; i++)
        d[i-1] = dict_init (NULL, argv[i], NULL, 0);

    while (fgets (line, sizeof(line), stdin) != NULL) {
        if ((k = str2words (line, wp, 1024)) < 0)
            E_FATAL("Line too long: %s\n", line);

        if (k > 2)
            E_FATAL("Vocab entry contains too many words\n");

        if (k == 0)
            continue;

        /* A single word is both the printed name and the lookup key. */
        if (k == 1)
            wp[1] = wp[0];

        /* Look up word in each dictionary until found */
        k = 0;      /* From here on k counts the pronunciations printed */
        for (i = 0; (i < argc-1) && (k == 0); i++) {
            wid = dict_wordid (d[i], wp[1]);
            if (NOT_WID(wid))
                continue;

            for (wid = dict_basewid(d[i], wid); IS_WID(wid); wid = dict_nextalt(d[i], wid)) {
                k++;
                if (k == 1)
                    printf ("%s\t", wp[0]);
                else
                    printf ("%s(%d)\t", wp[0], k);

                for (p = 0; p < dict_pronlen(d[i], wid); p++)
                    printf (" %s", dict_ciphone_str (d[i], wid, p));
                printf ("\n");
            }
        }
        if (k == 0)
            E_ERROR("No pronunciation for: '%s'\n", wp[0]);
    }

    return 0;
}
/*
 * Convert a line of whitespace-separated words into dictionary word IDs.
 *
 * Words are read until one is recognised by is_uttid(), which also receives
 * the uttid buffer.  A word missing from the dictionary is added to it when
 * add_oov is non-zero and is fatal otherwise.  Anything left on the line
 * after the utterance ID only produces a warning.
 *
 *   dict       Dictionary used for lookup (and for adding OOV words).
 *   line       Input text; not modified.
 *   wid        Output array receiving the word IDs.
 *   max_n_wid  Capacity of wid[].
 *   add_oov    Non-zero to add unknown words to the dictionary.
 *   uttid      Output buffer, reset to the empty string on entry.
 *
 * Return value: number of word IDs stored, or -(number stored) if the line
 * holds more than max_n_wid words.  A word longer than the internal buffer
 * is fatal.
 */
int32
line2wid (dict_t *dict, char *line, s3wid_t *wid, int32 max_n_wid, int32 add_oov,
          char *uttid)
{
    char *lp, word[1024];
    char nextc;
    int32 n, k;

    uttid[0] = '\0';

    lp = line;
    n = 0;
    /* The field width keeps every token inside word[]. */
    while (sscanf (lp, "%1023s%n", word, &k) == 1) {
        lp += k;

        /*
         * A token stops at whitespace, at the end of the string, or when the
         * field width runs out.  Only the last case leaves a non-space
         * character next, and then the word was cut short.
         */
        nextc = *lp;
        if ((nextc != '\0') && (nextc != ' ') && (nextc != '\t') && (nextc != '\n')
            && (nextc != '\r') && (nextc != '\f') && (nextc != '\v'))
            E_FATAL("Word too long (limit %d characters) in line: %s\n",
                    (int) (sizeof(word) - 1), line);

        if (n >= max_n_wid)
            return -n;

        if (is_uttid (word, uttid))
            break;

        wid[n] = dict_wordid (dict, word);
        /* Up to caller to handle BAD_WIDs */
        if (NOT_WID(wid[n])) {
            /* OOV word */
            if (add_oov) {
                E_INFO("Adding %s to dictionary\n", word);
                wid[n] = dict_add_word (dict, word, NULL, 0);
                if (NOT_WID(wid[n]))
                    E_FATAL("dict_add_word(%s) failed for line: %s\n", word, line);
            } else
                E_FATAL("Unknown word (%s) in line: %s\n", word, line);
        }
        n++;
    }

    if (sscanf (lp, "%1023s", word) == 1)       /* Check that line really ended */
        E_WARN("Nonempty data ignored after uttid(%s) in line: %s\n", uttid, line);

    return n;
}
/**
 * Look up a word's pronunciation in the decoder's dictionary.
 *
 * @param ps    Decoder whose dictionary is searched.
 * @param word  Word to look up.
 * @return A newly allocated string holding the word's phone names separated
 *         by single spaces (an empty string if the pronunciation has no
 *         phones), or NULL if the word is not in the dictionary.  The buffer
 *         comes from ckd_calloc(); presumably the caller releases it with
 *         ckd_free().
 */
char *
ps_lookup_word(ps_decoder_t *ps, const char *word)
{
    s3wid_t wid;
    int32 phlen, j, npron;
    char *phones, *out;
    dict_t *dict = ps->dict;

    wid = dict_wordid(dict, word);
    if (wid == BAD_S3WID)
        return NULL;

    npron = dict_pronlen(dict, wid);

    /*
     * One byte per phone covers its separator, plus one for the terminator.
     * Starting at 1 means an empty pronunciation still gets a real
     * one-byte buffer instead of a zero-sized allocation.
     */
    phlen = 1;
    for (j = 0; j < npron; ++j)
        phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1;
    phones = ckd_calloc(1, phlen);

    /* Append through a cursor rather than rescanning the buffer each time. */
    out = phones;
    for (j = 0; j < npron; ++j) {
        const char *ph = dict_ciphone_str(dict, wid, j);
        size_t len = strlen(ph);

        if (j > 0)
            *out++ = ' ';
        memcpy(out, ph, len);
        out += len;
    }
    *out = '\0';

    return phones;
}
/**
 * Allocate the debugging options of a flat-forward search graph and fill
 * them in from the configuration.
 *
 * -tracewhmm names the word to trace in detail; it is reported if missing
 * from the dictionary, and the trace word is BAD_S3WID when the option is
 * not given.  Each of the four dump-frame options keeps the sentinel
 * 0x7ffffff0 unless the configured value is non-zero.
 *
 * @param fwg  Graph whose kbcore supplies the configuration and dictionary.
 * @return The newly allocated structure.
 */
static fwd_dbg_t *
init_fwd_dbg(srch_FLAT_FWD_graph_t * fwg)
{
    cmd_ln_t *config = kbcore_config(fwg->kbcore);
    const char *trace_word;
    fwd_dbg_t *dbg;
    int32 frm;

    dbg = (fwd_dbg_t *) ckd_calloc(1, sizeof(fwd_dbg_t));
    assert(dbg);

    /* Word to be traced in detail */
    trace_word = cmd_ln_str_r(config, "-tracewhmm");
    if (trace_word == NULL) {
        dbg->trace_wid = BAD_S3WID;
    }
    else {
        dbg->trace_wid = dict_wordid(fwg->kbcore->dict, trace_word);
        if (NOT_S3WID(dbg->trace_wid))
            E_ERROR("%s not in dictionary; cannot be traced\n", trace_word);
    }

    /* Active words to be dumped for debugging after and before the given frame nos, if any */
    frm = cmd_ln_int32_r(config, "-worddumpsf");
    dbg->word_dump_sf = frm ? frm : (int32) 0x7ffffff0;

    frm = cmd_ln_int32_r(config, "-worddumpef");
    dbg->word_dump_ef = frm ? frm : (int32) 0x7ffffff0;

    /* Active HMMs to be dumped for debugging after and before the given frame nos, if any */
    frm = cmd_ln_int32_r(config, "-hmmdumpsf");
    dbg->hmm_dump_sf = frm ? frm : (int32) 0x7ffffff0;

    frm = cmd_ln_int32_r(config, "-hmmdumpef");
    dbg->hmm_dump_ef = frm ? frm : (int32) 0x7ffffff0;

    return dbg;
}
/**
 * Create a dictionary, optionally loading the main (-dict) and filler
 * (-fdict) dictionary files named in the configuration.
 *
 * Both files are first scanned to count their non-comment lines so that the
 * word table and hash table can be sized in one go.  The start, finish and
 * silence words are added, pronounced as the silence phone, if the files did
 * not define them.  Everything added after the main dictionary, including
 * those three words, forms the filler range.
 *
 * @param config  Configuration, or NULL for an empty dictionary.
 * @param mdef    Model definition, or NULL; retained when given and used to
 *                find the silence phone (phone 0 is used otherwise).
 * @return The new dictionary, with a reference count of 1.  An unreadable
 *         dictionary file, too many words, or a silence word outside the
 *         filler range is fatal.
 */
dict_t *
dict_init(cmd_ln_t *config, bin_mdef_t * mdef)
{
    FILE *fp, *fp2;
    int32 n;
    lineiter_t *li;
    dict_t *d;
    s3cipid_t sil;
    char const *dictfile = NULL, *fillerfile = NULL;

    if (config) {
        dictfile = cmd_ln_str_r(config, "-dict");
        fillerfile = cmd_ln_str_r(config, "-fdict");
    }

    /*
     * First obtain #words in dictionary (for hash table allocation).
     * Reason: The PC NT system doesn't like to grow memory gradually.  Better to allocate
     * all the required memory in one go.
     */
    fp = NULL;
    n = 0;
    if (dictfile) {
        if ((fp = fopen(dictfile, "r")) == NULL)
            E_FATAL_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile);
        for (li = lineiter_start(fp); li; li = lineiter_next(li)) {
            if (li->buf[0] != '#')
                n++;
        }
        rewind(fp);
    }

    fp2 = NULL;
    if (fillerfile) {
        if ((fp2 = fopen(fillerfile, "r")) == NULL)
            E_FATAL_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile);
        for (li = lineiter_start(fp2); li; li = lineiter_next(li)) {
            if (li->buf[0] != '#')
                n++;
        }
        rewind(fp2);
    }

    /*
     * Allocate dict entries.  HACK!!  Allow some extra entries for words not in file.
     * Also check for type size restrictions.
     */
    d = (dict_t *) ckd_calloc(1, sizeof(dict_t));       /* freed in dict_free() */
    d->refcnt = 1;
    d->max_words =
        (n + S3DICT_INC_SZ < MAX_S3WID) ? n + S3DICT_INC_SZ : MAX_S3WID;
    if (n >= MAX_S3WID)
        E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n,
                MAX_S3WID);

    /* sizeof yields size_t, so cast the size-based values to match %d. */
    E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n",
           d->max_words, (int) sizeof(dictword_t),
           (int) (d->max_words * sizeof(dictword_t) / 1024));
    d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t));      /* freed in dict_free() */
    d->n_word = 0;
    if (mdef)
        d->mdef = bin_mdef_retain(mdef);

    /* Create new hash table for word strings; case-insensitive word strings */
    if (config && cmd_ln_exists_r(config, "-dictcase"))
        d->nocase = cmd_ln_boolean_r(config, "-dictcase");
    d->ht = hash_table_new(d->max_words, d->nocase);

    /* Digest main dictionary file */
    if (fp) {
        E_INFO("Reading main dictionary: %s\n", dictfile);
        dict_read(fp, d);
        fclose(fp);
        E_INFO("%d words read\n", d->n_word);
    }

    /* Now the filler dictionary file, if it exists */
    d->filler_start = d->n_word;
    if (fillerfile) {
        E_INFO("Reading filler dictionary: %s\n", fillerfile);
        dict_read(fp2, d);
        fclose(fp2);
        E_INFO("%d words read\n", d->n_word - d->filler_start);
    }

    /* Make sure the distinguished words exist, pronounced as silence. */
    if (mdef)
        sil = bin_mdef_silphone(mdef);
    else
        sil = 0;
    if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_START_WORD, &sil, 1);
    }
    if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_FINISH_WORD, &sil, 1);
    }
    if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) {
        dict_add_word(d, S3_SILENCE_WORD, &sil, 1);
    }

    d->filler_end = d->n_word - 1;

    /* Initialize distinguished word-ids */
    d->startwid = dict_wordid(d, S3_START_WORD);
    d->finishwid = dict_wordid(d, S3_FINISH_WORD);
    d->silwid = dict_wordid(d, S3_SILENCE_WORD);

    if ((d->filler_start > d->filler_end)
        || (!dict_filler_word(d, d->silwid)))
        E_FATAL("%s must occur (only) in filler dictionary\n",
                S3_SILENCE_WORD);

    /* No check that alternative pronunciations for filler words are in filler range!! */

    return d;
}
/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { float32 varfloor, mixwfloor, tpfloor; int32 i, s; s3cipid_t ci; s3wid_t w; char *arg; dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* Codebooks */ varfloor = *((float32 *) cmd_ln_access("-varfloor")); g = gauden_init ((char *) cmd_ln_access("-meanfn"), (char *) cmd_ln_access("-varfn"), varfloor); /* Verify codebook feature dimensions against libfeat */ n_feat = feat_featsize (&featlen); if (n_feat != g->n_feat) E_FATAL("#feature mismatch: s2= %d, mean/var= %d\n", n_feat, g->n_feat); for (i = 0; i < n_feat; i++) if (featlen[i] != g->featlen[i]) E_FATAL("featlen[%d] mismatch: s2= %d, mean/var= %d\n", i, featlen[i], g->featlen[i]); /* Senone mixture weights */ mixwfloor = *((float32 *) cmd_ln_access("-mwfloor")); sen = senone_init ((char *) cmd_ln_access("-mixwfn"), (char *) cmd_ln_access("-senmgaufn"), mixwfloor); /* Verify senone parameters against gauden parameters */ if (sen->n_feat != g->n_feat) E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat, sen->n_feat); if (sen->n_cw != g->n_density) E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n", g->n_density, sen->n_cw); if (sen->n_gauden > g->n_mgau) 
E_FATAL("Senones need more codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); if (sen->n_gauden < g->n_mgau) E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); /* Verify senone parameters against model definition parameters */ if (mdef->n_sen != sen->n_sen) E_FATAL("Model definition has %d senones; but #senone= %d\n", mdef->n_sen, sen->n_sen); /* CD/CI senone interpolation weights file, if present */ if ((arg = (char *) cmd_ln_access ("-lambdafn")) != NULL) { interp = interp_init (arg); /* Verify interpolation weights size with senones */ if (interp->n_sen != sen->n_sen) E_FATAL("Interpolation file has %d weights; but #senone= %d\n", interp->n_sen, sen->n_sen); } else interp = NULL; /* Transition matrices */ tpfloor = *((float32 *) cmd_ln_access("-tpfloor")); tmat = tmat_init ((char *) cmd_ln_access("-tmatfn"), tpfloor); /* Verify transition matrices parameters against model definition parameters */ if (mdef->n_tmat != tmat->n_tmat) E_FATAL("Model definition has %d tmat; but #tmat= %d\n", mdef->n_tmat, tmat->n_tmat); if (mdef->n_emit_state != tmat->n_state-1) E_FATAL("#Emitting states in model definition = %d, #states in tmat = %d\n", mdef->n_emit_state, tmat->n_state); arg = (char *) cmd_ln_access ("-agc"); if ((strcmp (arg, "max") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -agc argument: %s\n", arg); arg = (char *) cmd_ln_access ("-cmn"); if ((strcmp (arg, "current") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -cmn argument: %s\n", arg); }
/**
 * Compute the left and right context CIphone sets for each state.
 *
 * On return lextree->lc[s] and lextree->rc[s] each hold a list of CIphone
 * IDs terminated by -1.  The sets are first built as flag vectors (one slot
 * per CIphone plus a spare one for the terminator) and compacted into lists
 * in place at the end.
 */
static void
fsg_lextree_lc_rc(fsg_lextree_t *lextree)
{
    int32 state, ci, n_out;
    int32 n_ci;
    int32 silcipid;
    int32 pronlen;
    fsg_model_t *fsg;

    silcipid = bin_mdef_silphone(lextree->mdef);
    assert(silcipid >= 0);
    n_ci = bin_mdef_n_ciphone(lextree->mdef);

    fsg = lextree->fsg;

    /*
     * lextree->lc[s] = set of left context CIphones for state s.  Similarly, rc[s]
     * for right context CIphones.
     */
    lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
    lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
    E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n",
           fsg->n_state * (n_ci + 1) * 2,
           fsg->n_state * (n_ci + 1) * 2 / 1024);

    /* Pass 1: contexts contributed directly by word transitions. */
    for (state = 0; state < fsg->n_state; state++) {
        fsg_arciter_t *itor;

        for (itor = fsg_model_arcs(fsg, state); itor; itor = fsg_arciter_next(itor)) {
            fsg_link_t *link = fsg_arciter_get(itor);
            int32 dictwid;      /**< Dictionary (not FSG) word ID!! */
            int32 from, to;

            if (fsg_link_wid(link) < 0)
                continue;       /* Null transition; handled in pass 3 */

            dictwid = dict_wordid(lextree->dict,
                                  fsg_model_word_str(lextree->fsg, link->wid));
            from = fsg_link_from_state(link);
            to = fsg_link_to_state(link);

            /*
             * Add the first CIphone of the word to the rclist of its source
             * state, and the last CIphone to the lclist of its destination.
             * (Filler phones are a pain to deal with.  There is no direct
             * marking of a filler phone; but only filler words are supposed to
             * use such phones, so we use that fact.  HACK!!  FRAGILE!!)
             */
            if (fsg_model_is_filler(fsg, fsg_link_wid(link))) {
                /* Filler phone; use silence phone as context */
                lextree->rc[from][silcipid] = 1;
                lextree->lc[to][silcipid] = 1;
            }
            else {
                pronlen = dict_pronlen(lextree->dict, dictwid);
                lextree->rc[from][dict_pron(lextree->dict, dictwid, 0)] = 1;
                lextree->lc[to][dict_pron(lextree->dict, dictwid, pronlen - 1)] = 1;
            }
        }
    }

    /*
     * Pass 2: add SIL phone to the lclist and rclist of each state.  Strictly
     * speaking, only needed at start and final states, respectively, but
     * all states considered since the user may change the start and final
     * states.  In any case, most applications would have a silence self
     * loop at each state, hence these would be needed anyway.
     */
    for (state = 0; state < fsg->n_state; state++) {
        lextree->lc[state][silcipid] = 1;
        lextree->rc[state][silcipid] = 1;
    }

    /*
     * Pass 3: propagate lc and rc lists past null transitions.  (Since FSG
     * contains null transitions closure, no need to worry about a chain of
     * successive null transitions.  Right??)
     *
     * This can't be joined with the previous loops because we first calculate
     * contexts and only then we can propagate them.
     */
    for (state = 0; state < fsg->n_state; state++) {
        fsg_arciter_t *itor;

        for (itor = fsg_model_arcs(fsg, state); itor; itor = fsg_arciter_next(itor)) {
            fsg_link_t *link = fsg_arciter_get(itor);
            int32 from, to;

            if (fsg_link_wid(link) >= 0)
                continue;       /* Only null transitions propagate */

            from = fsg_link_from_state(link);
            to = fsg_link_to_state(link);
            for (ci = 0; ci < n_ci; ci++) {
                /*
                 * lclist(d) |= lclist(s), because all the words ending up at s, can
                 * now also end at d, becoming the left context for words leaving d.
                 */
                lextree->lc[to][ci] |= lextree->lc[from][ci];
                /*
                 * Similarly, rclist(s) |= rclist(d), because all the words leaving d
                 * can equivalently leave s, becoming the right context for words
                 * ending up at s.
                 */
                lextree->rc[from][ci] |= lextree->rc[to][ci];
            }
        }
    }

    /*
     * Pass 4: convert the bit-vector representation into a list.  The write
     * position never overtakes the read position, so this is safe in place.
     */
    for (state = 0; state < fsg->n_state; state++) {
        n_out = 0;
        for (ci = 0; ci < n_ci; ci++) {
            if (lextree->lc[state][ci])
                lextree->lc[state][n_out++] = ci;
        }
        lextree->lc[state][n_out] = -1;     /* Terminate the list */

        n_out = 0;
        for (ci = 0; ci < n_ci; ci++) {
            if (lextree->rc[state][ci])
                lextree->rc[state][n_out++] = ci;
        }
        lextree->rc[state][n_out] = -1;     /* Terminate the list */
    }
}
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ int32 tmatid; gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. 
*/ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; ++n_lc_alloc; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } }
/* * Load a DAG from a file: each unique <word-id,start-frame> is a node, i.e. with * a single start time but it can represent several end times. Links are created * whenever nodes are adjacent in time. * Return value: ptr to DAG structure if successful; NULL otherwise. */ dag_t *dag_load (char *file) { FILE *fp; dag_t *dag; int32 seqid, sf, fef, lef, ef; char line[16384], wd[4096]; int32 i, j, k; dagnode_t *d, *d2, **darray; s3wid_t w; int32 fudge, min_ef_range; E_INFO("Reading DAG file: %s\n", file); if ((fp = fopen (file, "r")) == NULL) { E_ERROR("fopen(%s,r) failed\n", file); return NULL; } dag = ckd_calloc (1, sizeof(dag_t)); dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *)); dag->nnode = 0; dag->nlink = 0; dag->nfrm = 0; /* Read Frames parameter */ if ((dag->nfrm = dag_param_read (fp, "Frames")) <= 0) E_FATAL("%s: Frames parameter missing or invalid\n", file); /* Read Nodes parameter */ if ((dag->nnode = dag_param_read (fp, "Nodes")) <= 0) E_FATAL("%s: Nodes parameter missing or invalid\n", file); /* Read nodes */ darray = (dagnode_t **) ckd_calloc (dag->nnode, sizeof(dagnode_t *)); for (i = 0; i < dag->nnode; i++) { if (fgets (line, sizeof(line), fp) == NULL) E_FATAL("%s: Premature EOF\n", file); if ((k = sscanf (line, "%d %s %d %d %d", &seqid, wd, &sf, &fef, &lef)) != 5) E_FATAL("%s: Bad line: %s\n", file, line); if ((sf < 0) || (sf >= dag->nfrm) || (fef < 0) || ( fef >= dag->nfrm) || (lef < 0) || ( lef >= dag->nfrm)) E_FATAL("%s: Bad frame info: %s\n", file, line); w = dict_wordid (dict, wd); if (NOT_WID(w)) E_FATAL("%s: Unknown word: %s\n", file, line); if (seqid != i) E_FATAL("%s: Seqno error: %s\n", file, line); d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t)); darray[i] = d; d->wid = w; d->seqid = seqid; d->reachable = 0; d->sf = sf; d->fef = fef; d->lef = lef; d->succlist = NULL; d->predlist = NULL; d->next = dag->node_sf[sf]; dag->node_sf[sf] = d; } /* Read initial node ID */ if (((k = dag_param_read (fp, "Initial")) < 
0) || (k >= dag->nnode)) E_FATAL("%s: Initial node parameter missing or invalid\n", file); dag->entry.src = NULL; dag->entry.dst = darray[k]; dag->entry.next = NULL; /* Read final node ID */ if (((k = dag_param_read (fp, "Final")) < 0) || (k >= dag->nnode)) E_FATAL("%s: Final node parameter missing or invalid\n", file); dag->exit.src = NULL; dag->exit.dst = darray[k]; dag->exit.next = NULL; ckd_free (darray); /* That's all I need darray for??? */ /* Read bestsegscore entries; just to make sure all nodes have been read */ if ((k = dag_param_read (fp, "BestSegAscr")) < 0) E_FATAL("%s: BestSegAscr parameter missing\n", file); fclose (fp); /* * Build edges based on time-adjacency. * min_ef_range = min. endframes that a node must persist for it to be not ignored. * fudge = #frames to be fudged around word begin times */ min_ef_range = *((int32 *) cmd_ln_access ("-min_endfr")); fudge = *((int32 *) cmd_ln_access ("-dagfudge")); if (min_ef_range <= 0) E_FATAL("Bad min_endfr argument: %d\n", min_ef_range); if ((fudge < 0) || (fudge > 2)) E_FATAL("Bad dagfudge argument: %d\n", fudge); dag->nlink = 0; for (sf = 0; sf < dag->nfrm; sf++) { for (d = dag->node_sf[sf]; d; d = d->next) { if ((d->lef - d->fef < min_ef_range - 1) && (d != dag->entry.dst)) continue; if (d->wid == finishwid) continue; for (ef = d->fef - fudge + 1; ef <= d->lef + 1; ef++) { for (d2 = dag->node_sf[ef]; d2; d2 = d2->next) { if ((d2->lef - d2->fef < min_ef_range - 1) && (d2 != dag->exit.dst)) continue; dag_link (d, d2); dag->nlink++; } } } } return dag; }
int main(int argc, char *argv[]) { ps_decoder_t *ps; bin_mdef_t *mdef; dict_t *dict; dict2pid_t *d2p; acmod_t *acmod; ps_alignment_t *al; ps_alignment_iter_t *itor; ps_search_t *search; state_align_search_t *sas; cmd_ln_t *config; int i; config = cmd_ln_init(NULL, ps_args(), FALSE, "-hmm", MODELDIR "/hmm/en_US/hub4wsj_sc_8k", "-dict", MODELDIR "/lm/en_US/cmu07a.dic", "-input_endian", "little", "-samprate", "16000", NULL); TEST_ASSERT(ps = ps_init(config)); dict = ps->dict; d2p = ps->d2p; acmod = ps->acmod; mdef = d2p->mdef; al = ps_alignment_init(d2p); TEST_EQUAL(1, ps_alignment_add_word(al, dict_wordid(dict, "<s>"), 0)); TEST_EQUAL(2, ps_alignment_add_word(al, dict_wordid(dict, "go"), 0)); TEST_EQUAL(3, ps_alignment_add_word(al, dict_wordid(dict, "forward"), 0)); TEST_EQUAL(4, ps_alignment_add_word(al, dict_wordid(dict, "ten"), 0)); TEST_EQUAL(5, ps_alignment_add_word(al, dict_wordid(dict, "meters"), 0)); TEST_EQUAL(6, ps_alignment_add_word(al, dict_wordid(dict, "</s>"), 0)); TEST_EQUAL(0, ps_alignment_populate(al)); TEST_ASSERT(search = state_align_search_init(config, acmod, al)); sas = (state_align_search_t *)search; for (i = 0; i < 5; ++i) do_search(search, acmod); itor = ps_alignment_words(al); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 0); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 46); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 46); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 18); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 64); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 53); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 117); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 29); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 146); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 67); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 213); 
TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 61); itor = ps_alignment_iter_next(itor); TEST_EQUAL(itor, NULL); ps_search_free(search); ps_alignment_free(al); ps_free(ps); return 0; }
/*
 * Build the filler-word penalty table for the given dictionary.
 *
 *   dict     : dictionary; its filler_start..filler_end range defines the filler words
 *   file     : optional file of "<word> <prob>" lines overriding the defaults; lines
 *              starting with '#' and blank lines are skipped.  May be NULL.
 *   silprob  : default probability for S3_SILENCE_WORD
 *   fillprob : default probability for every other filler word
 *   lw, wip  : language weight and word insertion penalty
 *
 * Every stored value is (int32) ((logs3(prob) + logs3(wip)) * lw), indexed by
 * (word id - dict->filler_start).  Alternative pronunciations receive the value of
 * their base word, but only when a file is given (the function returns before that
 * step otherwise).
 *
 * Returns the newly allocated table.  Invalid input is reported through E_FATAL.
 */
fillpen_t *fillpen_init (dict_t *dict, char *file,
                         float64 silprob, float64 fillprob, float64 lw, float64 wip)
{
    s3wid_t w, bw;
    float64 prob;
    FILE *fp;
    char line[1024], wd[1024];
    int32 k;
    fillpen_t *_fillpen;

    _fillpen = (fillpen_t *) ckd_calloc (1, sizeof(fillpen_t));

    _fillpen->dict = dict;
    _fillpen->lw = lw;
    _fillpen->wip = wip;
    if (dict->filler_end >= dict->filler_start)
        _fillpen->prob = (int32 *) ckd_calloc (dict->filler_end - dict->filler_start + 1,
                                               sizeof(int32));
    else
        _fillpen->prob = NULL;  /* Empty filler range; the silence check below rejects it */

    /* Initialize all words with filler penalty (HACK!! backward compatibility) */
    prob = fillprob;
    for (w = dict->filler_start; w <= dict->filler_end; w++)
        _fillpen->prob[w - dict->filler_start] =
            (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite silence penalty (HACK!! backward compatibility) */
    w = dict_wordid (dict, S3_SILENCE_WORD);
    if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
        E_FATAL("%s not a filler word in the given dictionary\n", S3_SILENCE_WORD);
    prob = silprob;
    _fillpen->prob[w - dict->filler_start] =
        (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite with filler prob input file, if specified */
    if (! file)
        return _fillpen;

    E_INFO("Reading filler penalty file: %s\n", file);
    if ((fp = fopen (file, "r")) == NULL)
        E_FATAL("fopen(%s,r) failed\n", file);
    while (fgets (line, sizeof(line), fp) != NULL) {
        if (line[0] == '#')     /* Skip comment lines */
            continue;

        k = sscanf (line, "%1023s %lf", wd, &prob);
        if (k <= 0)             /* Blank line: sscanf reports EOF, nothing was parsed */
            continue;
        if (k != 2)
            E_FATAL("Bad input line: %s\n", line);

        w = dict_wordid(dict, wd);
        if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
            E_FATAL("%s not a filler word in the given dictionary\n", wd);

        _fillpen->prob[w - dict->filler_start] =
            (int32) ((logs3(prob) + logs3(wip)) * lw);
    }
    fclose (fp);

    /* Replicate fillpen values for alternative pronunciations */
    for (w = dict->filler_start; w <= dict->filler_end; w++) {
        bw = dict_basewid (dict, w);
        if (bw != w)
            _fillpen->prob[w - dict->filler_start] =
                _fillpen->prob[bw - dict->filler_start];
    }

    return _fillpen;
}
/* * Build a sentence HMM for the given transcription (wordstr). A two-level DAG is * built: phone-level and state-level. * - <s> and </s> always added at the beginning and end of sentence to form an * augmented transcription. * - Optional <sil> and noise words added between words in the augmented * transcription. * wordstr must contain only the transcript; no extraneous stuff such as utterance-id. * Phone-level HMM structure has replicated nodes to allow for different left and right * context CI phones; hence, each pnode corresponds to a unique triphone in the sentence * HMM. * Return 0 if successful, <0 if any error (eg, OOV word encountered). */ int32 align_build_sent_hmm (char *wordstr) { s3wid_t w, nextw; int32 k, oov; pnode_t *word_end, *node; char *wd, delim, *wdcopy; /* HACK HACKA HACK BHIKSHA */ int32 firsttime = 1; /* END HACK HACKA HACK */ /* Initialize dummy head and tail entries of sent hmm */ phead.wid = BAD_WID; phead.ci = BAD_CIPID; phead.lc = BAD_CIPID; /* No predecessor */ phead.rc = BAD_CIPID; /* Any phone can follow head */ phead.pid = BAD_PID; phead.succlist = NULL; phead.predlist = NULL; phead.next = NULL; /* Will ultimately be the head of list of all pnodes */ phead.id = -1; /* Hardwired */ phead.startstate = NULL; ptail.wid = BAD_WID; ptail.ci = BAD_CIPID; ptail.lc = BAD_CIPID; /* Any phone can precede tail */ ptail.rc = BAD_CIPID; /* No successor */ ptail.pid = BAD_PID; ptail.succlist = NULL; ptail.predlist = NULL; ptail.next = NULL; ptail.id = -2; /* Hardwired */ ptail.startstate = NULL; n_pnode = 0; pnode_list = NULL; oov = 0; /* State-level DAG initialization should be here in case the build is aborted */ shead.pnode = &phead; shead.succlist = NULL; shead.predlist = NULL; shead.sen = BAD_SENID; shead.state = mdef->n_emit_state; shead.hist = NULL; stail.pnode = &ptail; stail.succlist = NULL; stail.predlist = NULL; stail.sen = BAD_SENID; stail.state = 0; stail.hist = NULL; /* Obtain the first transcript word */ k = nextword (wordstr, " 
\t\n", &wd, &delim); if (k < 0) nextw = finishwid; else { wordstr = wd + k; wdcopy = ckd_salloc (wd); *wordstr = delim; nextw = dict_wordid (wdcopy); if (IS_WID(nextw)) nextw = dict_basewid (nextw); } /* Create node(s) for <s> before any transcript word */ /* HACK HACKA HACK BHIKSHA word_end = append_transcript_word (startwid, &phead, nextw, 0, 1); END HACK HACKA HACK BHIKSHA */ /* Append each word in transcription to partial sent HMM created so far */ while (k >= 0) { w = nextw; if (NOT_WID(w)) { E_ERROR("%s not in dictionary\n", wdcopy); oov = 1; /* Hack!! Temporarily set w to some dummy just to run through sentence */ w = finishwid; } ckd_free (wdcopy); k = nextword (wordstr, " \t\n", &wd, &delim); if (k < 0) nextw = finishwid; else { wordstr = wd + k; wdcopy = ckd_salloc (wd); *wordstr = delim; nextw = dict_wordid (wdcopy); if (IS_WID(nextw)) nextw = dict_basewid (nextw); } /* HACK HACKA HACK BHIKSHA */ if (firsttime){ word_end = append_transcript_word (w, &phead, nextw, 0, 1); firsttime = 0; } else if (nextw == finishwid) word_end = append_transcript_word (w, word_end, BAD_WID, 1, 0); else word_end = append_transcript_word (w, word_end, nextw, 1, 1); /* END HACK HACKA HACK BHIKSHA */ } if (oov) return -1; /* Append phone HMMs for </s> at the end; link to tail node */ /* HACK HACKA HACK BHIKSHA word_end = append_transcript_word (finishwid, word_end, BAD_WID, 1, 0); END HACK HACKA HACK BHIKSHA */ for (node = word_end; node; node = node->next) link_pnodes (node, &ptail); /* Build state-level DAG from the phone-level one */ build_state_dag (); /* Dag must begin and end at shead and stail, respectively */ assert (shead.succlist); assert (stail.predlist); assert (! shead.predlist); assert (! 
stail.succlist); #if _DEBUG_ALIGN_ dump_sent_hmm (); /* For debugging */ #endif k = n_pnode * mdef->n_emit_state; if (k > active_list_size) { /* Need to grow active list arrays */ if (active_list_size > 0) { ckd_free (cur_active); ckd_free (next_active); } for (; active_list_size <= k; active_list_size += ACTIVE_LIST_SIZE_INCR); cur_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); next_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *)); } return 0; }
dict_t *dict_init (mdef_t *mdef, char *dictfile, char *fillerfile, char comp_sep) { FILE *fp, *fp2; int32 n ; char line[1024]; dict_t *d; if (! dictfile) E_FATAL("No dictionary file\n"); /* * First obtain #words in dictionary (for hash table allocation). * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate * all the required memory in one go. */ if ((fp = fopen(dictfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", dictfile); n = 0; while (fgets (line, sizeof(line), fp) != NULL) { if (line[0] != '#') n++; } rewind (fp); if (fillerfile) { if ((fp2 = fopen(fillerfile, "r")) == NULL) E_FATAL_SYSTEM("fopen(%s,r) failed\n", fillerfile); while (fgets (line, sizeof(line), fp2) != NULL) { if (line[0] != '#') n++; } rewind (fp2); } /* * Allocate dict entries. HACK!! Allow some extra entries for words not in file. * Also check for type size restrictions. */ d = (dict_t *) ckd_calloc (1, sizeof(dict_t)); d->max_words = (n+1024 < MAX_WID) ? n+1024 : MAX_WID; if (n >= MAX_WID) E_FATAL("#Words in dictionaries (%d) exceeds limit (%d)\n", n, MAX_WID); d->word = (dictword_t *) ckd_calloc (d->max_words, sizeof(dictword_t)); d->n_word = 0; d->mdef = mdef; if (mdef) { d->pht = NULL; d->ciphone_str = NULL; } else { d->pht = hash_new (DEFAULT_NUM_PHONE, 1 /* No case */); d->ciphone_str = (char **) ckd_calloc (DEFAULT_NUM_PHONE, sizeof(char *)); } d->n_ciphone = 0; /* Create new hash table for word strings; case-insensitive word strings */ d->ht = hash_new (d->max_words, 1 /* no-case */); /* Initialize with no compound words */ d->comp_head = NULL; /* Digest main dictionary file */ E_INFO("Reading main dictionary: %s\n", dictfile); dict_read (fp, d); fclose (fp); E_INFO("%d words read\n", d->n_word); /* Now the filler dictionary file, if it exists */ d->filler_start = d->n_word; if (fillerfile) { E_INFO("Reading filler dictionary: %s\n", fillerfile); dict_read (fp2, d); fclose (fp2); E_INFO("%d words read\n", d->n_word - d->filler_start); } 
d->filler_end = d->n_word-1; /* Initialize distinguished word-ids */ d->startwid = dict_wordid (d, START_WORD); d->finishwid = dict_wordid (d, FINISH_WORD); d->silwid = dict_wordid (d, SILENCE_WORD); if (NOT_WID(d->startwid)) E_WARN("%s not in dictionary\n", START_WORD); if (NOT_WID(d->finishwid)) E_WARN("%s not in dictionary\n", FINISH_WORD); if (NOT_WID(d->silwid)) E_WARN("%s not in dictionary\n", SILENCE_WORD); /* Identify compound words if indicated */ if (comp_sep) { E_INFO("Building compound words (separator = '%c')\n", comp_sep); n = dict_build_comp (d, comp_sep); E_INFO("%d compound words\n", n); } return d; }
int main(int argc, char *argv[]) { ps_decoder_t *ps; dict_t *dict; dict2pid_t *d2p; acmod_t *acmod; ps_alignment_t *al; ps_alignment_iter_t *itor; ps_search_t *search; cmd_ln_t *config; int i; config = cmd_ln_init(NULL, ps_args(), FALSE, "-hmm", MODELDIR "/en-us/en-us", "-dict", MODELDIR "/en-us/cmudict-en-us.dict", "-input_endian", "little", "-samprate", "16000", NULL); TEST_ASSERT(ps = ps_init(config)); dict = ps->dict; d2p = ps->d2p; acmod = ps->acmod; al = ps_alignment_init(d2p); TEST_EQUAL(1, ps_alignment_add_word(al, dict_wordid(dict, "<s>"), 0)); TEST_EQUAL(2, ps_alignment_add_word(al, dict_wordid(dict, "go"), 0)); TEST_EQUAL(3, ps_alignment_add_word(al, dict_wordid(dict, "forward"), 0)); TEST_EQUAL(4, ps_alignment_add_word(al, dict_wordid(dict, "ten"), 0)); TEST_EQUAL(5, ps_alignment_add_word(al, dict_wordid(dict, "meters"), 0)); TEST_EQUAL(6, ps_alignment_add_word(al, dict_wordid(dict, "</s>"), 0)); TEST_EQUAL(0, ps_alignment_populate(al)); TEST_ASSERT(search = state_align_search_init(config, acmod, al)); for (i = 0; i < 5; ++i) do_search(search, acmod); itor = ps_alignment_words(al); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 0); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 3); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 3); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 12); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 15); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 53); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 68); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 36); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 104); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 59); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->start, 163); TEST_EQUAL(ps_alignment_iter_get(itor)->duration, 51); itor = ps_alignment_iter_next(itor); TEST_EQUAL(itor, NULL); 
ps_search_free(search); ps_alignment_free(al); ps_free(ps); cmd_ln_free_r(config); return 0; }
/*
 * Look up the LM context specification for utterance uttid in corp and return it as
 * word-ids: two predecessor words in pred[0..1] and one successor word in *succ.
 *
 * The specification consists of exactly three whitespace-separated tokens; "-" stands
 * for "no word" (BAD_WID).  Each real word is mapped to its base word-id.  If a
 * predecessor is a compound word it is replaced by its trailing component(s).  The
 * successor, when present, must be FINISH_WORD.  Every resulting word must also be
 * known to the LM.
 *
 * All violations (missing spec, wrong token count, unknown words, ...) are reported
 * through E_FATAL.
 */
void lmcontext_load (corpus_t *corp, char *uttid, s3wid_t *pred, s3wid_t *succ)
{
    char *str, wd[4096], extra[2], *strp;
    s3wid_t w[3];
    int32 i, n;
    dict_t *dict;
    s3lmwid_t lwid;

    if ((str = corpus_lookup (corp, uttid)) == NULL)
        E_FATAL("Couldn't find LM context for %s\n", uttid);

    dict = dict_getdict ();

    strp = str;
    for (i = 0; i < 3; i++) {
        /* Field width keeps an over-long token from overrunning wd[] */
        if (sscanf (strp, "%4095s%n", wd, &n) != 1)
            E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
        strp += n;

        if (strcmp (wd, "-") == 0) {
            w[i] = BAD_WID;
            continue;
        }

        w[i] = dict_wordid (wd);
        if (NOT_WID(w[i]))
            E_FATAL("LM context word (%s) for %s not in dictionary\n", wd, uttid);
        w[i] = dict_basewid(w[i]);

        switch (i) {
        case 0:
            /* Compound first predecessor: keep only its last component */
            if ((n = dict->word[w[0]].n_comp) > 0)
                w[0] = dict->word[w[0]].comp[n-1].wid;
            break;
        case 1:
            /*
             * Compound second predecessor: its last two components become both
             * predecessors, overriding whatever token 0 supplied.
             * NOTE(review): assumes a compound word has at least 2 components,
             * otherwise comp[n-2] is out of range -- confirm.
             */
            if ((n = dict->word[w[1]].n_comp) > 0) {
                w[0] = dict->word[w[1]].comp[n-2].wid;
                w[1] = dict->word[w[1]].comp[n-1].wid;
            }
            break;
        case 2:
            if (w[2] != dict_wordid(FINISH_WORD))
                E_FATAL("Illegal successor LM context for %s: %s\n", uttid, str);
            break;
        default:
            assert (0);         /* Should never get here */
            break;
        }
    }

    /* Only three tokens are allowed; a fourth one used to be stored past w[2] */
    if (sscanf (strp, "%1s", extra) == 1)
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    /* A first predecessor without a second one makes no sense */
    if (IS_WID(w[0]) && NOT_WID(w[1]))
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    for (i = 0; i < 3; i++) {
        if (IS_WID(w[i])) {
            lwid = lm_lmwid (w[i]);
            /*
             * NOTE(review): wd still holds the last token parsed, which is not
             * necessarily the word that failed this check.
             */
            if (NOT_LMWID(lwid))
                E_FATAL("LM context word (%s) for %s not in LM\n", wd, uttid);
        }
    }

    pred[0] = w[0];
    pred[1] = w[1];
    *succ = w[2];
}
/*
 * Convert a generic FSG description (fsg) into a word_fsg_t bound to the models and
 * dictionary held by kbc.
 *
 *   fsg         : source FSG (states, start/final state, list of transitions)
 *   use_altpron : if set, words are mapped to their base word-id and one extra
 *                 transition is added for every alternative pronunciation
 *   use_filler  : if set, silence/filler transitions are added via
 *                 word_fsg_add_filler
 *   kbc         : supplies dict, mdef, tmat, logmath and the filler penalty settings
 *                 (silprob, fillerprob, lw)
 *
 * Transitions without a word are added as null (epsilon) transitions.  Transitions
 * whose word is not in the dictionary are reported, counted, and dropped.  After all
 * transitions are loaded the null-transition closure and the per-state left/right
 * context lists are computed.
 *
 * Returns the new word FSG, or NULL if the parameters or the source FSG are invalid.
 */
word_fsg_t *
word_fsg_load(s2_fsg_t * fsg,
              int use_altpron, int use_filler,
              kbcore_t *kbc)
{
    float32 silprob = kbc->fillpen->silprob;
    float32 fillprob = kbc->fillpen->fillerprob;
    float32 lw = kbc->fillpen->lw;

    word_fsg_t *word_fsg;
    s2_fsg_trans_t *trans;
    int32 n_trans, n_null_trans, n_alt_trans, n_filler_trans, n_unk;
    int32 wid;
    int32 logp;
    glist_t nulls;
    int32 i, j;

    assert(fsg);

    /* Some error checking */
    if (lw <= 0.0)
        E_WARN("Unusual language-weight value: %.3e\n", lw);
    if (use_filler && ((silprob < 0.0) || (fillprob < 0.0))) {
        E_ERROR("silprob/fillprob must be >= 0\n");
        return NULL;
    }
    if ((fsg->n_state <= 0)
        || ((fsg->start_state < 0) || (fsg->start_state >= fsg->n_state))
        || ((fsg->final_state < 0) || (fsg->final_state >= fsg->n_state))) {
        E_ERROR("Bad #states/start_state/final_state values: %d/%d/%d\n",
                fsg->n_state, fsg->start_state, fsg->final_state);
        return NULL;
    }
    for (trans = fsg->trans_list; trans; trans = trans->next) {
        if ((trans->from_state < 0) || (trans->from_state >= fsg->n_state)
            || (trans->to_state < 0) || (trans->to_state >= fsg->n_state)
            || (trans->prob <= 0) || (trans->prob > 1.0)) {
            E_ERROR("Bad transition: P(%d -> %d) = %e\n",
                    trans->from_state, trans->to_state, trans->prob);
            return NULL;
        }
    }

    /* Everything validated; nothing has been allocated before this point */
    word_fsg = (word_fsg_t *) ckd_calloc(1, sizeof(word_fsg_t));
    word_fsg->name = ckd_salloc(fsg->name ? fsg->name : "");
    word_fsg->n_state = fsg->n_state;
    word_fsg->start_state = fsg->start_state;
    word_fsg->final_state = fsg->final_state;
    word_fsg->use_altpron = use_altpron;
    word_fsg->use_filler = use_filler;
    word_fsg->lw = lw;
    word_fsg->lc = NULL;
    word_fsg->rc = NULL;
    word_fsg->dict = kbc->dict;
    word_fsg->mdef = kbc->mdef;
    word_fsg->tmat = kbc->tmat;
    word_fsg->n_ciphone = mdef_n_ciphone(kbc->mdef);

    /* Allocate non-epsilon transition matrix array */
    word_fsg->trans = (glist_t **) ckd_calloc_2d(word_fsg->n_state,
                                                 word_fsg->n_state,
                                                 sizeof(glist_t));
    /* Allocate epsilon transition matrix array */
    word_fsg->null_trans = (word_fsglink_t ***) ckd_calloc_2d(word_fsg->n_state,
                                                              word_fsg->n_state,
                                                              sizeof(word_fsglink_t *));

    /* Process transitions */
    n_null_trans = 0;
    n_alt_trans = 0;
    n_filler_trans = 0;
    n_unk = 0;
    nulls = NULL;

    for (trans = fsg->trans_list, n_trans = 0;
         trans; trans = trans->next, n_trans++) {
        /* Convert prob to logs2prob and apply language weight */
        logp = (int32) (logs3(kbcore_logmath(kbc), trans->prob) * lw);

        /* Check if word is in dictionary */
        if (trans->word) {
            wid = dict_wordid(kbc->dict, trans->word);
            if (wid < 0) {
                /*
                 * Really ignore it.  Falling through with wid < 0 would register the
                 * transition as a null transition, letting the grammar skip the word
                 * altogether.
                 */
                E_ERROR("Unknown word '%s'; ignored\n", trans->word);
                n_unk++;
                continue;
            }
            if (use_altpron) {
                wid = dict_basewid(kbc->dict, wid);
                assert(wid >= 0);
            }
        }
        else
            wid = -1;           /* Null transition */

        /* Add transition to word_fsg structure */
        i = trans->from_state;
        j = trans->to_state;
        if (wid < 0) {
            /* Remember each newly added null link as a seed for the closure below */
            if (word_fsg_null_trans_add(word_fsg, i, j, logp) == 1) {
                n_null_trans++;
                nulls = glist_add_ptr(nulls,
                                      (void *) word_fsg->null_trans[i][j]);
            }
        }
        else {
            word_fsg_trans_add(word_fsg, i, j, logp, wid);

            /* Add transitions for alternative pronunciations, if any */
            if (use_altpron) {
                for (wid = dict_nextalt(kbc->dict, wid);
                     wid >= 0; wid = dict_nextalt(kbc->dict, wid)) {
                    word_fsg_trans_add(word_fsg, i, j, logp, wid);
                    n_alt_trans++;
                    n_trans++;
                }
            }
        }
    }

    /* Add silence and noise filler word transitions if specified */
    if (use_filler) {
        n_filler_trans = word_fsg_add_filler(word_fsg, silprob, fillprob,
                                             kbcore_logmath(kbc));
        n_trans += n_filler_trans;
    }

    E_INFO
        ("FSG: %d states, %d transitions (%d null, %d alt, %d filler, %d unknown)\n",
         word_fsg->n_state, n_trans, n_null_trans, n_alt_trans,
         n_filler_trans, n_unk);

#if __FSG_DBG__
    E_INFO("FSG before NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Null transitions closure */
    nulls = word_fsg_null_trans_closure(word_fsg, nulls);
    glist_free(nulls);

#if __FSG_DBG__
    E_INFO("FSG after NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Compute left and right context CIphone lists for each state */
    word_fsg_lc_rc(word_fsg);

#if __FSG_DBG__
    E_INFO("FSG after lc/rc:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    return word_fsg;
}
/* * Add the word emitted by the given transition (fsglink) to the given lextree * (rooted at root), and return the new lextree root. (There may actually be * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. * "root" is the head of this list.) * lclist, rclist: sets of left and right context phones for this link. * alloc_head: head of a linear list of all allocated pnodes for the parent * FSG state, kept elsewhere and updated by this routine. */ static fsg_pnode_t * psubtree_add_trans(fsg_lextree_t *lextree, fsg_pnode_t * root, fsg_glist_linklist_t **curglist, fsg_link_t * fsglink, int16 *lclist, int16 *rclist, fsg_pnode_t ** alloc_head) { int32 silcipid; /* Silence CI phone ID */ int32 pronlen; /* Pronunciation length */ int32 wid; /* FSG (not dictionary!!) word ID */ int32 dictwid; /* Dictionary (not FSG!!) word ID */ int32 ssid; /* Senone Sequence ID */ gnode_t *gn; fsg_pnode_t *pnode, *pred, *head; int32 n_ci, p, lc, rc; glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ int32 i, j; silcipid = bin_mdef_silphone(lextree->mdef); n_ci = bin_mdef_n_ciphone(lextree->mdef); wid = fsg_link_wid(fsglink); assert(wid >= 0); /* Cannot be a null transition */ dictwid = dict_wordid(lextree->dict, fsg_model_word_str(lextree->fsg, wid)); pronlen = dict_pronlen(lextree->dict, dictwid); assert(pronlen >= 1); assert(lclist[0] >= 0); /* At least one phonetic context provided */ assert(rclist[0] >= 0); head = *alloc_head; pred = NULL; if (pronlen == 1) { /* Single-phone word */ int ci = dict_first_phone(lextree->dict, dictwid); /* Only non-filler words are mpx */ if (dict_filler_word(lextree->dict, dictwid)) { /* * Left diphone ID for single-phone words already assumes SIL is right * context; only left contexts need to be handled. 
*/ lc_pnodelist = NULL; for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); /* Check if this ssid already allocated for some other context */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pnode = (fsg_pnode_t *) gnode_ptr(gn); if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { /* already allocated; share it for this context phone */ fsg_pnode_add_ctxt(pnode, lc); break; } } if (!gn) { /* ssid not already allocated */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; /* All root nodes linked together */ fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); } } glist_free(lc_pnodelist); } else { /* Filler word; no context modelled */ ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... 
*/ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->next.fsglink = fsglink; pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ pnode->ppos = 0; pnode->leaf = TRUE; pnode->sibling = root; fsg_pnode_add_all_ctxt(&(pnode->ctxt)); pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); } } else { /* Multi-phone word */ fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ ssid_pnode_map = (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); lc_pnodelist = NULL; rc_pnodelist = NULL; for (p = 0; p < pronlen; p++) { int ci = dict_pron(lextree->dict, dictwid, p); if (p == 0) { /* Root phone, handle required left contexts */ /* Find if we already have an lc_pnodelist for the first phone of this word */ fsg_glist_linklist_t *predglist=*curglist; fsg_glist_linklist_t *glist=*curglist; rc = dict_pron(lextree->dict, dictwid, 1); while (glist && glist->glist && glist->ci != ci && glist->rc != rc){ glist = glist->next; } if (glist && glist->ci == ci && glist->rc == rc && glist->glist) { /* We've found a valid glist. Hook to it and move to next phoneme */ lc_pnodelist = glist->glist; /* Set the predecessor node for the future tree first */ pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); continue; } else { /* Two cases that can bring us here * a. glist == NULL, i.e. end of current list. Create new entry. * b. glist->glist == NULL, i.e. first entry into list. 
*/ if (!glist) { /* Case a; reduce it to case b by allocing glist */ glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); glist->next = predglist; *curglist = glist; } glist->ci = ci; glist->rc = rc; glist->lc = -1; lc_pnodelist = glist->glist = NULL; /* Gets created below */ } for (i = 0; lclist[i] >= 0; i++) { lc = lclist[i]; ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); /* Compression is not done by d2p, so we do it * here. This might be slow, but it might not * be... we'll see. */ pnode = ssid_pnode_map[0]; for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { pnode = ssid_pnode_map[j]; if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) break; } assert(j < n_ci); if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* This bit is tricky! For now we'll put the prob in the final link only */ /* pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->wip + lextree->pip; */ pnode->logs2prob = lextree->wip + lextree->pip; pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); pnode->ppos = 0; pnode->leaf = FALSE; pnode->sibling = root; /* All root nodes linked together */ pnode->alloc_next = head; head = pnode; root = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); lc_pnodelist = glist_add_ptr(lc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } fsg_pnode_add_ctxt(pnode, lc); } /* Put the lc_pnodelist back into glist */ glist->glist = lc_pnodelist; /* The predecessor node for the future tree is the root */ pred = root; } else if (p != pronlen - 1) { /* Word internal phone */ fsg_pnode_t *pnodeyoungest; ssid = dict2pid_internal(lextree->d2p, dictwid, p); /* First check if we already have this ssid in our tree */ pnode = pred->next.succ; pnodeyoungest = pnode; /* The youngest sibling */ while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { pnode = pnode->sibling; } if (pnode && 
(hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { /* Found the ssid; go to next phoneme */ pred = pnode; continue; } /* pnode not found, allocate it */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); pnode->ctx = lextree->ctx; pnode->logs2prob = lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = FALSE; pnode->sibling = pnodeyoungest; /* May be NULL */ if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); pred->next.succ = pnode; } } else { /* Predecessor = word internal node */ pred->next.succ = pnode; } pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); pred = pnode; } else { /* Leaf phone, handle required right contexts */ /* Note, leaf phones are not part of the tree */ xwdssid_t *rssid; memset((void *) ssid_pnode_map, 0, n_ci * sizeof(fsg_pnode_t *)); lc = dict_pron(lextree->dict, dictwid, p-1); rssid = dict2pid_rssid(lextree->d2p, ci, lc); for (i = 0; rclist[i] >= 0; i++) { rc = rclist[i]; j = rssid->cimap[rc]; ssid = rssid->ssid[j]; pnode = ssid_pnode_map[j]; if (!pnode) { /* Allocate pnode for this new ssid */ pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof (fsg_pnode_t)); pnode->ctx = lextree->ctx; /* We are plugging the word prob here. Ugly */ /* pnode->logs2prob = lextree->pip; */ pnode->logs2prob = fsg_link_logs2prob(fsglink) + lextree->pip; pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); pnode->ppos = p; pnode->leaf = TRUE; pnode->sibling = rc_pnodelist ? 
(fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; pnode->next.fsglink = fsglink; pnode->alloc_next = head; head = pnode; hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, pnode->ci_ext); rc_pnodelist = glist_add_ptr(rc_pnodelist, (void *) pnode); ssid_pnode_map[j] = pnode; } else { assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); } fsg_pnode_add_ctxt(pnode, rc); } if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { pred = (fsg_pnode_t *) gnode_ptr(gn); if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); /* Since all entries of lc_pnodelist point to the same array, sufficient to update it once */ break; } } } else { /* Predecessor = word internal node */ if (!pred->next.succ) pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); else { /* Link to the end of the sibling chain */ fsg_pnode_t *succ = pred->next.succ; while (succ->sibling) succ = succ->sibling; succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); } } } } ckd_free((void *) ssid_pnode_map); /* glist_free(lc_pnodelist); Nope; this gets freed outside */ glist_free(rc_pnodelist); } *alloc_head = head; return root; }
lmset_t * lmset_read_ctl(const char *ctlfile, dict_t * dict, float64 lw, float64 wip, float64 uw, const char *lmdumpdir, logmath_t *logmath) { FILE *ctlfp; FILE *tmp; char lmfile[4096], lmname[4096], str[4096]; lmclass_set_t *lmclass_set; lmclass_t **lmclass, *cl; int32 n_lmclass, n_lmclass_used; int32 i; lm_t *lm; lmset_t *lms = NULL; tmp = NULL; E_INFO("Reading LM control file '%s'\n", ctlfile); if ((ctlfp = fopen(ctlfile, "r")) == NULL) { E_ERROR_SYSTEM("Failed to open LM control file"); return NULL; } lmclass_set = lmclass_newset(); lms = (lmset_t *) ckd_calloc(1, sizeof(lmset_t)); lms->n_lm = 0; lms->n_alloc_lm = 0; if (fscanf(ctlfp, "%s", str) == 1) { if (strcmp(str, "{") == 0) { /* Load LMclass files */ while ((fscanf(ctlfp, "%s", str) == 1) && (strcmp(str, "}") != 0)) lmclass_set = lmclass_loadfile(lmclass_set, str, logmath); if (strcmp(str, "}") != 0) E_FATAL("Unexpected EOF(%s)\n", ctlfile); if (fscanf(ctlfp, "%s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; /* Fill in dictionary word id information for each LMclass word */ for (cl = lmclass_firstclass(lmclass_set); lmclass_isclass(cl); cl = lmclass_nextclass(lmclass_set, cl)) { /* For every words in the class, set the dictwid correctly The following piece of code replace s2's kb_init_lmclass_dictwid (cl); doesn't do any checking even the id is a bad dict id. This only sets the information in the lmclass_set, but not lm-2-dict or dict-2-lm map. In Sphinx 3, they are done in wid_dict_lm_map in wid.c. 
*/ lmclass_word_t *w; int32 wid; for (w = lmclass_firstword(cl); lmclass_isword(w); w = lmclass_nextword(cl, w)) { wid = dict_wordid(dict, lmclass_getword(w)); #if 0 E_INFO("In class %s, Word %s, wid %d\n", cl->name, lmclass_getword(w), wid); #endif lmclass_set_dictwid(w, wid); } } /* At this point if str[0] != '\0', we have an LM filename */ n_lmclass = lmclass_get_nclass(lmclass_set); lmclass = (lmclass_t **) ckd_calloc(n_lmclass, sizeof(lmclass_t *)); E_INFO("Number of LM class specified %d in file %s\n", n_lmclass, ctlfile); /* Read in one LM at a time */ while (str[0] != '\0') { strcpy(lmfile, str); if (fscanf(ctlfp, "%s", lmname) != 1) E_FATAL("LMname missing after LMFileName '%s'\n", lmfile); n_lmclass_used = 0; if (fscanf(ctlfp, "%s", str) == 1) { if (strcmp(str, "{") == 0) { while ((fscanf(ctlfp, "%s", str) == 1) && (strcmp(str, "}") != 0)) { if (n_lmclass_used >= n_lmclass) { E_FATAL("Too many LM classes specified for '%s'\n", lmfile); } lmclass[n_lmclass_used] = lmclass_get_lmclass(lmclass_set, str); if (!(lmclass_isclass(lmclass[n_lmclass_used]))) E_FATAL("LM class '%s' not found\n", str); n_lmclass_used++; } if (strcmp(str, "}") != 0) E_FATAL("Unexpected EOF(%s)\n", ctlfile); if (fscanf(ctlfp, "%s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; lm = (lm_t *) lm_read_advance(lmfile, lmname, lw, wip, uw, dict_size(dict), NULL, 1, logmath, FALSE, FALSE); if (n_lmclass_used > 0) { E_INFO("Did I enter here?\n"); lm_build_lmclass_info(lm, lw, uw, wip, n_lmclass_used, lmclass); } if (lms->n_lm == lms->n_alloc_lm) { lms->lmarray = (lm_t **) ckd_realloc(lms->lmarray, (lms->n_alloc_lm + LM_ALLOC_BLOCK) * sizeof(lm_t *)); lms->n_alloc_lm += LM_ALLOC_BLOCK; } lms->lmarray[lms->n_lm] = lm; lms->n_lm += 1; E_INFO("%d %d\n", lms->n_alloc_lm, lms->n_lm); } assert(lms); assert(lms->lmarray); E_INFO("No. of LM set allocated %d, no. 
of LM %d \n", lms->n_alloc_lm, lms->n_lm); if (dict != NULL) { for (i = 0; i < lms->n_lm; i++) { assert(lms->lmarray[i]); assert(dict); if ((lms->lmarray[i]->dict2lmwid = wid_dict_lm_map(dict, lms->lmarray[i], lw)) == NULL) E_FATAL ("Dict/LM word-id mapping failed for LM index %d, named %s\n", i, lmset_idx_to_name(lms, i)); } } else { E_FATAL ("Dict is specified to be NULL (dict_init is not called before lmset_read_lm?), dict2lmwid is not built inside lmset_read_lm\n"); } ckd_free(lmclass_set); ckd_free(lmclass); fclose(ctlfp); return lms; }
/*
 * Parse one hypothesis-segmentation line into *seg_hyp_line.
 *
 * Whitespace-separated tokens are consumed from `line` in this order:
 *
 *   <seq> S <x> T <total> A <ascr> L <lscr> {<sf> <ascr> <lscr> <word>}* <nfr>
 *
 * <x> (the token after "S") is read and discarded.  <total> must equal
 * <ascr> + <lscr>.  Each per-word group becomes one conf_srch_hyp_t node
 * appended to seg_hyp_line->wordlist; the lone trailing integer is stored
 * as seg_hyp_line->nfr.  A word's end frame is derived afterwards: the
 * start frame of the following word minus one, or nfr for the last word.
 *
 * line          text to parse (only read here).
 * seg_hyp_line  output record; seq, ascr, lscr, wordlist, wordno, nfr and
 *               cscore are all written.
 * lm            not referenced by this function.
 * dict          if non-NULL, each word (with any trailing "(...)" suffix
 *               removed) is looked up and its id stored in sh.id; an
 *               unknown word is reported through E_FATAL.
 *
 * Returns HYPSEG_FAILURE when the sequence field is missing or when the
 * line holds no words, HYPSEG_SUCCESS otherwise.  All other malformed
 * input is reported through E_FATAL.
 */
int
read_s3hypseg_line(char *line, seg_hyp_line_t * seg_hyp_line, lm_t * lm,
                   dict_t * dict)
{
    char *cursor;
    /* NOTE(review): get_word's bound on this buffer is not visible here --
     * confirm tokens longer than 127 characters cannot occur. */
    char tok[128];
    conf_srch_hyp_t *node, *last, *cur;
    int total, frame, paren;
    int ascr_sum, lscr_sum;
    s3wid_t wid;

    cursor = line;

    /* ---- Header ------------------------------------------------------- */

    /* A missing sequence field is the one header error reported softly. */
    if (get_word(&cursor, tok) == 0) {
        printf("failed to read sequence number in the line: %s\n", line);
        return HYPSEG_FAILURE;
    }
    strcpy(seg_hyp_line->seq, tok);

    if (get_word(&cursor, tok) == 0 || strcmp(tok, "S") != 0)
        E_FATAL("failed to read S in the line: %s\n", line);

    /* The value following "S" is skipped; its read result is not checked. */
    get_word(&cursor, tok);

    if (get_word(&cursor, tok) == 0 || strcmp(tok, "T") != 0)
        E_FATAL("failed to read T in the line: %s\n", line);
    if (get_word(&cursor, tok) == 0)
        E_FATAL("failed to read ascr+lscr in the line: %s\n", line);
    total = atoi(tok);

    if (get_word(&cursor, tok) == 0 || strcmp(tok, "A") != 0)
        E_FATAL("failed to read A in the line: %s\n", line);
    if (get_word(&cursor, tok) == 0)
        E_FATAL("failed to read ascr in the line: %s\n", line);
    seg_hyp_line->ascr = atoi(tok);

    if (get_word(&cursor, tok) == 0 || strcmp(tok, "L") != 0)
        E_FATAL("failed to read L in the line: %s\n", line);
    if (get_word(&cursor, tok) == 0)
        E_FATAL("failed to read lscr in the line: %s\n", line);
    seg_hyp_line->lscr = atoi(tok);

#if 0
    if (get_word(&cursor, tok) == 0 || strcmp(tok, "0") != 0) {
        E_FATAL("failed to find 0 in the line: %s\n", line);
    }
#endif

    if (seg_hyp_line->ascr + seg_hyp_line->lscr != total) {
        E_FATAL("the sum of ascr and lscr %d is wrong (%d): %s\n",
                seg_hyp_line->ascr + seg_hyp_line->lscr, total, line);
    }

    /* ---- Per-word groups ---------------------------------------------- */

    seg_hyp_line->wordlist = NULL;
    seg_hyp_line->wordno = 0;
    seg_hyp_line->nfr = 0;
    seg_hyp_line->cscore = WORST_CONFIDENCE_SCORE;
    last = NULL;

    for (;;) {
        /* This integer is either the next word's start frame or, when
         * nothing follows it, the frame count that ends the line. */
        if (get_word(&cursor, tok) == 0)
            E_FATAL("failed to read sf or nfr in the line: %s\n", line);
        frame = atoi(tok);

        if (get_word(&cursor, tok) == 0) {
            seg_hyp_line->nfr = frame;
            break;
        }

        /* tok now holds the word's ascr; keep it intact until it has been
         * converted below. */
        node = (conf_srch_hyp_t *) ckd_calloc(1, sizeof(conf_srch_hyp_t));
        if (node != NULL)
            node->sh.word = (char *) ckd_calloc(1024, sizeof(char));
        if (node == NULL || node->sh.word == NULL) {
            E_FATAL("fail to allocate memory\n");
        }

        node->sh.sf = frame;
        node->sh.ascr = atoi(tok);
        node->next = NULL;

        if (get_word(&cursor, tok) == 0)
            E_FATAL("failed to read lscr in the line: %s\n", line);
        node->sh.lscr = atoi(tok);

        if (get_word(&cursor, tok) == 0)
            E_FATAL("failed to read word in the line: %s\n", line);
        /* The stored word keeps its full spelling ... */
        strcpy(node->sh.word, tok);

        /* ... while the dictionary lookup uses the text before the last
         * '(' in the token, if there is one. */
        paren = (int) strlen(tok);
        while (--paren >= 0 && tok[paren] != '(')
            ;
        if (paren >= 0)
            tok[paren] = '\0';

        if (dict) {
            wid = dict_wordid(dict, tok);
            if (wid == BAD_S3WID) {
                E_FATAL("String %s doesn't exists in the dictionary\n",
                        tok);
            }
            node->sh.id = wid;
        }

        node->compound = 0;
        node->matchtype = 0;
        seg_hyp_line->wordno++;

        /* Append at the tail so the list preserves line order. */
        if (seg_hyp_line->wordlist != NULL)
            last->next = node;
        else
            seg_hyp_line->wordlist = node;
        last = node;
    }

    if (seg_hyp_line->wordlist == NULL) {
        printf("word list is NULL\n");
        return HYPSEG_FAILURE;
    }

    /* ---- Derived end frames ------------------------------------------- */

    for (cur = seg_hyp_line->wordlist; cur->next != NULL; cur = cur->next)
        cur->sh.ef = cur->next->sh.sf - 1;
    cur->sh.ef = seg_hyp_line->nfr;

    /* ---- Consistency checks ------------------------------------------- */

    ascr_sum = 0;
    lscr_sum = 0;
    for (cur = seg_hyp_line->wordlist; cur != NULL; cur = cur->next) {
        ascr_sum += cur->sh.ascr;
        lscr_sum += cur->sh.lscr;
    }

    /* An acoustic-score mismatch goes through E_FATAL; a language-score
     * mismatch only produces a warning. */
    if (ascr_sum != seg_hyp_line->ascr) {
        E_FATAL
            ("the ascr of words is not equal to the ascr of utt: %s (sum %d != tot %d). \n",
             line, ascr_sum, seg_hyp_line->ascr);
    }
    if (lscr_sum != seg_hyp_line->lscr)
        E_WARN
            ("the lscr of words is not equal to the lscr of utt: %s %d %d\n",
             seg_hyp_line->seq, lscr_sum, seg_hyp_line->lscr);

    /* NOTE(review): the message text says "<=" but the test is strictly
     * "<", so a word with ef == sf is accepted. */
    for (cur = seg_hyp_line->wordlist; cur != NULL; cur = cur->next) {
        if (cur->sh.ef < cur->sh.sf) {
            E_FATAL("word %s ef (%d) <= sf (%d)in the line: %s\n",
                    cur->sh.word, cur->sh.ef, cur->sh.sf, line);
        }
    }

    return HYPSEG_SUCCESS;
}