/*
 * Interactive test driver: loads two corpus files (one via
 * corpus_load_headid, one via corpus_load_tailid), then repeatedly reads an
 * ID token from stdin and prints what corpus_lookup finds for it in each.
 *
 * Usage: <prog> headid-corpusfile tailid-corpusfile
 *
 * The loop ends, and the program returns 0, when no further token can be
 * read from stdin (EOF or read error).
 */
int main(int32 argc, char *argv[])
{
    corpus_t *ch, *ct;
    char id[4096], *str;

    if (argc != 3)
        E_FATAL("Usage: %s headid-corpusfile tailid-corpusfile\n", argv[0]);

    ch = corpus_load_headid(argv[1], NULL, NULL);
    ct = corpus_load_tailid(argv[2], NULL, NULL);

    for (;;) {
        printf("> ");
        fflush(stdout);         /* Make sure the prompt is visible before blocking on input */

        /* Field width keeps the token within id[]; stop on EOF instead of looping forever */
        if (scanf("%4095s", id) != 1)
            break;

        str = corpus_lookup(ch, id);
        if (str == NULL)
            printf("%s Not found in 1\n", id);
        else
            printf("%s(1): %s\n", id, str);

        str = corpus_lookup(ct, id);
        if (str == NULL)
            printf("%s Not found in 2\n", id);
        else
            printf("%s(2): %s\n", id, str);
    }

    return 0;
}
/*
 * Look up the hypothesis line for uttid in the input corpora inhyp[0..n_inhyp-1]
 * (first match wins), validate it, and append "uttid hyp" to outfp.
 *
 * A hypothesis is accepted only if it has at least one word and its final
 * word parses as an integer equal to the utterance length (ef - sf + 1).
 * Missing, over-long or invalid hypotheses are reported with E_ERROR and
 * nothing is written.  A failure of str2words itself is fatal.
 *
 *   uttfile  not used by this function
 *   sf, ef   start and end frame; only their difference is used here
 *   uttid    utterance ID used as the corpus lookup key
 */
static void process_utt (char *uttfile, int32 sf, int32 ef, char *uttid)
{
    int32 i, f, nwd;
    char *str = NULL;
    char tmp[65535], *wdp[4096];

    /* Take the first input corpus that has an entry for this utterance */
    for (i = 0; i < n_inhyp; i++) {
        if ((str = corpus_lookup (inhyp[i], uttid)) != NULL)
            break;
    }
    if (i >= n_inhyp) {
        E_ERROR("%s: Missing\n", uttid);
        return;
    }

    /* str is split on a scratch copy; refuse lines that would overflow it */
    if (strlen (str) >= sizeof(tmp)) {
        E_ERROR("%s: Hyp too long in %s; ignored\n", uttid, infilename[i]);
        return;
    }
    strcpy (tmp, str);

    if ((nwd = str2words (tmp, wdp, 4095)) < 0)
        E_FATAL("str2words failed\n");

    /* The last word must be the frame count and must match this utterance */
    if ((nwd == 0) ||
        (sscanf (wdp[nwd-1], "%d", &f) != 1) ||
        (f != (ef-sf+1))) {
        E_ERROR("%s: Bad hyp in %s: %s\n", uttid, infilename[i], str);
        return;
    }

    fprintf (outfp, "%s %s\n", uttid, str);
    fflush (outfp);
    E_INFO("%s: Extracted from %s\n", uttid, infilename[i]);
}
/*
 * Load the LM context (two predecessor words and one successor word) for
 * uttid from the given corpus.
 *
 * The corpus entry must consist of exactly three whitespace-separated
 * tokens: pred[0] pred[1] succ.  A token of "-" means "no word" and yields
 * BAD_WID.  Any other token is looked up in the dictionary and reduced to
 * its base word ID.  Further per-position handling:
 *   - token 0: if the word has components (n_comp > 0), it is replaced by
 *     its last component.
 *   - token 1: if the word has components, its last two components become
 *     the two predecessors (overriding whatever token 0 produced).
 *   - token 2: must be FINISH_WORD.
 * Every resulting word ID must also be known to the LM.
 *
 * Results are stored in pred[0], pred[1] and *succ.  All error conditions
 * are reported via E_FATAL; this code relies on E_FATAL not returning.
 */
void lmcontext_load (corpus_t *corp, char *uttid, s3wid_t *pred, s3wid_t *succ)
{
    char *str, wd[4096], *strp;
    s3wid_t w[3];
    int32 i, n;
    dict_t *dict;
    s3lmwid_t lwid;

    if ((str = corpus_lookup (corp, uttid)) == NULL)
        E_FATAL("Couldn't find LM context for %s\n", uttid);

    dict = dict_getdict ();
    strp = str;

    /* One extra iteration (i == 3) is used only to detect trailing tokens */
    for (i = 0; i < 4; i++) {
        /* Field width keeps the token within wd[] */
        if (sscanf (strp, "%4095s%n", wd, &n) != 1) {
            if (i < 3)
                E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
            else
                break;
        }

        /* A 4th token is malformed input; reject it before it can index past w[2] */
        if (i >= 3)
            E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

        strp += n;

        if (strcmp (wd, "-") == 0)
            w[i] = BAD_WID;
        else {
            w[i] = dict_wordid (wd);
            if (NOT_WID(w[i]))
                E_FATAL("LM context word (%s) for %s not in dictionary\n", wd, uttid);
            w[i] = dict_basewid(w[i]);

            /* Note: n is reused below as a component count; strp was already advanced */
            switch (i) {
            case 0:
                if ((n = dict->word[w[0]].n_comp) > 0)
                    w[0] = dict->word[w[0]].comp[n-1].wid;
                break;
            case 1:
                /* NOTE(review): comp[n-2] assumes n_comp >= 2 whenever it is > 0 -- confirm */
                if ((n = dict->word[w[1]].n_comp) > 0) {
                    w[0] = dict->word[w[1]].comp[n-2].wid;
                    w[1] = dict->word[w[1]].comp[n-1].wid;
                }
                break;
            case 2:
                if (w[2] != dict_wordid(FINISH_WORD))
                    E_FATAL("Illegal successor LM context for %s: %s\n", uttid, str);
                break;
            default:
                assert (0);	/* Should never get here */
                break;
            }
        }
    }

    /* A first predecessor without a second one is not a valid history */
    if (IS_WID(w[0]) && NOT_WID(w[1]))
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    for (i = 0; i < 3; i++) {
        if (IS_WID(w[i])) {
            lwid = lm_lmwid (w[i]);
            /*
             * NOTE(review): wd still holds the last token parsed above, which
             * is not necessarily the word that failed this LM lookup.
             */
            if (NOT_LMWID(lwid))
                E_FATAL("LM context word (%s) for %s not in LM\n", wd, uttid);
        }
    }

    pred[0] = w[0];
    pred[1] = w[1];
    *succ = w[2];
}