Ejemplo n.º 1
0
/*
 * Interactive lookup driver for two corpus files.
 *
 * Usage: prog headid-corpusfile tailid-corpusfile
 *
 * Loads argv[1] with corpus_load_headid() and argv[2] with
 * corpus_load_tailid(), then repeatedly prompts for an id on stdin and prints
 * the entry found for that id in each corpus (or a "Not found" message).
 * The loop ends when stdin is exhausted or unreadable.
 */
int main(int32 argc, char *argv[])
{
    corpus_t *ch, *ct;
    char id[4096], *str;

    if (argc != 3)
        E_FATAL("Usage: %s headid-corpusfile tailid-corpusfile\n",
                argv[0]);

    ch = corpus_load_headid(argv[1], NULL, NULL);
    ct = corpus_load_tailid(argv[2], NULL, NULL);
    for (;;) {
        printf("> ");
        fflush(stdout);         /* Make the prompt visible before blocking on input */

        /* Width-limit the read to sizeof(id)-1 and stop on EOF/read error
         * instead of looping forever on a stale buffer. */
        if (scanf("%4095s", id) != 1)
            break;

        str = corpus_lookup(ch, id);
        if (str == NULL)
            printf("%s Not found in 1\n", id);
        else
            printf("%s(1): %s\n", id, str);

        str = corpus_lookup(ct, id);
        if (str == NULL)
            printf("%s Not found in 2\n", id);
        else
            printf("%s(2): %s\n", id, str);
    }

    return 0;
}
Ejemplo n.º 2
0
/*
 * Extract the hypothesis for one utterance from the loaded input corpora and
 * append it to outfp.
 *
 * The corpora inhyp[0 .. n_inhyp-1] are searched in order; the first one that
 * contains uttid supplies the entry.  The entry is split into words and its
 * last word is parsed as an integer that must equal ef-sf+1 (presumably the
 * utterance length in frames -- confirm against the caller).  A valid entry is
 * written to outfp as "uttid entry" and flushed; a missing, overlong or
 * inconsistent entry is reported with E_ERROR and nothing is written.
 *
 * uttfile  Not used by this function.
 * sf, ef   Bounds used only to compute the expected count ef-sf+1.
 * uttid    Utterance id to look up.
 */
static void process_utt (char *uttfile, int32 sf, int32 ef, char *uttid)
{
    int32 i, f, nwd;
    char *str = NULL;
    char tmp[65535], *wdp[4096];
    
    for (i = 0; i < n_inhyp; i++) {
	if ((str = corpus_lookup (inhyp[i], uttid)) != NULL)
	    break;
    }
    if (i >= n_inhyp) {
	E_ERROR("%s: Missing\n", uttid);
	return;
    }

    /* str2words() needs a scratch copy; refuse entries that would overflow it. */
    if (strlen (str) >= sizeof(tmp)) {
	E_ERROR("%s: Hyp too long in %s\n", uttid, infilename[i]);
	return;
    }
    strcpy (tmp, str);

    if ((nwd = str2words (tmp, wdp, 4095)) < 0)
	E_FATAL("str2words failed\n");

    /* The trailing word must be an integer matching the expected span. */
    if ((nwd == 0) || (sscanf (wdp[nwd-1], "%d", &f) != 1) || (f != (ef-sf+1)))
	E_ERROR("%s: Bad hyp in %s: %s\n", uttid, infilename[i], str);
    else {
	fprintf (outfp, "%s %s\n", uttid, str);
	fflush (outfp);
	E_INFO("%s: Extracted from %s\n", uttid, infilename[i]);
    }
}
Ejemplo n.º 3
0
/*
 * Load the LM context for an utterance: two predecessor words and one
 * successor word.
 *
 * The corpus entry for uttid must contain exactly three whitespace-separated
 * tokens.  Each token is either "-" (no word; stored as BAD_WID) or a
 * dictionary word, which is mapped to its base word id.  Compound words are
 * expanded:
 *   - token 0: replaced by the last component of the compound;
 *   - token 1: replaced by the last component, and the component before it
 *              (if any) overrides whatever token 0 produced;
 *   - token 2: must be FINISH_WORD if it is a word at all.
 * Every resulting word id must also be known to the LM.
 *
 * Any violation (missing entry, wrong token count, unknown word, first
 * predecessor present without the second) is reported through E_FATAL.
 *
 * corp   Corpus holding the context specs.
 * uttid  Utterance id to look up.
 * pred   Output; pred[0] and pred[1] receive the two predecessor word ids.
 * succ   Output; *succ receives the successor word id.
 */
void lmcontext_load (corpus_t *corp, char *uttid, s3wid_t *pred, s3wid_t *succ)
{
    char *str, *strp;
    char tok[3][4096];		/* The three tokens as they appear in the spec */
    int src[3];			/* src[i]: index of the token that w[i] came from */
    s3wid_t w[3], base;
    int32 i, ncomp;
    int nscan;			/* %n requires a plain int */
    dict_t *dict;
    s3lmwid_t lwid;
    
    if ((str = corpus_lookup (corp, uttid)) == NULL)
	E_FATAL("Couldn't find LM context for %s\n", uttid);
    dict = dict_getdict ();
    
    strp = str;
    for (i = 0; i < 3; i++) {
	/* Field width keeps an oversized token from overrunning tok[i]. */
	if (sscanf (strp, "%4095s%n", tok[i], &nscan) != 1)
	    E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
	strp += nscan;
	src[i] = i;
	
	if (strcmp (tok[i], "-") == 0) {
	    w[i] = BAD_WID;
	    continue;
	}
	
	w[i] = dict_wordid (tok[i]);
	if (NOT_WID(w[i]))
	    E_FATAL("LM context word (%s) for %s not in dictionary\n", tok[i], uttid);
	w[i] = dict_basewid(w[i]);
	
	switch (i) {
	case 0:
	    if ((ncomp = dict->word[w[0]].n_comp) > 0)
		w[0] = dict->word[w[0]].comp[ncomp-1].wid;
	    break;
	    
	case 1:
	    if ((ncomp = dict->word[w[1]].n_comp) > 0) {
		base = w[1];
		/* Only reach back one component if there is one; a
		 * single-component entry would otherwise index comp[-1]. */
		if (ncomp > 1) {
		    w[0] = dict->word[base].comp[ncomp-2].wid;
		    src[0] = 1;
		}
		w[1] = dict->word[base].comp[ncomp-1].wid;
	    }
	    break;
	    
	case 2:
	    if (w[2] != dict_wordid(FINISH_WORD))
		E_FATAL("Illegal successor LM context for %s: %s\n", uttid, str);
	    break;
	    
	default:
	    assert (0);	/* Should never get here */
	    break;
	}
    }
    
    /* A fourth token is illegal; reject it rather than writing past w[2]. */
    strp += strspn (strp, " \t\n\v\f\r");
    if (*strp != '\0')
	E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
    
    if (IS_WID(w[0]) && NOT_WID(w[1]))
	E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    for (i = 0; i < 3; i++) {
	if (IS_WID(w[i])) {
	    lwid = lm_lmwid (w[i]);
	    if (NOT_LMWID(lwid))
		/* Report the token this word id was derived from. */
		E_FATAL("LM context word (%s) for %s not in LM\n", tok[src[i]], uttid);
	}
    }
    
    pred[0] = w[0];
    pred[1] = w[1];
    *succ = w[2];
}