/*
 * Write the exact (word-segmented) hypothesis for one utterance.  Line format:
 *   <id> T <scr> A <ascr> L <lscr> {<sf> <wascr> <wlscr> <word>}... <ef>
 * where:
 *   scr = ascr + (lscr*lw+N*wip), where N = #words excluding <s>
 *         (accumulated here as the sum of h->ascr + h->lscr over all entries)
 *   ascr = scaled acoustic score for entire utterance
 *   lscr = LM score (without lw or wip) for entire utterance
 *   sf = start frame for word
 *   wascr = scaled acoustic score for word
 *   wlscr = LM score (without lw or wip) for word
 *   ef = end frame for utterance (the nfrm argument).
 * An empty hypothesis is written as the three zero totals followed by " (null)".
 * The stream is flushed before returning.
 */
static void log_hypseg (char *uttid,
			FILE *fp,	/* Out: output file */
			hyp_t *hypptr,	/* In: Hypothesis */
			int32 nfrm)	/* In: #frames in utterance */
{
    hyp_t *seg;
    int32 total_ascr = 0;
    int32 total_lscr = 0;
    int32 total_scr = 0;
    int32 word_lscr;

    /* Pass 1: utterance-level totals */
    for (seg = hypptr; seg != NULL; seg = seg->next) {
	total_ascr += seg->ascr;
	if (dict_basewid(seg->wid) == startwid) {
	    /* The start word is expected to carry no LM score */
	    assert (seg->lscr == 0);
	} else {
	    total_lscr += lm_rawscore (seg->lscr, 1.0);
	}
	total_scr += seg->ascr + seg->lscr;
    }
    fprintf (fp, "%s T %d A %d L %d", uttid, total_scr, total_ascr, total_lscr);

    if (hypptr == NULL) {	/* HACK!! */
	fprintf (fp, " (null)\n");
	fflush (fp);
	return;
    }

    /* Pass 2: one <sf> <wascr> <wlscr> <word> group per hypothesis entry */
    for (seg = hypptr; seg != NULL; seg = seg->next) {
	if (dict_basewid(seg->wid) == startwid)
	    word_lscr = 0;
	else
	    word_lscr = lm_rawscore (seg->lscr, 1.0);
	fprintf (fp, " %d %d %d %s", seg->sf, seg->ascr, word_lscr,
		 dict_wordstr (seg->wid));
    }
    fprintf (fp, " %d\n", nfrm);

    fflush (fp);
}
Exemplo n.º 2
0
/*
 * Write a hypothesis as a plain word string followed by "(uttid)".
 *
 * Nothing is written when fp is NULL.  An empty hypothesis list is marked
 * with "(null)" (s3.0 convention).  The optional hdr string is written
 * before the words.  Zero-width entries (sf == ef) are skipped; of the
 * remaining entries, filler words and the start/finish words are not printed,
 * and all others are printed by their base-word string.  If no non-zero-width
 * entry was seen, a single blank precedes the "(uttid)" trailer.
 */
void
match_write(FILE * fp, glist_t hyp, char *uttid, dict_t * dict, char *hdr)
{
    gnode_t *node;
    srch_hyp_t *seg;
    int n_seg = 0;              /* #entries with non-zero width */

    if (!fp)
        return;

    if (!hyp)                   /* Following s3.0 convention */
        fprintf(fp, "(null)");

    fprintf(fp, "%s", (hdr ? hdr : ""));

    for (node = hyp; node != NULL; node = gnode_next(node)) {
        seg = (srch_hyp_t *) gnode_ptr(node);

        /* FSG outputs zero-width hyps */
        if (seg->sf == seg->ef)
            continue;

        n_seg++;
        if (dict_filler_word(dict, seg->id)
            || (seg->id == dict_finishwid(dict))
            || (seg->id == dict_startwid(dict)))
            continue;

        fprintf(fp, "%s ",
                dict_wordstr(dict, dict_basewid(dict, seg->id)));
    }

    if (n_seg == 0)
        fprintf(fp, " ");
    fprintf(fp, "(%s)\n", uttid);
    fflush(fp);
}
Exemplo n.º 3
0
Arquivo: dict.c Projeto: 10v/cmusphinx
/*
 * Interactive dictionary lookup: load a dictionary (optionally with a model
 * definition and a filler dictionary), then repeatedly prompt for a word and
 * print every pronunciation reachable from its base word through the alt
 * chain.  The loop ends when no further word can be read from stdin.
 */
int
main (int32 argc, char *argv[])
{
    mdef_t *m;
    dict_t *d;
    char wd[1024];
    s3wid_t wid;
    int32 p;
    
    if (argc < 3)
	E_FATAL("Usage: %s {mdeffile | NULL} dict [fillerdict]\n", argv[0]);
    
    m = (strcmp (argv[1], "NULL") != 0) ? mdef_init (argv[1]) : NULL;
    d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), '_');
    
    for (;;) {
	printf ("word> ");
	/*
	 * Bound the read to the size of wd (1023 chars + NUL) and stop on
	 * EOF/read error; otherwise the loop would spin forever re-using a
	 * stale (or uninitialized) buffer.
	 */
	if (scanf ("%1023s", wd) != 1)
	    break;
	
	wid = dict_wordid (d, wd);
	if (NOT_WID(wid))
	    E_ERROR("Unknown word\n");
	else {
	    /* Walk the base word and all of its alternative pronunciations */
	    for (wid = dict_basewid(d, wid); IS_WID(wid); wid = d->word[wid].alt) {
		printf ("%s\t", dict_wordstr(d, wid));
		for (p = 0; p < d->word[wid].pronlen; p++)
		    printf (" %s", dict_ciphone_str (d, wid, p));
		printf ("\n");
	    }
	}
    }

    return 0;
}
Exemplo n.º 4
0
/*
 * Write hypothesis in old (pre-Nov95) NIST format: the word strings followed
 * by " (uttid)" or, when scr is non-zero, " (uttid scr)".
 *
 * Nothing is written when fp is NULL.  A NULL hypothesis is marked "(null)".
 * Zero-width entries (sf == ef) are skipped.  With exact set, every remaining
 * word is printed as-is; otherwise each word is mapped to its base word, and
 * the start word, finish word and filler words are suppressed.
 */
void
log_hypstr(FILE * fp, srch_hyp_t * hypptr, char *uttid, int32 exact,
           int32 scr, dict_t * dict)
{
    srch_hyp_t *seg;
    s3wid_t wid;

    if (!fp)
        return;

    if (hypptr == NULL)         /* HACK!! */
        fprintf(fp, "(null)");

    for (seg = hypptr; seg != NULL; seg = seg->next) {
        /* Take care of abnormality caused by FSG search or various
         * different reasons */
        if (seg->sf == seg->ef)
            continue;

        if (exact) {
            fprintf(fp, "%s ", dict_wordstr(dict, seg->id));
            continue;
        }

        wid = dict_basewid(dict, seg->id);
        if ((wid == dict->startwid) || (wid == dict->finishwid)
            || dict_filler_word(dict, wid))
            continue;
        fprintf(fp, "%s ", dict_wordstr(dict, wid));
    }

    if (scr == 0)
        fprintf(fp, " (%s)\n", uttid);
    else
        fprintf(fp, " (%s %d)\n", uttid, scr);
    fflush(fp);
}
/*
 * Build the fwdflat word list and the expansion list from the whole
 * dictionary: every word ID whose base word is known to the LM set is
 * appended to both lists and flagged in expand_word_flag.  Both lists are
 * terminated with -1.
 */
static void
ngram_fwdflat_expand_all(ngram_search_t *ngs)
{
    int total, wid;

    total = ps_search_n_words(ngs);

    /* Start from an empty expansion set */
    ngs->n_expand_words = 0;
    bitvec_clear_all(ngs->expand_word_flag, total);

    for (wid = 0; wid < total; ++wid) {
        if (ngram_model_set_known_wid(ngs->lmset,
                                      dict_basewid(ps_search_dict(ngs), wid))) {
            /* Since we are expanding all, the two lists stay identical */
            ngs->fwdflat_wordlist[ngs->n_expand_words] = wid;
            ngs->expand_word_list[ngs->n_expand_words] = wid;
            bitvec_set(ngs->expand_word_flag, wid);
            ngs->n_expand_words++;
        }
    }

    E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words);

    /* Terminate both lists */
    ngs->expand_word_list[ngs->n_expand_words] = -1;
    ngs->fwdflat_wordlist[ngs->n_expand_words] = -1;
}
Exemplo n.º 6
0
/* Emit blanks until a field in which `printed` characters were written is `width` wide */
static void pronerr_pad (int32 printed, int32 width)
{
    for (; printed < width; printed++)
	putchar (' ');
}

/*
 * Print one pronunciation-error region on stdout as:
 *   <words ws..we>  =>  (<left ctx phone>) <phones ps..pe>  (<right ctx phone>) ( <id> )
 * Words are printed by their base-word string.  Phones flagged in ap_err are
 * upper-cased, the others lower-cased.  The left context is the last phone of
 * ref[ws-1] (if any) and the right context is the first phone of ref[we+1]
 * (if any); an absent context is printed as "()".
 *
 * The word sequence is streamed straight to stdout (and padded afterwards)
 * instead of being assembled in a fixed buffer, so an arbitrarily long region
 * cannot overflow anything.  wseg is currently unused.
 */
static void pronerr_output (char *id, s3wid_t *ref, int32 nref,
			    wseg_t *wseg, s3cipid_t *ap, int8 *ap_err,
			    int32 ws, int32 we, int32 ps, int32 pe)
{
    int32 j, n, width;
    s3wid_t rcwid, lcwid;
    char str[4096];
    
    /* Word sequence for region in error, left-justified in a 22-column field */
    width = printf ("%s", dict_wordstr (dict, dict_basewid(dict, ref[ws])));
    if (width < 0)
	width = 0;
    for (j = ws+1; j <= we; j++) {
	n = printf (" %s", dict_wordstr (dict, dict_basewid(dict, ref[j])));
	if (n > 0)
	    width += n;
    }
    pronerr_pad (width, 22);
    printf ("\t=>\t");

    /* Print left context phone, left-justified in a 5-column field */
    lcwid = (ws > 0) ? ref[ws-1] : BAD_WID;
    if (IS_WID(lcwid)) {
	j = dict->word[lcwid].pronlen - 1;
	n = printf ("(%s)", mdef_ciphone_str (mdef, dict->word[lcwid].ciphone[j]));
    } else
	n = printf ("()");
    pronerr_pad (n, 5);
    
    /* Phone sequence for region in error */
    for (j = ps; j <= pe; j++) {
	/* Work on a bounded private copy; the case conversion is done in place */
	strncpy (str, mdef_ciphone_str (mdef, ap[j]), sizeof(str) - 1);
	str[sizeof(str) - 1] = '\0';
	if (ap_err[j])
	    ucase (str);
	else
	    lcase (str);
	
	printf (" %s", str);
    }
    
    /*
     * Right context if ending in error.  Never read past the nref entries of
     * ref; a region ending on the last reference word has no right context.
     * NOTE(review): assumes nref is the number of valid entries in ref --
     * confirm against the caller.
     */
    rcwid = (we + 1 < nref) ? ref[we+1] : BAD_WID;
    if (IS_WID(rcwid))
	printf ("\t(%s)", mdef_ciphone_str (mdef, dict->word[rcwid].ciphone[0]));
    else
	printf ("\t()");

    printf (" ( %s )\n", id);
}
Exemplo n.º 7
0
/*
 * Write the exact (word-segmented) hypothesis for one utterance.  Line format:
 *   <id> S <scl> T <scr> A <ascr> L <lscr> {<sf> <wascr> <wlscr> <word>}... <ef>
 * where:
 *   scl = acoustic score scaling for entire utterance
 *   scr = ascr + (lscr*lw+N*wip), where N = #words excluding <s>
 *         (accumulated here as the sum of h->ascr + h->lscr over all entries)
 *   ascr = scaled acoustic score for entire utterance
 *   lscr = LM score (without lw or wip) for entire utterance
 *   sf = start frame for word
 *   wascr = scaled acoustic score for word
 *   wlscr = LM score (without lw or wip) for word
 *   ef = end frame for utterance (the nfrm argument).
 * Nothing is written when fp is NULL.  An empty hypothesis is written as the
 * header totals followed by " (null)".  lwf and unnorm are not used here.
 */
void
log_hypseg(char *uttid, FILE * fp,      /* Out: output file */
           srch_hyp_t * hypptr, /* In: Hypothesis */
           int32 nfrm,          /* In: #frames in utterance */
           int32 scl,           /* In: Acoustic scaling for entire utt */
           float64 lwf,         /* In: LM score scale-factor (in dagsearch) */
           dict_t * dict,       /* In: dictionary */
           lm_t * lm, int32 unnorm
                                /**< Whether unscaled the score back */
    )
{
    srch_hyp_t *seg;
    int32 total_ascr = 0;
    int32 total_lscr = 0;
    int32 total_scr = 0;
    int32 word_lscr;

    if (!fp)
        return;

    /* Pass 1: utterance-level totals */
    for (seg = hypptr; seg != NULL; seg = seg->next) {
        total_ascr += seg->ascr;
        if (dict_basewid(dict, seg->id) == dict->startwid) {
            /* The start word is expected to carry no LM score */
            assert(seg->lscr == 0);
        }
        else {
            total_lscr += lm_rawscore(lm, seg->lscr);
        }
        total_scr += seg->ascr + seg->lscr;
    }

    fprintf(fp, "%s S %d T %d A %d L %d", uttid, scl, total_scr,
            total_ascr, total_lscr);

    if (hypptr == NULL) {       /* HACK!! */
        fprintf(fp, " (null)\n");
        fflush(fp);
        return;
    }

    /* Pass 2: one <sf> <wascr> <wlscr> <word> group per hypothesis entry */
    for (seg = hypptr; seg != NULL; seg = seg->next) {
        if (dict_basewid(dict, seg->id) == dict->startwid)
            word_lscr = 0;
        else
            word_lscr = lm_rawscore(lm, seg->lscr);
        fprintf(fp, " %d %d %d %s", seg->sf, seg->ascr, word_lscr,
                dict_wordstr(dict, seg->id));
    }
    fprintf(fp, " %d\n", nfrm);

    fflush(fp);
}
Exemplo n.º 8
0
Arquivo: dict.c Projeto: 10v/cmusphinx
/*
 * Scan the dictionary for compound words.  This function should be called just after
 * loading the dictionary.  For the moment, the component words of a compound word are
 * assumed to be separated by the given sep character (underscore in the CMU dict).
 * For each compound word found, the component word IDs are stored in the comp array
 * (and their count in n_comp) of the corresponding base-word entry.  Any malformed
 * compound word (leading/trailing/successive separators, unknown component, compound
 * special/filler word, or a word too long for the scratch buffer) is reported via
 * E_FATAL.
 * Return value: #compound words found in dictionary.
 *
 * NOTE(review): the loop visits every word ID but always operates on its base-word
 * entry, so a compound base word that has alternative pronunciations appears to be
 * counted, and its comp array allocated, once per alternative -- confirm whether
 * that is intended.
 */
static int32 dict_build_comp (dict_t *d,
			      char sep)		/* Separator character */
{
    char wd[4096];
    int32 w, cwid;
    dictword_t *wordp;
    int32 nc;		/* # compound words in dictionary */
    int32 i, j, l, n;
    size_t len;
    
    nc = 0;
    for (w = 0; w < d->n_word; w++) {
	wordp = d->word + dict_basewid(d, w);

	/* Validate the length before copying into the fixed-size scratch buffer */
	len = strlen (wordp->word);
	if (len == 0)
	    continue;		/* Empty string: cannot be a compound word */
	if (len >= sizeof(wd))
	    E_FATAL("Word %s too long for compound word check (max %d chars)\n",
		    wordp->word, (int) (sizeof(wd) - 1));
	strcpy (wd, wordp->word);
	l = (int32) len;

	if ((wd[0] == sep) || (wd[l-1] == sep))
	    E_FATAL("Bad compound word %s: leading or trailing separator\n", wordp->word);
	
	/* Count no. of components in this word */
	n = 1;
	for (i = 1; i < l-1; i++)	/* 0 and l-1 already checked above */
	    if (wd[i] == sep)
		n++;
	if (n == 1)
	    continue;		/* Not a compound word */
	nc++;
	
	if ((w == d->startwid) || (w == d->finishwid) || dict_filler_word (d, w))
	    E_FATAL("Compound special/filler word (%s) not allowed\n", wordp->word);
	
	/* Allocate and fill in component word info */
	wordp->n_comp = n;
	wordp->comp = (s3wid_t *) ckd_calloc (n, sizeof(s3wid_t));
	
	/* Parse word string into components */
	n = 0;
	for (i = 0; i < l; i++) {
	    /* Advance i to the next separator (or end of string); j marks the component start */
	    for (j = i; (i < l) && (wd[i] != sep); i++);
	    if (j == i)
		E_FATAL("Bad compound word %s: successive separators\n", wordp->word);
	    
	    /* Cut the component out in place and look it up */
	    wd[i] = '\0';
	    cwid = dict_wordid (d, wd+j);
	    if (NOT_WID(cwid))
		E_FATAL("Component word %s of %s not in dictionary\n", wd+j, wordp->word);
	    wordp->comp[n] = cwid;
	    n++;
	}
    }
    
    if (nc > 0)
	d->comp_head = dict_comp_head (d);
    
    return nc;
}
Exemplo n.º 9
0
Arquivo: dict.c Projeto: 10v/cmusphinx
/*
 * Return 1 if w is a filler word, 0 otherwise.  The test is made on the base
 * word of w: the start and finish words are never fillers; any other base
 * word ID inside [filler_start, filler_end] is.
 * d must be non-NULL and w a valid word ID (both asserted).
 */
int32 dict_filler_word (dict_t *d, s3wid_t w)
{
    s3wid_t base;

    assert (d);
    assert ((w >= 0) && (w < d->n_word));
    
    base = dict_basewid(d, w);

    /* Special boundary words are excluded even if they lie in the filler range */
    if ((base == d->startwid) || (base == d->finishwid))
	return 0;

    return ((base >= d->filler_start) && (base <= d->filler_end)) ? 1 : 0;
}
Exemplo n.º 10
0
/*
 * Pronunciation lookup filter.  Loads every dictionary named on the command
 * line, then reads vocabulary entries from stdin (one or two words per line)
 * and prints the pronunciations of the last word on the line, labelled with
 * the first word, taken from the first dictionary that contains it.
 * Alternative pronunciations are labelled word(2), word(3), ...
 */
main (int32 argc, char *argv[])
{
    dict_t **dicts;
    int32 n_dict, di, n_wd, n_pron, ph, wid;
    char line[16384], *wp[1024];
    
    if (argc < 2) {
	E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
	exit(0);
    }

    /* One dictionary per command-line argument */
    n_dict = argc-1;
    dicts = (dict_t **) ckd_calloc (n_dict, sizeof(dict_t *));
    for (di = 0; di < n_dict; di++)
	dicts[di] = dict_init (NULL, argv[di+1], NULL, 0);
    
    while (fgets (line, sizeof(line), stdin) != NULL) {
	n_wd = str2words (line, wp, 1024);
	if (n_wd < 0)
	    E_FATAL("Line too long: %s\n", line);
	if (n_wd > 2)
	    E_FATAL("Vocab entry contains too many words\n");
	
	if (n_wd == 0)
	    continue;
	if (n_wd == 1)
	    wp[1] = wp[0];	/* Single word: label and lookup key coincide */
	
	/* Look up word in each dictionary until found */
	n_pron = 0;
	for (di = 0; (di < n_dict) && (n_pron == 0); di++) {
	    wid = dict_wordid (dicts[di], wp[1]);
	    if (NOT_WID(wid))
		continue;
	    
	    /* Print the base pronunciation and then every alternative */
	    wid = dict_basewid(dicts[di], wid);
	    while (IS_WID(wid)) {
		n_pron++;
		if (n_pron == 1)
		    printf ("%s\t", wp[0]);
		else
		    printf ("%s(%d)\t", wp[0], n_pron);
		
		for (ph = 0; ph < dict_pronlen(dicts[di], wid); ph++)
		    printf (" %s", dict_ciphone_str (dicts[di], wid, ph));
		printf ("\n");

		wid = dict_nextalt(dicts[di], wid);
	    }
	}
	if (n_pron == 0)
	    E_ERROR("No pronunciation for: '%s'\n", wp[0]);
    }
}
/*
 * One-time initialization of the alignment module's file-level state:
 * fetches the model definition, transition matrices and dictionary, resolves
 * the start/finish/silence word IDs, builds the BAD_WID-terminated list of
 * optional filler words, reads the -beam argument, and resets the history and
 * segmentation lists.  Always returns 0.
 */
int32 align_init ( void )
{
    int32 n_fill;
    s3wid_t fw;
    float64 *beam_arg;
    
    mdef = mdef_getmdef ();
    tmat = tmat_gettmat ();
    dict = dict_getdict ();
    assert (mdef && tmat && dict);
    
    startwid = dict_wordid (START_WORD);
    finishwid = dict_wordid (FINISH_WORD);
    silwid = dict_wordid (SILENCE_WORD);
    
    /* Start/finish words are mandatory; silence is optional */
    if ((NOT_WID(startwid)) || (NOT_WID(finishwid)))
	E_FATAL("%s or %s not in dictionary\n", START_WORD, FINISH_WORD);
    if (NOT_WID(silwid))
	E_ERROR("%s not in dictionary; no optional silence inserted between words\n",
	       SILENCE_WORD);

    /*
     * Create list of optional filler words to be inserted between transcript
     * words: silence first (if present), then every base filler word other
     * than silence/start/finish.  Sized for all fillers + silence + terminator.
     */
    fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3),
				      sizeof(s3wid_t));
    n_fill = 0;
    if (IS_WID(silwid)) {
	fillwid[n_fill] = silwid;
	n_fill++;
    }
    for (fw = dict->filler_start; fw <= dict->filler_end; fw++) {
	if (dict_basewid (fw) != fw)
	    continue;		/* Skip alternative pronunciations */
	if ((fw == silwid) || (fw == startwid) || (fw == finishwid))
	    continue;
	fillwid[n_fill] = fw;
	n_fill++;
    }
    fillwid[n_fill] = BAD_WID;

    beam_arg = (float64 *) cmd_ln_access ("-beam");
    beam = logs3 (*beam_arg);
    E_INFO ("logs3(beam)= %d\n", beam);

    score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));
    
    /* No history or segmentations yet */
    hist_head = NULL;
    align_stseg = NULL;
    align_phseg = NULL;
    align_wdseg = NULL;

    ctr_nstate = counter_new ("NS");
    
    return 0;
}
Exemplo n.º 12
0
/*
 * Return the alternative-pronunciation index of a word: 0 for an out-of-range
 * word ID, 1 for a base word, otherwise the number parsed from the "(N"
 * suffix that follows the base string in the word's own string.
 */
int
dict_altid(dict_t *d, int32 wid)
{
    char const *full, *base, *suffix;

    if (!(wid >= 0 && wid < d->n_word))
        return 0;

    if (dict_basewid(d, wid) == wid)
        return 1;

    /* FIXME: the order in which they appear in nextalt is unrelated
     * to the actual alterate indices in the words... */
    full = dict_wordstr(d, wid);
    base = dict_basestr(d, wid);

    /* The alternate's string must be the base string plus a "(N)" tail */
    assert(strlen(full) > strlen(base));
    suffix = full + strlen(base);
    assert(*suffix == '(');

    return atoi(suffix + 1);
}
/*
 * Write hypothesis in old (pre-Nov95) NIST format: the base-word strings of
 * all entries except the start word, finish word and filler words, followed
 * by " (uttid)" or, when scr is non-zero, " (uttid scr)".  A NULL hypothesis
 * is marked "(null)".  The stream is flushed before returning.
 */
static void log_hypstr (FILE *fp, hyp_t *hypptr, char *uttid, int32 scr)
{
    hyp_t *seg;
    s3wid_t base;

    if (hypptr == NULL)	/* HACK!! */
	fprintf (fp, "(null)");
    
    for (seg = hypptr; seg != NULL; seg = seg->next) {
	base = dict_basewid (seg->wid);
	if ((base == startwid) || (base == finishwid) || dict_filler_word (base))
	    continue;
	fprintf (fp, "%s ", dict_wordstr(base));
    }

    if (scr == 0)
	fprintf (fp, " (%s)\n", uttid);
    else
	fprintf (fp, " (%s %d)\n", uttid, scr);

    fflush (fp);
}
Exemplo n.º 14
0
Arquivo: dict.c Projeto: 10v/cmusphinx
/*
 * Find the compound word whose components are exactly wid[0..len-1].
 * Candidates are taken from the comp_head chain that starts at wid[0].
 * Returns the base word ID of the first match, or BAD_WID if the dictionary
 * has no compound-word index or nothing matches.  len must be > 1 (asserted).
 */
s3wid_t dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len)
{
    s3wid_t cand;
    int32 k;
    
    if (d->comp_head == NULL)
	return BAD_WID;
    
    assert (len > 1);
    
    cand = d->comp_head[wid[0]];
    while (IS_WID(cand)) {
	/* cand is a compound word beginning with wid[0]; check if rest matches */
	assert (d->word[cand].n_comp > 1);
	assert (d->word[cand].comp[0] == wid[0]);
	
	if (d->word[cand].n_comp == len) {
	    k = 0;
	    while ((k < len) && (d->word[cand].comp[k] == wid[k]))
		k++;
	    if (k == len)
		return (dict_basewid(d, cand));
	}

	cand = d->comp_head[cand];
    }

    return BAD_WID;
}
Exemplo n.º 15
0
/*
 * Build a word_fsg_t from a parsed s2_fsg_t description.
 *
 * The silence/filler probabilities and language weight are taken from
 * kbc->fillpen; the dictionary, model definition and transition matrices
 * from kbc.  Each input transition becomes either a word transition or a
 * null (epsilon) transition; with use_altpron set, words are mapped to their
 * base word and one extra transition is added per alternative pronunciation;
 * with use_filler set, filler transitions are added by word_fsg_add_filler().
 * Afterwards the null-transition closure and the per-state left/right
 * context phone lists are computed.
 *
 * Returns the new word_fsg_t, or NULL (after logging an error) if the filler
 * probabilities, the state counts/indices, or any transition is invalid.
 */
word_fsg_t *
word_fsg_load(s2_fsg_t * fsg,
              int use_altpron, int use_filler,
	      kbcore_t *kbc)
{
    float32 silprob = kbc->fillpen->silprob;
    float32 fillprob = kbc->fillpen->fillerprob;
    float32 lw = kbc->fillpen->lw;
    word_fsg_t *word_fsg;
    s2_fsg_trans_t *trans;
    int32 n_trans, n_null_trans, n_alt_trans, n_filler_trans, n_unk;
    int32 wid;
    int32 logp;
    glist_t nulls;
    int32 i, j;

    assert(fsg);

    /* Some error checking */
    /* A non-positive language weight is only warned about, not rejected */
    if (lw <= 0.0)
        E_WARN("Unusual language-weight value: %.3e\n", lw);
    if (use_filler && ((silprob < 0.0) || (fillprob < 0.0))) {
        E_ERROR("silprob/fillprob must be >= 0\n");
        return NULL;
    }
    if ((fsg->n_state <= 0)
        || ((fsg->start_state < 0) || (fsg->start_state >= fsg->n_state))
        || ((fsg->final_state < 0) || (fsg->final_state >= fsg->n_state))) {
        E_ERROR("Bad #states/start_state/final_state values: %d/%d/%d\n",
                fsg->n_state, fsg->start_state, fsg->final_state);
        return NULL;
    }
    /* Every transition must connect valid states with a probability in (0, 1] */
    for (trans = fsg->trans_list; trans; trans = trans->next) {
        if ((trans->from_state < 0) || (trans->from_state >= fsg->n_state)
            || (trans->to_state < 0) || (trans->to_state >= fsg->n_state)
            || (trans->prob <= 0) || (trans->prob > 1.0)) {
            E_ERROR("Bad transition: P(%d -> %d) = %e\n",
                    trans->from_state, trans->to_state, trans->prob);
            return NULL;
        }
    }


    /* All validation passed (nothing was allocated before this point) */
    word_fsg = (word_fsg_t *) ckd_calloc(1, sizeof(word_fsg_t));
    word_fsg->name = ckd_salloc(fsg->name ? fsg->name : "");
    word_fsg->n_state = fsg->n_state;
    word_fsg->start_state = fsg->start_state;
    word_fsg->final_state = fsg->final_state;
    word_fsg->use_altpron = use_altpron;
    word_fsg->use_filler = use_filler;
    word_fsg->lw = lw;
    word_fsg->lc = NULL;
    word_fsg->rc = NULL;
    word_fsg->dict = kbc->dict;
    word_fsg->mdef = kbc->mdef;
    word_fsg->tmat = kbc->tmat;
    word_fsg->n_ciphone = mdef_n_ciphone(kbc->mdef);


    /* Allocate non-epsilon transition matrix array */
    word_fsg->trans = (glist_t **) ckd_calloc_2d(word_fsg->n_state,
                                                 word_fsg->n_state,
                                                 sizeof(glist_t));
    /* Allocate epsilon transition matrix array */
    word_fsg->null_trans = (word_fsglink_t ***)
        ckd_calloc_2d(word_fsg->n_state, word_fsg->n_state,
                      sizeof(word_fsglink_t *));

    /* Process transitions */
    n_null_trans = 0;
    n_alt_trans = 0;
    n_filler_trans = 0;
    n_unk = 0;
    nulls = NULL;

    for (trans = fsg->trans_list, n_trans = 0;
         trans; trans = trans->next, n_trans++) {
        /* Convert prob to logs2prob and apply language weight */
        logp = (int32) (logs3(kbcore_logmath(kbc), trans->prob) * lw);

        /* Check if word is in dictionary */
        if (trans->word) {
            wid = dict_wordid(kbc->dict, trans->word);
            if (wid < 0) {
                /*
                 * NOTE(review): wid stays negative here, so the code below
                 * adds this transition as a NULL transition rather than
                 * dropping it -- confirm that is what "ignored" means.
                 */
                E_ERROR("Unknown word '%s'; ignored\n", trans->word);
                n_unk++;
            }
            else if (use_altpron) {
                /* Start from the base word; alternatives are added below */
                wid = dict_basewid(kbc->dict, wid);
                assert(wid >= 0);
            }
        }
        else
            wid = -1;           /* Null transition */

        /* Add transition to word_fsg structure */
        i = trans->from_state;
        j = trans->to_state;
        if (wid < 0) {
            /* Only a return value of 1 is counted and queued for the closure */
            if (word_fsg_null_trans_add(word_fsg, i, j, logp) == 1) {
                n_null_trans++;
                nulls =
                    glist_add_ptr(nulls,
                                  (void *) word_fsg->null_trans[i][j]);
            }
        }
        else {
            word_fsg_trans_add(word_fsg, i, j, logp, wid);

            /* Add transitions for alternative pronunciations, if any */
            if (use_altpron) {
                for (wid = dict_nextalt(kbc->dict, wid);
                     wid >= 0; wid = dict_nextalt(kbc->dict, wid)) {
                    word_fsg_trans_add(word_fsg, i, j, logp, wid);
                    n_alt_trans++;
                    n_trans++;
                }
            }
        }
    }

    /* Add silence and noise filler word transitions if specified */
    if (use_filler) {
        n_filler_trans = word_fsg_add_filler(word_fsg, silprob, fillprob, kbcore_logmath(kbc));
        n_trans += n_filler_trans;
    }

    E_INFO
        ("FSG: %d states, %d transitions (%d null, %d alt, %d filler,  %d unknown)\n",
         word_fsg->n_state, n_trans, n_null_trans, n_alt_trans,
         n_filler_trans, n_unk);

#if __FSG_DBG__
    E_INFO("FSG before NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Null transitions closure */
    nulls = word_fsg_null_trans_closure(word_fsg, nulls);
    glist_free(nulls);

#if __FSG_DBG__
    E_INFO("FSG after NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Compute left and right context CIphone lists for each state */
    word_fsg_lc_rc(word_fsg);

#if __FSG_DBG__
    E_INFO("FSG after lc/rc:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    return word_fsg;
}
Exemplo n.º 16
0
/*
 * Compute the left and right context CIphone sets for each state.
 * (Needed for building the phone HMM net using cross-word triphones.  Invoke
 * after computing null transitions closure.)
 *
 * On return, fsg->lc[s] and fsg->rc[s] are freshly allocated, -1 terminated
 * lists of CIphone IDs for every state s.  They are first filled as per-phone
 * flag arrays, then propagated across null transitions, and finally compacted
 * in place into lists.  Aborts via E_FATAL if there are more than 127 CI
 * phones, since the lists are stored as int8.
 */
static void
word_fsg_lc_rc(word_fsg_t * fsg)
{
    int32 s, d, i, j;
    int32 n_ci;
    gnode_t *gn;
    word_fsglink_t *l;
    int32 silcipid;
    int32 endwid;
    int32 len;
    dict_t *dict;
    mdef_t *mdef;

    /* Check fsg before it is dereferenced, so the assert can actually fire */
    assert(fsg);

    dict = fsg->dict;
    mdef = fsg->mdef;

    assert(dict);
    assert(mdef);
    endwid = dict_basewid(dict, dict_finishwid(dict));

    silcipid = mdef_silphone(mdef);
    assert(silcipid >= 0);
    E_INFO("Value of silcipid %d\n", silcipid);
    n_ci = fsg->n_ciphone;
    E_INFO("No of CI phones %d\n", n_ci);
    if (n_ci > 127) {
        E_FATAL
            ("#phones(%d) > 127; cannot use int8** for word_fsg_t.{lc,rc}\n",
             n_ci);
    }

    /*
     * fsg->lc[s] = set of left context CIphones for state s.  Similarly, rc[s]
     * for right context CIphones.  One extra slot per row leaves room for the
     * list terminator written at the end.
     */
    fsg->lc =
        (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8));
    fsg->rc =
        (int8 **) ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(int8));

    for (s = 0; s < fsg->n_state; s++) {
        for (d = 0; d < fsg->n_state; d++) {
            for (gn = fsg->trans[s][d]; gn; gn = gnode_next(gn)) {
                l = (word_fsglink_t *) gnode_ptr(gn);
                assert(l->wid >= 0);

                /*
                 * Add the first CIphone of l->wid to the rclist of state s, and
                 * the last CIphone to lclist of state d.
                 * (Filler phones are a pain to deal with.  There is no direct
                 * marking of a filler phone; but only filler words are supposed to
                 * use such phones, so we use that fact.  HACK!!  FRAGILE!!)
                 */
                if (dict_filler_word(dict, l->wid) || (l->wid == endwid)) {
                    /* Filler phone; use silence phone as context */
                    fsg->rc[s][silcipid] = 1;
                    fsg->lc[d][silcipid] = 1;
                }
                else {
                    len = dict_pronlen(dict, l->wid);
                    fsg->rc[s][dict_pron(dict, l->wid, 0)] = 1;
                    fsg->lc[d][dict_pron(dict, l->wid, len - 1)] = 1;
                }
            }
        }

        /*
         * Add SIL phone to the lclist and rclist of each state.  Strictly
         * speaking, only needed at start and final states, respectively, but
         * all states considered since the user may change the start and final
         * states.  In any case, most applications would have a silence self
         * loop at each state, hence these would be needed anyway.
         */
        fsg->lc[s][silcipid] = 1;
        fsg->rc[s][silcipid] = 1;
    }

    /*
     * Propagate lc and rc lists past null transitions.  (Since FSG contains
     * null transitions closure, no need to worry about a chain of successive
     * null transitions.  Right??)
     */
    for (s = 0; s < fsg->n_state; s++) {
        for (d = 0; d < fsg->n_state; d++) {
            l = fsg->null_trans[s][d];
            if (l) {
                /*
                 * lclist(d) |= lclist(s), because all the words ending up at s, can
                 * now also end at d, becoming the left context for words leaving d.
                 */
                for (i = 0; i < n_ci; i++)
                    fsg->lc[d][i] |= fsg->lc[s][i];
                /*
                 * Similarly, rclist(s) |= rclist(d), because all the words leaving d
                 * can equivalently leave s, becoming the right context for words
                 * ending up at s.
                 */
                for (i = 0; i < n_ci; i++)
                    fsg->rc[s][i] |= fsg->rc[d][i];
            }
        }
    }

    /*
     * Convert the flag-array representation into a list, in place.  The write
     * index j never overtakes the read index i, so no unread flag is clobbered.
     */
    for (s = 0; s < fsg->n_state; s++) {
        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (fsg->lc[s][i]) {
                fsg->lc[s][j] = i;
                j++;
            }
        }
        fsg->lc[s][j] = -1;     /* Terminate the list */

        j = 0;
        for (i = 0; i < n_ci; i++) {
            if (fsg->rc[s][i]) {
                fsg->rc[s][j] = i;
                j++;
            }
        }
        fsg->rc[s][j] = -1;     /* Terminate the list */
    }
}
Exemplo n.º 17
0
/*
 * Create the filler-word penalty table for the given dictionary.
 *
 * Every word ID in [dict->filler_start, dict->filler_end] is first given the
 * penalty (logs3(fillprob) + logs3(wip)) * lw; the silence word is then
 * overwritten using silprob.  If file is non-NULL it is read as lines of
 * "<word> <prob>" (lines starting with '#' and blank lines are skipped) and
 * each listed filler word is overwritten using its own prob; afterwards each
 * alternative pronunciation is given the value of its base word.
 *
 * Fatal errors (E_FATAL): the silence word is not a filler word, the file
 * cannot be opened, a line is malformed, or a listed word is not a filler word.
 *
 * NOTE(review): when file is NULL the function returns before the
 * alternative-pronunciation replication step -- confirm that is intended.
 *
 * Returns the newly allocated fillpen_t.
 */
fillpen_t *fillpen_init (dict_t *dict, char *file, float64 silprob, float64 fillprob,
			 float64 lw, float64 wip)
{
    s3wid_t w, bw;
    float64 prob;
    FILE *fp;
    char line[1024], wd[1024];
    int32 k;
    fillpen_t *_fillpen;
    
    _fillpen = (fillpen_t *) ckd_calloc (1, sizeof(fillpen_t));
    
    _fillpen->dict = dict;
    _fillpen->lw = lw;
    _fillpen->wip = wip;
    /* The table is indexed by (word ID - filler_start); none if the range is empty */
    if (dict->filler_end >= dict->filler_start)
	_fillpen->prob = (int32 *) ckd_calloc (dict->filler_end - dict->filler_start + 1,
					       sizeof(int32));
    else
	_fillpen->prob = NULL;
    
    /* Initialize all words with filler penalty (HACK!! backward compatibility) */
    prob = fillprob;
    for (w = dict->filler_start; w <= dict->filler_end; w++)
	_fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite silence penalty (HACK!! backward compatibility) */
    w = dict_wordid (dict, S3_SILENCE_WORD);
    if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
	E_FATAL("%s not a filler word in the given dictionary\n", S3_SILENCE_WORD);
    prob = silprob;
    _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);
    
    /* Overwrite with filler prob input file, if specified */
    if (! file)
	return _fillpen;
    
    E_INFO("Reading filler penalty file: %s\n", file);
    if ((fp = fopen (file, "r")) == NULL)
	E_FATAL("fopen(%s,r) failed\n", file);
    while (fgets (line, sizeof(line), fp) != NULL) {
	if (line[0] == '#')	/* Skip comment lines */
	    continue;
	
	/*
	 * sscanf reports EOF (not 0) for a line holding only whitespace; in
	 * either case nothing was stored in wd, so skip the line rather than
	 * aborting or looking up stale buffer contents.
	 */
	k = sscanf (line, "%s %lf", wd, &prob);
	if (k < 1)
	    continue;
	if (k != 2)
	    E_FATAL("Bad input line: %s\n", line);
	w = dict_wordid(dict, wd);
	/* Report the word actually read from the file, not the silence word */
	if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
	    E_FATAL("%s not a filler word in the given dictionary\n", wd);
	
	_fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);
    }
    fclose (fp);
    
    /* Replicate fillpen values for alternative pronunciations */
    for (w = dict->filler_start; w <= dict->filler_end; w++) {
	bw = dict_basewid (dict, w);
	if (bw != w)
	    _fillpen->prob[w-dict->filler_start] = _fillpen->prob[bw-dict->filler_start];
    }
    
    return _fillpen;
}
/**
 * Perform cross-word transitions for the flat-lexicon search at one frame.
 *
 * For every backpointer table entry from ngs->bp_table_idx[frame_idx] up to
 * ngs->bpidx (the words that exited in this frame), try to enter the root
 * HMM of each word in ngs->expand_word_list for the next frame.  The entry
 * score is the recorded exit score for the matching right context (or the
 * plain backpointer score when no right-context mapping exists), plus the
 * trigram LM score scaled by fwdflat_fwdtree_lw_ratio, plus the phone
 * insertion penalty.  Afterwards <sil> and the word IDs following it are
 * entered from the best exit into a silence right context, and root HMMs in
 * the current active word list whose frame stamp is still frame_idx have
 * their scores cleared.
 *
 * @param ngs        N-gram search state; word channels, the word_active
 *                   bit vector and word_lat_idx are updated in place.
 * @param frame_idx  Current frame; HMMs are entered for frame_idx + 1.
 */
static void
fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
{
    int32 cf, nf, b, thresh, pip, i, w, newscore;
    int32 best_silrc_score = 0, best_silrc_bp = 0;      /* FIXME: good defaults? */
    bptbl_t *bp;
    int32 *rcss;
    root_chan_t *rhmm;
    int32 *awl;
    float32 lwf;
    dict_t *dict = ps_search_dict(ngs);
    dict2pid_t *d2p = ps_search_dict2pid(ngs);

    cf = frame_idx;
    nf = cf + 1;
    thresh = ngs->best_score + ngs->fwdflatbeam;
    pip = ngs->pip;
    best_silrc_score = WORST_SCORE;
    lwf = ngs->fwdflat_fwdtree_lw_ratio;

    /* Search for all words starting within a window of this frame.
     * These are the successors for words exiting now. */
    get_expand_wordlist(ngs, cf, ngs->max_sf_win);

    /* Scan words exited in current frame */
    for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
        xwdssid_t *rssid;
        int32 silscore;

        bp = ngs->bp_table + b;
        ngs->word_lat_idx[bp->wid] = NO_BP;

        /* Nothing may follow the finish word. */
        if (bp->wid == ps_search_finish_wid(ngs))
            continue;

        /* DICT2PID location */
        /* Get the mapping from right context phone ID to index in the
         * right context table and the bscore_stack. */
        rcss = ngs->bscore_stack + bp->s_idx;
        if (bp->last2_phone == -1)
            rssid = NULL;
        else
            rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);

        /* Transition to all successor words (list is terminated by a
         * negative entry). */
        for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
            int32 n_used;

            w = ngs->expand_word_list[i];

            /* Get the exit score we recorded in save_bwd_ptr(), or
             * something approximating it. */
            if (rssid)
                newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]];
            else
                newscore = bp->score;
            if (newscore == WORST_SCORE)
                continue;
            /* FIXME: Floating point... */
            newscore += lwf
                * (ngram_tg_score(ngs->lmset,
                                  dict_basewid(dict, w),
                                  bp->real_wid,
                                  bp->prev_real_wid,
                                  &n_used) >> SENSCR_SHIFT);
            newscore += pip;

            /* Enter the next word */
            if (newscore BETTER_THAN thresh) {
                rhmm = (root_chan_t *) ngs->word_chan[w];
                /* Enter only if the HMM was not already entered for the
                 * next frame, or if this entry score is better. */
                if ((hmm_frame(&rhmm->hmm) < cf)
                    || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                    hmm_enter(&rhmm->hmm, newscore, b, nf);
                    /* DICT2PID: This is where mpx ssids get introduced. */
                    /* Look up the ssid to use when entering this mpx triphone. */
                    hmm_mpx_ssid(&rhmm->hmm, 0) =
                        dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
                                          dict_last_phone(dict, bp->wid));
                    assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
                    E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
                               rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone,
                               hmm_mpx_ssid(&rhmm->hmm, 0)));
                    bitvec_set(ngs->word_active, w);
                }
            }
        }

        /* Get the best exit into silence. */
        if (rssid)
            silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
        else
            silscore = bp->score;
        if (silscore BETTER_THAN best_silrc_score) {
            best_silrc_score = silscore;
            best_silrc_bp = b;
        }
    }

    /* Transition to <sil> */
    newscore = best_silrc_score + ngs->silpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        w = ps_search_silence_wid(ngs);
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if ((hmm_frame(&rhmm->hmm) < cf)
            || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
            hmm_enter(&rhmm->hmm, newscore,
                      best_silrc_bp, nf);
            bitvec_set(ngs->word_active, w);
        }
    }
    /* Transition to noise words (all word IDs after the silence word) */
    newscore = best_silrc_score + ngs->fillpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
            rhmm = (root_chan_t *) ngs->word_chan[w];
            /* Noise words that aren't a single phone will have NULL here. */
            if (rhmm == NULL)
                continue;
            if ((hmm_frame(&rhmm->hmm) < cf)
                || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                hmm_enter(&rhmm->hmm, newscore,
                          best_silrc_bp, nf);
                bitvec_set(ngs->word_active, w);
            }
        }
    }

    /* Reset initial channels of words that have become inactive even after word trans. */
    i = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    /* Fetch each word ID inside the body so that exactly i entries are read;
     * prefetching in the loop header would read one element past the list. */
    for (; i > 0; --i) {
        w = *(awl++);
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if (hmm_frame(&rhmm->hmm) == cf) {
            hmm_clear_scores(&rhmm->hmm);
        }
    }
}
/*
 * Build a sentence HMM for the given transcription (wordstr).  A two-level DAG is
 * built: phone-level and state-level.
 *   - The automatic insertion of <s> and </s> around the transcript is currently
 *     disabled (see the HACK sections below): the first transcript word is linked
 *     directly to the dummy head node and the last one to the dummy tail node.
 *   - NOTE(review): optional <sil>/noise words between transcript words are
 *     presumably controlled by the last two arguments of append_transcript_word();
 *     confirm against that function.
 * wordstr must contain only the transcript; no extraneous stuff such as utterance-id.
 * It is tokenized in place by nextword() and each delimiter is restored afterwards.
 * Phone-level HMM structure has replicated nodes to allow for different left and right
 * context CI phones; hence, each pnode corresponds to a unique triphone in the sentence
 * HMM.
 * Return 0 if successful, <0 if any error (OOV word encountered, or empty transcript).
 */
int32 align_build_sent_hmm (char *wordstr)
{
    s3wid_t w, nextw;
    int32 k, oov;
    pnode_t *word_end = NULL, *node;
    char *wd, delim, *wdcopy = NULL;

/* HACK HACKA HACK BHIKSHA */
    int32 firsttime = 1;
/* END HACK HACKA HACK */

    
    /* Initialize dummy head and tail entries of sent hmm */
    phead.wid = BAD_WID;
    phead.ci = BAD_CIPID;
    phead.lc = BAD_CIPID;	/* No predecessor */
    phead.rc = BAD_CIPID;	/* Any phone can follow head */
    phead.pid = BAD_PID;
    phead.succlist = NULL;
    phead.predlist = NULL;
    phead.next = NULL;		/* Will ultimately be the head of list of all pnodes */
    phead.id = -1;		/* Hardwired */
    phead.startstate = NULL;
    
    ptail.wid = BAD_WID;
    ptail.ci = BAD_CIPID;
    ptail.lc = BAD_CIPID;	/* Any phone can precede tail */
    ptail.rc = BAD_CIPID;	/* No successor */
    ptail.pid = BAD_PID;
    ptail.succlist = NULL;
    ptail.predlist = NULL;
    ptail.next = NULL;
    ptail.id = -2;		/* Hardwired */
    ptail.startstate = NULL;

    n_pnode = 0;
    pnode_list = NULL;
    oov = 0;
    
    /* State-level DAG initialization should be here in case the build is aborted */
    shead.pnode = &phead;
    shead.succlist = NULL;
    shead.predlist = NULL;
    shead.sen = BAD_SENID;
    shead.state = mdef->n_emit_state;
    shead.hist = NULL;

    stail.pnode = &ptail;
    stail.succlist = NULL;
    stail.predlist = NULL;
    stail.sen = BAD_SENID;
    stail.state = 0;
    stail.hist = NULL;
    
    /* Obtain the first transcript word */
    k = nextword (wordstr, " \t\n", &wd, &delim);
    if (k < 0) {
	/*
	 * No words at all.  Since <s>/</s> are no longer inserted automatically,
	 * nothing would ever be appended and word_end would be used uninitialized
	 * below; report the failure instead.
	 */
	E_ERROR("Empty transcription; cannot build sentence HMM\n");
	return -1;
    }
    wordstr = wd + k;
    wdcopy = ckd_salloc (wd);
    *wordstr = delim;		/* Restore the delimiter overwritten by nextword() */
    nextw = dict_wordid (wdcopy);
    if (IS_WID(nextw))
	nextw = dict_basewid (nextw);
    
    /* Create node(s) for <s> before any transcript word */
/* HACK HACKA HACK BHIKSHA 
    word_end = append_transcript_word (startwid, &phead, nextw, 0, 1);
 END HACK HACKA HACK BHIKSHA */

    /* Append each word in transcription to partial sent HMM created so far */
    while (k >= 0) {
	w = nextw;
	if (NOT_WID(w)) {
	    E_ERROR("%s not in dictionary\n", wdcopy);
	    oov = 1;
	    /* Hack!! Temporarily set w to some dummy just to run through sentence */
	    w = finishwid;
	}
	ckd_free (wdcopy);
	wdcopy = NULL;
	
	/* Look ahead one word: it is passed along when appending w */
	k = nextword (wordstr, " \t\n", &wd, &delim);

	if (k < 0)
	    nextw = finishwid;
	else {
	    wordstr = wd + k;
	    wdcopy = ckd_salloc (wd);
	    *wordstr = delim;
	    nextw = dict_wordid (wdcopy);
	    if (IS_WID(nextw))
		nextw = dict_basewid (nextw);
	}

/* HACK HACKA HACK BHIKSHA */
	if (firsttime) {
	    /* First transcript word takes the place of <s>: hang it off the head */
	    word_end = append_transcript_word (w, &phead, nextw, 0, 1);
	    firsttime = 0;
	}
	else {
	    if (nextw == finishwid) {
		/* Last transcript word takes the place of </s>: no successor */
		word_end = append_transcript_word (w, word_end, BAD_WID, 1, 0);
	    }
	    else {
		word_end = append_transcript_word (w, word_end, nextw, 1, 1);
	    }
	}
/* END HACK HACKA HACK BHIKSHA */
    }
    if (oov)
	return -1;
    
    /* Append phone HMMs for </s> at the end; link to tail node */
/* HACK HACKA HACK BHIKSHA 
    word_end = append_transcript_word (finishwid, word_end, BAD_WID, 1, 0);
 END HACK HACKA HACK BHIKSHA */

    for (node = word_end; node; node = node->next)
	link_pnodes (node, &ptail);
    
    /* Build state-level DAG from the phone-level one */
    build_state_dag ();
    /* Dag must begin and end at shead and stail, respectively */
    assert (shead.succlist);
    assert (stail.predlist);
    assert (! shead.predlist);
    assert (! stail.succlist);

#if _DEBUG_ALIGN_
    dump_sent_hmm ();	/* For debugging */
#endif

    k = n_pnode * mdef->n_emit_state;
    if (k > active_list_size) {	/* Need to grow active list arrays */
	if (active_list_size > 0) {
	    ckd_free (cur_active);
	    ckd_free (next_active);
	}
	/* Grow in fixed increments until strictly larger than k */
	for (; active_list_size <= k; active_list_size += ACTIVE_LIST_SIZE_INCR);
	cur_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *));
	next_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *));
    }
    
    return 0;
}
Exemplo n.º 20
0
/*
 * Load the LM context words for utterance uttid from corp.
 * The context entry must hold exactly three whitespace-separated tokens:
 *   <pred0> <pred1> <succ>
 * where "-" stands for "no word" (BAD_WID).  Each real word is mapped to its
 * base word ID.  If a predecessor is a compound word (n_comp > 0) it is
 * replaced by its trailing component(s): the last component for token 0, the
 * last two components for token 1 (which then also overwrites pred0).  A
 * successor that is not "-" must be FINISH_WORD.  If pred0 is a real word,
 * pred1 must be one too, and every real word must be known to the LM.
 * Any violation is reported through E_FATAL.
 *   corp:  In: corpus holding the LM context specs
 *   uttid: In: utterance ID to look up
 *   pred:  Out: pred[0], pred[1] receive the two predecessor word IDs
 *   succ:  Out: receives the successor word ID
 */
void lmcontext_load (corpus_t *corp, char *uttid, s3wid_t *pred, s3wid_t *succ)
{
    char *str, wd[4096], *strp;
    s3wid_t w[3];
    int32 i, n;
    dict_t *dict;
    s3lmwid_t lwid;
    
    if ((str = corpus_lookup (corp, uttid)) == NULL)
	E_FATAL("Couldn't find LM context for %s\n", uttid);
    dict = dict_getdict ();
    
    strp = str;
    /* Iterate once past the 3 expected tokens, only to detect trailing junk */
    for (i = 0; i < 4; i++) {
	/* Field width keeps an overlong token from overflowing wd[] */
	if (sscanf (strp, "%4095s%n", wd, &n) != 1) {
	    if (i < 3)
		E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
	    else
		break;
	}
	if (i >= 3) {
	    /* A 4th token exists; w[] only has room for 3 entries */
	    E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
	    break;
	}
	strp += n;
	
	if (strcmp (wd, "-") == 0)
	    w[i] = BAD_WID;
	else {
	    w[i] = dict_wordid (wd);
	    if (NOT_WID(w[i]))
		E_FATAL("LM context word (%s) for %s not in dictionary\n", wd, uttid);
	    w[i] = dict_basewid(w[i]);
	    
	    switch (i) {
	    case 0: 
		/* Compound word: only its last component matters as context */
		if ((n = dict->word[w[0]].n_comp) > 0)
		    w[0] = dict->word[w[0]].comp[n-1].wid;
		break;
		
	    case 1:
		/* Compound word: its last two components become the full history.
		 * NOTE(review): assumes compound words have >= 2 components;
		 * n == 1 would index comp[-1] -- confirm against the dictionary. */
		if ((n = dict->word[w[1]].n_comp) > 0) {
		    w[0] = dict->word[w[1]].comp[n-2].wid;
		    w[1] = dict->word[w[1]].comp[n-1].wid;
		}
		break;
		
	    case 2:
		if (w[2] != dict_wordid(FINISH_WORD))
		    E_FATAL("Illegal successor LM context for %s: %s\n", uttid, str);
		break;
		
	    default:
		assert (0);	/* Should never get here */
		break;
	    }
	}
    }
    
    if (IS_WID(w[0]) && NOT_WID(w[1]))
	E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    for (i = 0; i < 3; i++) {
	if (IS_WID(w[i])) {
	    lwid = lm_lmwid (w[i]);
	    /* NOTE(review): wd still holds the last token parsed above, which is
	     * not necessarily the word for w[i]; the message may name the wrong word. */
	    if (NOT_LMWID(lwid))
		E_FATAL("LM context word (%s) for %s not in LM\n", wd, uttid);
	}
    }
    
    pred[0] = w[0];
    pred[1] = w[1];
    *succ = w[2];
}
Exemplo n.º 21
0
/**
 * Retrieve the current hypothesis of the live decoder.
 *
 * If the utterance has already ended, the hypothesis cached in the knowledge
 * base is returned unchanged.  Otherwise the cached hypothesis is released
 * with kb_freehyps(), a partial-utterance backtrace is taken from the Viterbi
 * history, and the segment array and/or word string are rebuilt from it.
 * The word string holds the base forms of all words except fillers and the
 * finish word, separated by single spaces.
 *
 * NOTE(review): kb->hyp_seglen and kb->hyp_strlen are only incremented here;
 * this presumably relies on kb_freehyps() resetting them -- confirm.
 *
 * @param decoder   Live decoder instance.
 * @param hyp_str   Out (optional): receives kb->hyp_str; may be NULL.
 * @param hyp_segs  Out (optional): receives kb->hyp_segs; may be NULL.
 * @return Always 0.
 */
int
ld_utt_hyps(live_decoder_t *decoder, char **hyp_str, hyp_t ***hyp_segs)
{
  int32	id;
  int32	i = 0;
  glist_t hyp_list;
  gnode_t *node;
  hyp_t *hyp;
  dict_t *dict;
  char *hyp_strptr = NULL;
  kb_t *kb = &decoder->kb;

  if (decoder->ld_state == LD_STATE_ENDED) {
    if (hyp_segs) {
      *hyp_segs = kb->hyp_segs;
    }
    if (hyp_str) {
      *hyp_str = kb->hyp_str;
    }
    return 0;
  }
  else {
    kb_freehyps(kb);
  }

  dict = kbcore_dict (decoder->kbcore);
  id = vithist_partialutt_end(kb->vithist, decoder->kbcore);
  if (id >= 0) {
    hyp_list = vithist_backtrace(kb->vithist, id);

    /* record the segment length and the overall string length */
    for (node = hyp_list; node; node = gnode_next(node)) {
      hyp = (hyp_t *)gnode_ptr(node);
      if (hyp_segs) {
        kb->hyp_seglen++;
      }
      if (hyp_str) {
        if (!dict_filler_word(dict, hyp->id) && 
            hyp->id != dict_finishwid(dict)) {
          /* +1 for the separating space (the last one becomes the NUL) */
          kb->hyp_strlen +=
            strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) + 1;
        }
      }
    }

    /* allocate array to hold the segments and/or decoded string */
    if (hyp_segs) {
      kb->hyp_segs = (hyp_t **)ckd_calloc(kb->hyp_seglen, sizeof(hyp_t *));
    }
    if (hyp_str) {
      kb->hyp_str = (char *)ckd_calloc(kb->hyp_strlen+1, sizeof(char));
    }
		
    /* iterate thru to fill in the array of segments and/or decoded string */
    i = 0;
    if (hyp_str) {
      hyp_strptr = kb->hyp_str;
    }
    for (node = hyp_list; node; node = gnode_next(node), i++) {
      hyp = (hyp_t *)gnode_ptr(node);
      if (hyp_segs) {
        kb->hyp_segs[i] = hyp;
      }
      /* Must apply the same filter as the length pass above; otherwise
       * filler words and </s> would be written past the end of the buffer. */
      if (hyp_str &&
          !dict_filler_word(dict, hyp->id) &&
          hyp->id != dict_finishwid(dict)) {
        strcat(hyp_strptr, dict_wordstr(dict, dict_basewid(dict, hyp->id)));
        hyp_strptr += strlen(hyp_strptr);
        strcat(hyp_strptr, " ");
        hyp_strptr += 1;
      }
    }
    glist_free(hyp_list);

    /* Replace the trailing space by the terminator.  With no words the
     * zero-filled one-byte buffer is already an empty string. */
    if (hyp_str && kb->hyp_strlen > 0) {
      kb->hyp_str[kb->hyp_strlen - 1] = '\0';
    }
  }

  if (hyp_segs) {
    *hyp_segs = kb->hyp_segs;
  }

  if (hyp_str) {
    *hyp_str = kb->hyp_str;
  }

  return 0;
}
Exemplo n.º 22
0
/**
 * Record the current hypothesis (segments and word string) in the decoder.
 *
 * Any previously recorded hypothesis is released first.  The Viterbi history
 * is then closed (final or partial, depending on _end_utt), backtraced, and
 * converted into a NULL-terminated array of segments plus a string holding
 * the base forms of all words except fillers and the finish word, separated
 * by single spaces.
 *
 * @param _decoder  Live decoder instance; must not be NULL.
 * @param _end_utt  Non-zero to use vithist_utt_end(), zero to use
 *                  vithist_partialutt_end().
 * @return LD_SUCCESS on success, LD_ERROR_INTERNAL if no history entry could
 *         be obtained, LD_ERROR_OUT_OF_MEMORY if allocation failed.
 */
int
ld_record_hyps(live_decoder_t * _decoder, int _end_utt)
{
    int32 id;
    int32 i = 0;
    glist_t hyp_list;
    gnode_t *node;
    srch_hyp_t *hyp;
    char *hyp_strptr = 0;
    char *hyp_str = 0;
    srch_hyp_t **hyp_segs = 0;
    int hyp_seglen = 0;
    int hyp_strlen = 0;
    int finish_wid = 0;
    kb_t *kb = 0;
    dict_t *dict;
    int rv;

    assert(_decoder != NULL);

    ld_free_hyps(_decoder);

    kb = &_decoder->kb;
    dict = kbcore_dict(_decoder->kbcore);
    id = _end_utt ?
        vithist_utt_end(kb->vithist, _decoder->kbcore) :
        vithist_partialutt_end(kb->vithist, _decoder->kbcore);
    if (id < 0) {
        E_WARN("Failed to retrieve viterbi history.\n");
        return LD_ERROR_INTERNAL;
    }

  /** record the segment length and the overall string length */
    hyp_list = vithist_backtrace(kb->vithist, id, dict);
    finish_wid = dict_finishwid(dict);
    for (node = hyp_list; node != NULL; node = gnode_next(node)) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        hyp_seglen++;
        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {
            /* +1 for the separating space (the last one becomes the NUL) */
            hyp_strlen +=
                strlen(dict_wordstr(dict, dict_basewid(dict, hyp->id))) +
                1;
        }
    }

    /* Always leave room for the terminator of an empty string */
    if (hyp_strlen == 0) {
        hyp_strlen = 1;
    }

  /** allocate array to hold the segments and/or decoded string */
    hyp_str = (char *) ckd_calloc(hyp_strlen, sizeof(char));
    hyp_segs =
        (srch_hyp_t **) ckd_calloc(hyp_seglen + 1, sizeof(srch_hyp_t *));
    if (hyp_segs == NULL || hyp_str == NULL) {
        E_WARN("Failed to allocate storage for hypothesis.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_record_hyps_cleanup;
    }

  /** iterate thru to fill in the array of segments and/or decoded string */
    i = 0;
    hyp_strptr = hyp_str;
    for (node = hyp_list; node != NULL; node = gnode_next(node), i++) {
        hyp = (srch_hyp_t *) gnode_ptr(node);
        hyp_segs[i] = hyp;

        hyp->word = dict_wordstr(dict, dict_basewid(dict, hyp->id));
        if (!dict_filler_word(dict, hyp->id) && hyp->id != finish_wid) {
            strcat(hyp_strptr,
                   dict_wordstr(dict, dict_basewid(dict, hyp->id)));
            hyp_strptr += strlen(hyp_strptr);
            *hyp_strptr = ' ';
            hyp_strptr += 1;
        }
    }
    /* Only the list nodes are freed; the segments now belong to hyp_segs */
    glist_free(hyp_list);

    hyp_str[hyp_strlen - 1] = '\0';     /* overwrite the trailing space */
    hyp_segs[hyp_seglen] = 0;           /* NULL-terminate the segment array */
    _decoder->hyp_frame_num = _decoder->num_frames_decoded;
    _decoder->hyp_segs = hyp_segs;
    _decoder->hyp_str = hyp_str;

    return LD_SUCCESS;

  ld_record_hyps_cleanup:
    if (hyp_segs != NULL) {
        ckd_free(hyp_segs);
    }
    if (hyp_str != NULL) {
        /* Free the string itself; freeing hyp_segs again here would be a
         * double free and would leak hyp_str. */
        ckd_free(hyp_str);
    }
    if (hyp_list != NULL) {
        for (node = hyp_list; node != NULL; node = gnode_next(node)) {
            if ((hyp = (srch_hyp_t *) gnode_ptr(node)) != NULL) {
                ckd_free(hyp);
            }
        }
        /* Release the list nodes as well, not just the segments */
        glist_free(hyp_list);
    }

    return rv;
}
Exemplo n.º 23
0
/**
 * Record the current search hypothesis in the decoder.
 *
 * Releases any previously recorded hypothesis, fetches the hypothesis list
 * from the search object, and stores in _decode a NULL-terminated segment
 * array together with a string of the base forms of all words that are
 * neither fillers nor the finish word, separated by single spaces.
 *
 * @param _decode   Decoder instance.
 * @param _end_utt  Not used by this function.
 * @return S3_DECODE_SUCCESS on success, S3_DECODE_ERROR_NULL_POINTER if
 *         _decode is NULL, S3_DECODE_ERROR_INTERNAL if the search returned no
 *         hypothesis, S3_DECODE_ERROR_OUT_OF_MEMORY if allocation failed.
 */
int
s3_decode_record_hyps(s3_decode_t * _decode, int _end_utt)
{
    glist_t segments;
    gnode_t *gn;
    srch_hyp_t *seg;
    srch_hyp_t **seg_array = 0;
    char *text = 0;
    char *cursor = 0;
    srch_t *srch;
    dict_t *dict;
    int32 slot = 0;
    int n_seg = 0;
    int text_size = 0;
    int end_wid = 0;
    int rv;

    if (_decode == NULL)
        return S3_DECODE_ERROR_NULL_POINTER;

    s3_decode_free_hyps(_decode);

    dict = kbcore_dict(_decode->kbcore);
    srch = (srch_t *) _decode->kb.srch;
    segments = srch_get_hyp(srch);
    if (segments == NULL) {
        E_WARN("Failed to retrieve viterbi history.\n");
        return S3_DECODE_ERROR_INTERNAL;
    }

    /* Pass 1: count the segments and size the word string. */
    end_wid = dict_finishwid(dict);
    gn = segments;
    while (gn != NULL) {
        seg = (srch_hyp_t *) gnode_ptr(gn);
        n_seg++;
        if (!dict_filler_word(dict, seg->id) && seg->id != end_wid) {
            /* One extra byte per word for the separator / final NUL. */
            text_size +=
                strlen(dict_wordstr(dict, dict_basewid(dict, seg->id))) +
                1;
        }
        gn = gnode_next(gn);
    }

    /* An empty hypothesis string still needs its terminator. */
    if (text_size == 0)
        text_size = 1;

    /* Storage for the string and the NULL-terminated segment array. */
    text = (char *) ckd_calloc(text_size, sizeof(char));
    seg_array =
        (srch_hyp_t **) ckd_calloc(n_seg + 1, sizeof(srch_hyp_t *));
    if (seg_array == NULL || text == NULL) {
        E_WARN("Failed to allocate storage for hypothesis.\n");
        rv = S3_DECODE_ERROR_OUT_OF_MEMORY;
        goto s3_decode_record_hyps_cleanup;
    }

    /* Pass 2: store the segments and assemble the string. */
    cursor = text;
    slot = 0;
    gn = segments;
    while (gn != NULL) {
        seg = (srch_hyp_t *) gnode_ptr(gn);
        seg_array[slot] = seg;

        seg->word = dict_wordstr(dict, dict_basewid(dict, seg->id));
        if (!dict_filler_word(dict, seg->id) && seg->id != end_wid) {
            strcat(cursor, seg->word);
            cursor += strlen(cursor);
            *cursor++ = ' ';
        }

        gn = gnode_next(gn);
        slot++;
    }
    glist_free(segments);

    /* The last separator becomes the string terminator. */
    text[text_size - 1] = '\0';
    seg_array[n_seg] = 0;
    _decode->hyp_frame_num = _decode->num_frames_decoded;
    _decode->hyp_segs = seg_array;
    _decode->hyp_str = text;

    return S3_DECODE_SUCCESS;

  s3_decode_record_hyps_cleanup:
    if (seg_array != NULL) {
        ckd_free(seg_array);
    }
    if (text != NULL) {
        ckd_free(text);
    }
    if (segments != NULL) {
        for (gn = segments; gn != NULL; gn = gnode_next(gn)) {
            seg = (srch_hyp_t *) gnode_ptr(gn);
            if (seg != NULL) {
                ckd_free(seg);
            }
        }
        glist_free(segments);
    }

    return rv;
}
/**
 * Collect the words found in the backpointer table, grouped by start frame,
 * prune the unlikely ones, and build the word list for the second pass.
 */
static void
build_fwdflat_wordlist(ngram_search_t *ngs)
{
    int32 bpi, frm, start_frm, end_frm, word, n_words;
    bptbl_t *entry;
    ps_latnode_t *ln;
    ps_latnode_t **link;

    /* Without a preceding tree search the statically allocated list is used. */
    if (!ngs->fwdtree)
        return;

    memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));

    /* Pass over the backpointer table: one node per (start frame, word),
     * tracking the first and last end frame seen for it. */
    entry = ngs->bp_table;
    for (bpi = 0; bpi < ngs->bpidx; bpi++, entry++) {
        if (entry->bp < 0)
            start_frm = 0;
        else
            start_frm = ngs->bp_table[entry->bp].frame + 1;
        end_frm = entry->frame;
        word = entry->wid;

        /* Only words the LM knows about are eligible. */
        if (!ngram_model_set_known_wid(ngs->lmset,
                                       dict_basewid(ps_search_dict(ngs), word)))
            continue;

        /* Is this word already listed for this start frame? */
        ln = ngs->frm_wordlist[start_frm];
        while (ln && (ln->wid != word))
            ln = ln->next;

        if (ln) {
            /* Known node: just extend its last end frame. */
            ln->lef = end_frm;
            continue;
        }

        /* Unknown so far: create a node and push it on the frame's list. */
        ln = listelem_malloc(ngs->latnode_alloc);
        ln->wid = word;
        ln->fef = ln->lef = end_frm;

        ln->next = ngs->frm_wordlist[start_frm];
        ngs->frm_wordlist[start_frm] = ln;
    }

    /* Drop nodes with too narrow a range of end frames, and </s> nodes that
     * do not reach the final frame. */
    for (frm = 0; frm < ngs->n_frame; frm++) {
        link = &ngs->frm_wordlist[frm];
        while ((ln = *link) != NULL) {
            if ((ln->lef - ln->fef < ngs->min_ef_width) ||
                ((ln->wid == ps_search_finish_wid(ngs)) && (ln->lef < ngs->n_frame - 1))) {
                /* Unlink before freeing; link keeps pointing at the same slot. */
                *link = ln->next;
                listelem_free(ngs->latnode_alloc, ln);
            }
            else {
                link = &ln->next;
            }
        }
    }

    /* Flatten the per-frame lists into a duplicate-free word list,
     * terminated by -1, and mark those words active. */
    n_words = 0;
    bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
    for (frm = 0; frm < ngs->n_frame; frm++) {
        ln = ngs->frm_wordlist[frm];
        while (ln) {
            if (!bitvec_is_set(ngs->word_active, ln->wid)) {
                bitvec_set(ngs->word_active, ln->wid);
                ngs->fwdflat_wordlist[n_words++] = ln->wid;
            }
            ln = ln->next;
        }
    }
    ngs->fwdflat_wordlist[n_words] = -1;
    E_INFO("Utterance vocabulary contains %d words\n", n_words);
}