/*
 * Extend the partially built phone-level sentence HMM with transcript word w and all
 * of its alternative pronunciations, attaching them to the end phones in prev_end.
 * If prefix_filler is set, every word in fillwid[] (and each of its alternatives) is
 * first attached to prev_end, and the resulting filler end phones are chained onto the
 * tail of the prev_end list, so that w is subsequently attached to both the original
 * end phones and the filler end phones.
 * Return value: list (chained through ->next) of end phone nodes created for w; NULL
 * if w is not a valid word id.
 * NOTE(review): whether any of these nodes are entered into a global node list is
 * decided inside append_word, which is not visible here -- confirm there.
 */
static pnode_t *append_transcript_word
    (s3wid_t w,			/* Transcript word to be appended */
     pnode_t *prev_end,		/* Previous end points to be attached to w */
     s3wid_t nextw,		/* Next word to follow w (ignoring optional fillers) */
     int32 prefix_filler,	/* Whether optional filler words to precede w */
     int32 append_filler)	/* Whether optional filler words to follow w */
{
    s3cipid_t pred_ci[256], succ_ci[256];
    pnode_t *end_list, *word_ends, *tail;
    s3wid_t alt;
    int32 f;

    /* The CI lists below are BAD_CIPID terminated, so they need n_ciphone+1 slots */
    if (mdef->n_ciphone >= 256)
        E_FATAL("Increase pred_ci, succ_ci array sizes to > #CIphones (%d)\n",
                mdef->n_ciphone);
    assert (prev_end != NULL);

    if (prefix_filler) {
        /* Context for the fillers: preceded by prev_end, followed by w only */
        build_pred_ci (prev_end, pred_ci);
        build_succ_ci (w, 0, succ_ci);

        end_list = NULL;
        f = 0;
        while (IS_WID(fillwid[f])) {
            alt = fillwid[f];
            while (IS_WID(alt)) {
                word_ends = append_word (alt, prev_end, pred_ci, succ_ci);

                /* Push this pronunciation's end nodes onto the front of end_list */
                tail = word_ends;
                while (tail->next)
                    tail = tail->next;
                tail->next = end_list;
                end_list = word_ends;

                alt = dict->word[alt].alt;
            }
            f++;
        }

        /* Filler end phones become additional predecessors for w */
        tail = prev_end;
        while (tail->next)
            tail = tail->next;
        tail->next = end_list;
    }

    /* Context for w itself: preceded by the (possibly augmented) prev_end list */
    build_pred_ci (prev_end, pred_ci);
    build_succ_ci (nextw, append_filler, succ_ci);

    end_list = NULL;
    while (IS_WID(w)) {
        word_ends = append_word (w, prev_end, pred_ci, succ_ci);

        /* Same front-insertion of this pronunciation's end nodes */
        tail = word_ends;
        while (tail->next)
            tail = tail->next;
        tail->next = end_list;
        end_list = word_ends;

        w = dict->word[w].alt;
    }

    return end_list;
}
Exemple #2
0
/*
 * Parse a reference transcript line into word ids (via line2wid), append silwid as a
 * final word, and fill ref[0..n-1] with one DAG node per word (via wid2dagnode).
 * Word ids >= oovbegin are first mapped through hom_lookup; those that still remain
 * >= oovbegin are counted in *noov.  A filler word anywhere except the last position
 * is a fatal error.  uttid is cleared here and passed on to line2wid.
 * Return value: number of words placed in ref, including the appended silwid.
 */
static int32 refline2wds (char *line, dagnode_t *ref, int32 *noov, char *uttid)
{
    s3wid_t wid[MAX_UTT_LEN];
    s3wid_t hom;
    int32 n_wid, pos;

    uttid[0] = '\0';
    *noov = 0;

    /* One slot is held back from line2wid for the silwid appended below */
    n_wid = line2wid (dict, line, wid, MAX_UTT_LEN-1, 1, uttid);
    if (n_wid < 0)
        E_FATAL("Error in parsing ref line: %s\n", line);
    wid[n_wid] = silwid;
    n_wid++;

    for (pos = 0; pos < n_wid; pos++) {
        if (dict_filler_word (dict, wid[pos]) && (pos < n_wid-1))
            E_FATAL("Filler word (%s) in ref: %s\n", dict_wordstr(dict, wid[pos]), line);

        if (wid[pos] >= oovbegin) {
            /* Perhaps one of a homophone pair */
            hom = hom_lookup (wid[pos]);
            if (IS_WID(hom))
                wid[pos] = hom;

            /* Still OOV after the homophone substitution */
            if (wid[pos] >= oovbegin)
                *noov += 1;
        }

        wid2dagnode (ref+pos, pos, wid[pos]);
    }

    return n_wid;
}
Exemple #3
0
/*
 * Interactive dictionary lookup: repeatedly prompt for a word on stdin and print every
 * pronunciation of it (base word followed by its alternatives), one per line, as
 * "<word>\t <ciphone> <ciphone> ...".
 * Usage: prog {mdeffile | NULL} dict [fillerdict]
 * The loop ends, and 0 is returned, when stdin reaches EOF or a read error occurs.
 */
int main (int32 argc, char *argv[])
{
    mdef_t *m;
    dict_t *d;
    char wd[1024];
    s3wid_t wid;
    int32 p;
    
    if (argc < 3)
        E_FATAL("Usage: %s {mdeffile | NULL} dict [fillerdict]\n", argv[0]);
    
    /* The literal string "NULL" means: run without a model definition */
    m = (strcmp (argv[1], "NULL") != 0) ? mdef_init (argv[1]) : NULL;
    d = dict_init (m, argv[2], ((argc > 3) ? argv[3] : NULL), '_');
    
    for (;;) {
        printf ("word> ");

        /*
         * The field width keeps the token inside wd[]; checking the result stops the
         * loop at EOF instead of spinning forever on stale buffer contents.
         */
        if (scanf ("%1023s", wd) != 1)
            break;
        
        wid = dict_wordid (d, wd);
        if (NOT_WID(wid))
            E_ERROR("Unknown word\n");
        else {
            /* Start from the base word and follow the chain of alternatives */
            for (wid = dict_basewid(d, wid); IS_WID(wid); wid = d->word[wid].alt) {
                printf ("%s\t", dict_wordstr(d, wid));
                for (p = 0; p < d->word[wid].pronlen; p++)
                    printf (" %s", dict_ciphone_str (d, wid, p));
                printf ("\n");
            }
        }
    }

    return 0;
}
Exemple #4
0
/*
 * Return the alt link stored in entry wid of dictionary d.
 * d must be non-NULL and wid must be a valid word id below d->n_word (both asserted).
 */
s3wid_t _dict_nextalt (dict_t *d, s3wid_t wid)
{
    s3wid_t next_alt;

    assert (d);
    assert (IS_WID(wid) && (wid < d->n_word));

    next_alt = d->word[wid].alt;
    return next_alt;
}
Exemple #5
0
/*
 * Return the word string stored in entry wid of dictionary d.  The pointer refers to
 * the dictionary's own storage; no copy is made.
 * d must be non-NULL and wid must be a valid word id below d->n_word (both asserted).
 */
char *_dict_wordstr (dict_t *d, s3wid_t wid)
{
    char *str;

    assert (d);
    assert (IS_WID(wid) && (wid < d->n_word));

    str = d->word[wid].word;
    return str;
}
Exemple #6
0
/*
 * Print one line describing a region in error:
 *   <base words ref[ws..we]>  =>  (<left ctx phone>) <phones ap[ps..pe]>  (<right ctx phone>) ( <id> )
 * Phones flagged in ap_err[] are printed in upper case, the others in lower case.
 * The left context is the last CI phone of ref[ws-1] (empty if ws == 0); the right
 * context is the first CI phone of ref[we+1] (empty if that is not a valid word id).
 * nref and wseg are not used at present; an earlier variant (see the notes below)
 * consulted wseg[] to decide whether to show the context phones.
 * NOTE(review): ref[we+1] is read without consulting nref -- this assumes ref[] is
 * terminated by a non-word id; confirm against the caller.
 */
static void pronerr_output (char *id, s3wid_t *ref, int32 nref,
			    wseg_t *wseg, s3cipid_t *ap, int8 *ap_err,
			    int32 ws, int32 we, int32 ps, int32 pe)
{
    int32 j, n, width;
    s3wid_t rcwid, lcwid;
    char str[4096];
    
    /*
     * Word sequence for region in error, left-justified in a 22-column field.
     * The words are written directly and the padding is computed from the number of
     * characters printed, so an arbitrarily long word sequence cannot overflow a
     * fixed-size staging buffer.
     */
    n = printf ("%s", dict_wordstr (dict, dict_basewid(dict, ref[ws])));
    width = (n > 0) ? n : 0;
    for (j = ws+1; j <= we; j++) {
        n = printf (" %s", dict_wordstr (dict, dict_basewid(dict, ref[j])));
        if (n > 0)
            width += n;
    }
    if (width < 22)
        printf ("%*s", (int) (22 - width), "");
    printf ("\t=>\t");

    /*
     * Print left context phone.
     * (Earlier variant: only when wseg[ws].s < 0, ws > 0 and ref[ws-1] is valid.)
     */
    lcwid = (ws > 0) ? ref[ws-1] : BAD_WID;
    if (IS_WID(lcwid)) {
        j = dict->word[lcwid].pronlen - 1;
        snprintf (str, sizeof(str), "(%s)",
                  mdef_ciphone_str (mdef, dict->word[lcwid].ciphone[j]));
    } else
        strcpy (str, "()");
    printf ("%-5s", str);
    
    /* Phone sequence for region in error; case marks which phones are in error */
    for (j = ps; j <= pe; j++) {
        /* Work on a private copy: ucase/lcase modify the string in place */
        snprintf (str, sizeof(str), "%s", mdef_ciphone_str (mdef, ap[j]));
        if (ap_err[j])
            ucase (str);
        else
            lcase (str);
        
        printf (" %s", str);
    }
    
    /*
     * Right context.
     * (Earlier variant: only when wseg[we].e < 0 and ref[we+1] is valid.)
     */
    rcwid = ref[we+1];
    if (IS_WID(rcwid))
        printf ("\t(%s)", mdef_ciphone_str (mdef, dict->word[rcwid].ciphone[0]));
    else
        printf ("\t()");

    printf (" ( %s )\n", id);
}
/*
 * One-time initialization of the alignment module:
 *   - fetch the model definition, transition matrices and dictionary;
 *   - look up the START/FINISH/SILENCE word ids (the first two are mandatory);
 *   - build fillwid[], the BAD_WID-terminated list of optional filler words: silence
 *     first (if present), then every base word in the dictionary's filler range other
 *     than silence, start and finish;
 *   - set the beam from the -beam argument, and reset the remaining module state.
 * Return value: always 0.
 */
int32 align_init ( void )
{
    float64 *beam_arg;
    s3wid_t fw;
    int32 n_fill;

    mdef = mdef_getmdef ();
    tmat = tmat_gettmat ();
    dict = dict_getdict ();

    assert (mdef && tmat && dict);

    startwid = dict_wordid (START_WORD);
    finishwid = dict_wordid (FINISH_WORD);
    silwid = dict_wordid (SILENCE_WORD);

    if (NOT_WID(startwid) || NOT_WID(finishwid))
        E_FATAL("%s or %s not in dictionary\n", START_WORD, FINISH_WORD);
    if (NOT_WID(silwid))
        E_ERROR("%s not in dictionary; no optional silence inserted between words\n",
                SILENCE_WORD);

    /*
     * Filler list: room for every word in the filler range, plus silence, plus the
     * BAD_WID terminator (and one spare slot).
     */
    fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3),
                                      sizeof(s3wid_t));
    n_fill = 0;
    if (IS_WID(silwid)) {
        fillwid[n_fill] = silwid;
        n_fill++;
    }
    for (fw = dict->filler_start; fw <= dict->filler_end; fw++) {
        /* Alternative pronunciations are reached later through their base word */
        if (dict_basewid (fw) != fw)
            continue;
        /* Silence is already first in the list; start/finish are not optional fillers */
        if ((fw == silwid) || (fw == startwid) || (fw == finishwid))
            continue;

        fillwid[n_fill] = fw;
        n_fill++;
    }
    fillwid[n_fill] = BAD_WID;

    beam_arg = (float64 *) cmd_ln_access ("-beam");
    beam = logs3 (*beam_arg);
    E_INFO ("logs3(beam)= %d\n", beam);

    score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));

    /* Nothing has been aligned yet */
    hist_head = NULL;
    align_stseg = NULL;
    align_phseg = NULL;
    align_wdseg = NULL;

    ctr_nstate = counter_new ("NS");

    return 0;
}
Exemple #8
0
/*
 * Build a pronunciation dictionary for a vocabulary read from stdin.
 * Usage: prog dictfile [dictfile ...] < vocabfile
 * Each vocabulary line holds one or two words: the first is the name written to the
 * output, the second (defaulting to the first) is the word looked up.  Dictionaries
 * are searched in command-line order; all pronunciations from the first dictionary
 * containing the word are printed, the 2nd, 3rd, ... being labelled "name(2)",
 * "name(3)", ...  Words found in no dictionary are reported with E_ERROR.
 * Returns 0 after stdin is exhausted.
 */
int main (int32 argc, char *argv[])
{
    dict_t **d;
    int32 i, k, p, wid;
    char line[16384], *wp[1024];
    
    if (argc < 2) {
        E_INFO("Usage: %s dictfile [dictfile ...] < vocabfile\n", argv[0]);
        exit(0);
    }
    d = (dict_t **) ckd_calloc (argc-1, sizeof(dict_t *));
    
    for (i = 1; i < argc; i++)
        d[i-1] = dict_init (NULL, argv[i], NULL, 0);
    
    while (fgets (line, sizeof(line), stdin) != NULL) {
        if ((k = str2words (line, wp, 1024)) < 0)
            E_FATAL("Line too long: %s\n", line);
        if (k > 2)
            E_FATAL("Vocab entry contains too many words\n");
        
        if (k == 0)
            continue;		/* Blank line */
        if (k == 1)
            wp[1] = wp[0];	/* Lookup key defaults to the output name */
        
        /* Look up word in each dictionary until found; k counts pronunciations printed */
        k = 0;
        for (i = 0; (i < argc-1) && (k == 0); i++) {
            wid = dict_wordid (d[i], wp[1]);
            if (NOT_WID(wid))
                continue;
            
            for (wid = dict_basewid(d[i], wid);
                 IS_WID(wid);
                 wid = dict_nextalt(d[i], wid)) {
                k++;
                if (k == 1)
                    printf ("%s\t", wp[0]);
                else
                    printf ("%s(%d)\t", wp[0], k);
                
                for (p = 0; p < dict_pronlen(d[i], wid); p++)
                    printf (" %s", dict_ciphone_str (d[i], wid, p));
                printf ("\n");
            }
        }
        if (k == 0)
            E_ERROR("No pronunciation for: '%s'\n", wp[0]);
    }

    /* Explicit exit status; the original fell off the end of an implicit-int main */
    return 0;
}
/*
 * Build in succ_ci[] the BAD_CIPID-terminated list, in increasing order, of the
 * distinct CI phones that begin any pronunciation of w and, if append_filler is
 * set, any pronunciation of any word in fillwid[].
 * succ_ci must have room for mdef->n_ciphone + 1 entries.
 */
static void build_succ_ci (s3wid_t w, int32 append_filler, s3cipid_t *succ_ci)
{
    int32 ci, f, n_succ;
    s3wid_t fw;

    /* Phase 1: use succ_ci[] as a flag array indexed by CI phone id */
    for (ci = 0; ci < mdef->n_ciphone; ci++)
        succ_ci[ci] = 0;

    while (IS_WID(w)) {
        succ_ci[dict->word[w].ciphone[0]] = 1;
        w = dict->word[w].alt;
    }

    if (append_filler) {
        for (f = 0; IS_WID(fillwid[f]); f++) {
            fw = fillwid[f];
            while (IS_WID(fw)) {
                succ_ci[dict->word[fw].ciphone[0]] = 1;
                fw = dict->word[fw].alt;
            }
        }
    }

    /*
     * Phase 2: compact the flags in place into a list of phone ids.  The write index
     * never passes the read index, so no unread flag is overwritten.
     */
    n_succ = 0;
    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        if (succ_ci[ci]) {
            succ_ci[n_succ] = ci;
            n_succ++;
        }
    }
    succ_ci[n_succ] = BAD_CIPID;
}
/*
 * Debugging aid: print one line for phone node p -- its id, then word string, position,
 * pid and CI phone (or a "<phead>" placeholder row if p has no valid word id), then its
 * left and right context phones ("-" where not a valid CI phone id), and finally the
 * ids of all nodes on its successor list.
 */
static void dump_pnode_succ (pnode_t *p)
{
    plink_t *succ;

    printf ("  %5d", p->id);

    if (! IS_WID(p->wid)) {
        /* Node carries no word: print placeholder columns */
        printf (" %20s %02d %6d %4s",
                "<phead>", 0, BAD_PID, "");
    } else {
        printf (" %20s %02d %6d %4s",
                dict_wordstr(p->wid), p->pos, p->pid, mdef_ciphone_str (mdef, p->ci));
    }

    printf (" %4s %4s",
            IS_CIPID(p->lc) ? mdef_ciphone_str (mdef, p->lc) : "-",
            IS_CIPID(p->rc) ? mdef_ciphone_str (mdef, p->rc) : "-");
    printf ("\t");

    succ = p->succlist;
    while (succ) {
        printf (" %5d", succ->node->id);
        succ = succ->next;
    }

    printf ("\n");
}
Exemple #11
0
/*
 * Find the compound word whose component sequence is exactly wid[0..len-1].
 * Candidates are taken from the chain d->comp_head[wid[0]], d->comp_head[...], ...
 * Return value: base word id of the first matching compound word; BAD_WID if the
 * dictionary has no compound-word table or no candidate matches.
 * len must be > 1 (asserted once a compound-word table is known to exist).
 */
s3wid_t dict_wids2compwid (dict_t *d, s3wid_t *wid, int32 len)
{
    s3wid_t w;
    int32 n_match;

    if (! d->comp_head)
        return BAD_WID;

    assert (len > 1);

    w = d->comp_head[wid[0]];
    while (IS_WID(w)) {
        /* w is a compound word beginning with wid[0]; check if rest matches */
        assert (d->word[w].n_comp > 1);
        assert (d->word[w].comp[0] == wid[0]);

        if (d->word[w].n_comp == len) {
            /* Count the leading components that agree with wid[] */
            n_match = 0;
            while ((n_match < len) && (d->word[w].comp[n_match] == wid[n_match]))
                n_match++;

            if (n_match == len)
                return (dict_basewid(d, w));
        }

        w = d->comp_head[w];
    }

    return BAD_WID;
}
Exemple #12
0
/*
 * Load the LM context for utterance uttid from corpus corp.
 * The corpus entry must consist of exactly three whitespace-separated tokens:
 * two predecessor words followed by one successor word, where "-" stands for
 * "no word" (BAD_WID).  Each word is mapped to its base word id; a predecessor that is
 * a compound word is replaced by its final component word(s).  The successor, if
 * given, must be FINISH_WORD.  Every context word must also be known to the LM.
 * Any violation is a fatal error.
 * Results: pred[0], pred[1] receive the predecessor word ids, *succ the successor.
 */
void lmcontext_load (corpus_t *corp, char *uttid, s3wid_t *pred, s3wid_t *succ)
{
    char *str, wd[4096], *strp;
    s3wid_t w[3];
    int32 i, n;
    dict_t *dict;
    s3lmwid_t lwid;
    
    if ((str = corpus_lookup (corp, uttid)) == NULL)
        E_FATAL("Couldn't find LM context for %s\n", uttid);
    dict = dict_getdict ();
    
    /*
     * Parse exactly three tokens.  The loop never indexes beyond w[2], and the field
     * width keeps each token inside wd[].
     */
    strp = str;
    for (i = 0; i < 3; i++) {
        if (sscanf (strp, "%4095s%n", wd, &n) != 1)
            E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
        strp += n;
        
        if (strcmp (wd, "-") == 0)
            w[i] = BAD_WID;
        else {
            w[i] = dict_wordid (wd);
            if (NOT_WID(w[i]))
                E_FATAL("LM context word (%s) for %s not in dictionary\n", wd, uttid);
            w[i] = dict_basewid(w[i]);
            
            switch (i) {
            case 0: 
                /* Compound word: only its last component serves as context */
                if ((n = dict->word[w[0]].n_comp) > 0)
                    w[0] = dict->word[w[0]].comp[n-1].wid;
                break;
                
            case 1:
                /*
                 * Compound word: its last two components replace both predecessors.
                 * NOTE(review): comp[n-2] assumes n_comp >= 2 for compound words --
                 * confirm against the dictionary loader.
                 */
                if ((n = dict->word[w[1]].n_comp) > 0) {
                    w[0] = dict->word[w[1]].comp[n-2].wid;
                    w[1] = dict->word[w[1]].comp[n-1].wid;
                }
                break;
                
            case 2:
                if (w[2] != dict_wordid(FINISH_WORD))
                    E_FATAL("Illegal successor LM context for %s: %s\n", uttid, str);
                break;
                
            default:
                assert (0);	/* Should never get here */
                break;
            }
        }
    }

    /* Anything after the third token is a malformed spec */
    if (sscanf (strp, "%4095s", wd) == 1)
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);
    
    /* A first predecessor without a second one is meaningless */
    if (IS_WID(w[0]) && NOT_WID(w[1]))
        E_FATAL("Bad LM context spec for %s: %s\n", uttid, str);

    for (i = 0; i < 3; i++) {
        if (IS_WID(w[i])) {
            lwid = lm_lmwid (w[i]);
            /*
             * NOTE(review): wd holds the last token parsed, which is not necessarily
             * the word for w[i]; the message may name the wrong word.
             */
            if (NOT_LMWID(lwid))
                E_FATAL("LM context word (%s) for %s not in LM\n", wd, uttid);
        }
    }
    
    pred[0] = w[0];
    pred[1] = w[1];
    *succ = w[2];
}
Exemple #13
0
/*
 * Parse one alignment line into the hypothesis phone sequence and a per-reference-word
 * segmentation.
 * The line is a sequence of CI phone names in which regions in error are written as
 *     [[ <ref phones> => <hyp phones> ]]
 * and which ends with an utterance id token (as recognized by is_uttid).
 * Phones outside [[ ]] belong to both the reference and the hypothesis; phones between
 * "[[" and "=>" belong to the reference only; phones between "=>" and "]]" to the
 * hypothesis only.  Reference phones must match, in order, the pronunciations of the
 * words in ref[], which is terminated by a non-word id.
 * Outputs:
 *   ap[], ap_err[]: hypothesis phones and their error flags, followed by a terminating
 *       entry (BAD_CIPID, 1).
 *       NOTE(review): the terminator may land at index aplen, so both arrays appear
 *       to need aplen+1 entries -- confirm against the caller.
 *   return value: statically allocated array (MAX_UTT_LEN entries, reused on every
 *       call) with one entry per ref word plus a final sentinel entry:
 *       s = index into ap[] of the word's first phone if that phone is outside any
 *           error region, else -1;
 *       e = index into ap[] of the word's last phone if that phone is outside any
 *           error region, else -1;
 *       err = 1 if any phone of the word lies in an error region, else 0.
 *       NOTE(review): the number of ref words is not checked against MAX_UTT_LEN here.
 * Any format violation, and a mismatch between id and the uttid on the line, is fatal.
 */
static wseg_t *line2wseg (char *line, s3wid_t *ref,
			  s3cipid_t *ap, int8 *ap_err, int32 aplen, char *id)
{
    char word[1024], uttid[1024], *lp;
    int32 i, k, n_hypci, n_refwd, n_refci, pronlen;
    s3cipid_t ci;
    typedef enum {CORR=0, REFERR=1, HYPERR=2} state_t;
    state_t state;
    static wseg_t *wseg = NULL;
    
    if (! wseg)
        wseg = (wseg_t *) ckd_calloc (MAX_UTT_LEN, sizeof(wseg_t));

    lp = line;
    n_hypci = n_refci = pronlen = 0;
    n_refwd = -1;		/* No ref word started yet */
    uttid[0] = '\0';
    state = CORR;
    
    /* The field width keeps each token inside word[] */
    while (sscanf (lp, "%1023s%n", word, &k) == 1) {
        lp += k;
        
        if (is_uttid (word, uttid))
            break;
        
        if (strcmp (word, "[[") == 0) {
            if (state != CORR)
                E_FATAL("%s: Illegal [[\n", id);
            state = REFERR;
            /* Error region begins in the middle of the current ref word */
            if (n_refci < pronlen)
                wseg[n_refwd].err = 1;
        } else if (strcmp (word, "]]") == 0) {
            if (state != HYPERR)
                E_FATAL("%s: Illegal ]]\n", id);
            state = CORR;
        } else if (strcmp (word, "=>") == 0) {
            if (state != REFERR)
                E_FATAL("%s: Illegal =>\n", id);
            state = HYPERR;
        } else {
            ci = mdef_ciphone_id (mdef, word);
            if (NOT_CIPID(ci))
                E_FATAL("%s: Unknown CIphone %s\n", id, word);
            
            if (state != HYPERR) {	/* Check if matches next pron in ref word */
                if (n_refci >= pronlen) {
                    /* Previous ref word fully consumed; move on to the next one */
                    assert (n_refci == pronlen);
                    n_refwd++;

                    /*
                     * Validate the word id BEFORE indexing the dictionary with it.
                     * (When no new word is started, ref[n_refwd] has already passed
                     * this check.)
                     */
                    if (NOT_WID(ref[n_refwd]))
                        E_FATAL("%s: Premature end of ref wid\n", id);

                    pronlen = dict->word[ref[n_refwd]].pronlen;
                    assert (pronlen > 0);

                    wseg[n_refwd].s = (state == CORR) ? n_hypci : -1;
                    wseg[n_refwd].e = -1;
                    wseg[n_refwd].err = 0;
                    
                    n_refci = 0;
                }

                if (dict->word[ref[n_refwd]].ciphone[n_refci] != ci)
                    E_FATAL("%s: CIphone mismatch at word %d, ciphone %d\n",
                            id, n_refwd, n_refci);
                n_refci++;
                if ((n_refci == pronlen) && (state == CORR))
                    wseg[n_refwd].e = n_hypci;

                if (state != CORR)
                    wseg[n_refwd].err = 1;
            }
            
            if (state != REFERR) {	/* Phone is part of the hypothesis */
                if (n_hypci >= aplen)
                    E_FATAL("%s: Too many CIphones: >%d\n", id, aplen);
                ap[n_hypci] = ci;
                ap_err[n_hypci] = (state == CORR) ? 0 : 1;
                n_hypci++;
            }
        }
    }

    /* The last ref word must be complete and ref[] must be exhausted */
    assert (n_refci == pronlen);
    n_refwd++;
    assert (NOT_WID(ref[n_refwd]));

    /* Sentinel entries following the last word / last phone */
    wseg[n_refwd].s = wseg[n_refwd].e = n_hypci;
    wseg[n_refwd].err = 0;
    
    ap[n_hypci] = BAD_CIPID;
    ap_err[n_hypci] = 1;
    
    if (strcmp (uttid, id) != 0)
        E_FATAL("Uttid mismatch: %s expected, %s found\n", id, uttid);

#if 0
    for (i = 0; IS_WID(ref[i]); i++) {
	printf ("%s: %4d %4d %d %s\n", id, wseg[i].s, wseg[i].e, wseg[i].err,
		dict_wordstr (dict, ref[i]));
    }
#endif

    return wseg;
}
/*
 * Build a sentence HMM for the given transcription (wordstr).  A two-level DAG is
 * built: phone-level and state-level.
 *   - The first transcript word is attached directly to the dummy head node and the
 *     last one to the dummy tail node.  NOTE: the automatic insertion of <s> before
 *     and </s> after the transcript is disabled in this version (the "HACK" below);
 *     presumably the transcript is expected to supply them itself -- confirm.
 *   - Optional <sil> and noise words are allowed between consecutive transcript words.
 * wordstr must contain only the transcript; no extraneous stuff such as utterance-id.
 * wordstr is modified temporarily while being tokenized, and restored.
 * Phone-level HMM structure has replicated nodes to allow for different left and right
 * context CI phones; hence, each pnode corresponds to a unique triphone in the sentence
 * HMM.
 * Return 0 if successful, <0 if any error (OOV word encountered, or no transcript
 * word produced any phone node, e.g. an empty transcript).
 */
int32 align_build_sent_hmm (char *wordstr)
{
    s3wid_t w, nextw;
    int32 k, oov;
    pnode_t *word_end, *node;
    char *wd, delim, *wdcopy;
    int32 firsttime = 1;	/* HACK (Bhiksha): first word is linked to phead, not <s> */

    /* Initialize dummy head and tail entries of sent hmm */
    phead.wid = BAD_WID;
    phead.ci = BAD_CIPID;
    phead.lc = BAD_CIPID;	/* No predecessor */
    phead.rc = BAD_CIPID;	/* Any phone can follow head */
    phead.pid = BAD_PID;
    phead.succlist = NULL;
    phead.predlist = NULL;
    phead.next = NULL;		/* Will ultimately be the head of list of all pnodes */
    phead.id = -1;		/* Hardwired */
    phead.startstate = NULL;
    
    ptail.wid = BAD_WID;
    ptail.ci = BAD_CIPID;
    ptail.lc = BAD_CIPID;	/* Any phone can precede tail */
    ptail.rc = BAD_CIPID;	/* No successor */
    ptail.pid = BAD_PID;
    ptail.succlist = NULL;
    ptail.predlist = NULL;
    ptail.next = NULL;
    ptail.id = -2;		/* Hardwired */
    ptail.startstate = NULL;

    n_pnode = 0;
    pnode_list = NULL;
    oov = 0;
    word_end = NULL;		/* Stays NULL if the transcript has no words */
    wdcopy = NULL;
    
    /* State-level DAG initialization should be here in case the build is aborted */
    shead.pnode = &phead;
    shead.succlist = NULL;
    shead.predlist = NULL;
    shead.sen = BAD_SENID;
    shead.state = mdef->n_emit_state;
    shead.hist = NULL;

    stail.pnode = &ptail;
    stail.succlist = NULL;
    stail.predlist = NULL;
    stail.sen = BAD_SENID;
    stail.state = 0;
    stail.hist = NULL;
    
    /*
     * Obtain the first transcript word.  The token is copied (for error messages) and
     * the delimiter reported by nextword is then written back over the token's end,
     * leaving wordstr pointing at the rest of the transcript.
     */
    k = nextword (wordstr, " \t\n", &wd, &delim);
    if (k < 0)
        nextw = finishwid;
    else {
        wordstr = wd + k;
        wdcopy = ckd_salloc (wd);
        *wordstr = delim;
        nextw = dict_wordid (wdcopy);
        if (IS_WID(nextw))
            nextw = dict_basewid (nextw);
    }
    
    /* HACK (Bhiksha): <s> is no longer appended to phead before the transcript */

    /* Append each word in transcription to partial sent HMM created so far */
    while (k >= 0) {
        w = nextw;
        if (NOT_WID(w)) {
            E_ERROR("%s not in dictionary\n", wdcopy);
            oov = 1;
            /* Hack!! Temporarily set w to some dummy just to run through sentence */
            w = finishwid;
        }
        ckd_free (wdcopy);
        wdcopy = NULL;
        
        /* Look ahead one word: its first phones are the right context for w */
        k = nextword (wordstr, " \t\n", &wd, &delim);

        if (k < 0)
            nextw = finishwid;
        else {
            wordstr = wd + k;
            wdcopy = ckd_salloc (wd);
            *wordstr = delim;
            nextw = dict_wordid (wdcopy);
            if (IS_WID(nextw))
                nextw = dict_basewid (nextw);
        }

        /*
         * HACK (Bhiksha):
         *   - first word: attach to phead, no fillers before it;
         *   - next word id is finishwid (also the case at the end of the transcript):
         *     no successor word and no fillers after w;
         *   - otherwise: optional fillers both before and after w.
         */
        if (firsttime) {
            word_end = append_transcript_word (w, &phead, nextw, 0, 1);
            firsttime = 0;
        } else if (nextw == finishwid)
            word_end = append_transcript_word (w, word_end, BAD_WID, 1, 0);
        else
            word_end = append_transcript_word (w, word_end, nextw, 1, 1);
    }
    if (oov)
        return -1;

    /*
     * With no transcript words the loop above never ran; without this check the code
     * below would read word_end uninitialized.
     */
    if (word_end == NULL) {
        E_ERROR("Empty transcript; cannot build sentence HMM\n");
        return -1;
    }
    
    /* HACK (Bhiksha): </s> is no longer appended here; link last word(s) to tail node */
    for (node = word_end; node; node = node->next)
        link_pnodes (node, &ptail);
    
    /* Build state-level DAG from the phone-level one */
    build_state_dag ();
    /* Dag must begin and end at shead and stail, respectively */
    assert (shead.succlist);
    assert (stail.predlist);
    assert (! shead.predlist);
    assert (! stail.succlist);

#if _DEBUG_ALIGN_
    dump_sent_hmm ();	/* For debugging */
#endif

    k = n_pnode * mdef->n_emit_state;
    if (k > active_list_size) {	/* Need to grow active list arrays */
        if (active_list_size > 0) {
            ckd_free (cur_active);
            ckd_free (next_active);
        }
        /* Grow in ACTIVE_LIST_SIZE_INCR steps until strictly larger than k */
        for (; active_list_size <= k; active_list_size += ACTIVE_LIST_SIZE_INCR);
        cur_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *));
        next_active = (snode_t **) ckd_calloc (active_list_size, sizeof(snode_t *));
    }
    
    return 0;
}