Пример #1
0
/*
 * Make sure a directed link from node "from" to node "to" exists.
 *
 * If "from" has no link to "to" yet, a new latlink_t is allocated, filled in with the
 * given score and end frame (best_prev cleared), and pushed onto the head of
 * from->links.  If such a link is already present it is reused, and its link_scr/ef
 * are replaced only when the new score is strictly greater than the stored one.
 *
 * The target node must already be marked reachable (asserted).
 */
static void link_latnodes (latnode_t *from, latnode_t *to, int32 score, int32 ef)
{
    latlink_t *l;

    assert (to->reachable);

    /* Scan the outgoing links of "from" for one that already targets "to" */
    l = from->links;
    while (l != NULL) {
        if (l->to == to)
            break;
        l = l->next;
    }

    if (l != NULL) {
        /* Existing link: keep whichever score is better */
        if (score > l->link_scr) {
            l->link_scr = score;
            l->ef = ef;
        }
        return;
    }

    /* No link between the two nodes yet: build one and insert it at the list head */
    l = (latlink_t *) listelem_alloc (sizeof(latlink_t));

    l->from = from;
    l->to = to;
    l->link_scr = score;
    l->ef = ef;
    l->best_prev = NULL;

    l->next = from->links;
    from->links = l;
}
/*
 * Allocate a pnode, initialise it from the given attributes and push it onto the head
 * of the global pnode_list (chained through alloc_next).
 *
 * w, pos, ci, lc and rc are stored in the node as given; ci, lc, rc and wpos are also
 * handed to mdef_phone_id_nearest() to obtain the node's pid.  The node id is taken
 * from the global counter n_pnode, which is then incremented.  The successor and
 * predecessor lists, the next pointer and startstate all start out as NULL.
 *
 * Return value: pointer to the newly allocated pnode.
 */
static pnode_t *alloc_pnode (s3wid_t w, int32 pos,
                             s3cipid_t ci, s3cipid_t lc, s3cipid_t rc,
                             word_posn_t wpos)
{
    pnode_t *node = (pnode_t *) listelem_alloc (sizeof(pnode_t));

    /* Identity of the phone instance */
    node->wid = w;
    node->pos = pos;
    node->ci = ci;
    node->lc = lc;
    node->rc = rc;
    node->pid = mdef_phone_id_nearest (mdef, ci, lc, rc, wpos);

    /* Not connected to anything yet */
    node->startstate = NULL;
    node->predlist = NULL;
    node->succlist = NULL;
    node->next = NULL;

    /* Sequential id, then registration on the global allocation list */
    node->id = n_pnode;
    n_pnode++;

    node->alloc_next = pnode_list;
    pnode_list = node;

    return node;
}
/*
 * Build the global align_stseg list from a history chain.
 *
 * The chain is walked from rooth along the pred pointers; one align_stseg_t is
 * appended per history entry, in walk order.  For the entry at walk index f:
 *   pid, state - copied from the entry's snode (pid via its pnode);
 *   start      - 1 if this is the first entry or its pnode id differs from that of
 *                the previously visited entry, 0 otherwise;
 *   score      - entry score minus the previously visited entry's score, plus
 *                score_scale[f];
 *   bsdiff     - the entry's score.
 *
 * align_stseg must be NULL on entry (asserted).
 */
static void build_stseg (history_t *rooth)
{
    history_t *cur, *last;
    align_stseg_t *seg, *tail;
    int32 frm, lastscr;

    assert (align_stseg == NULL);

    last = NULL;
    lastscr = 0;
    frm = 0;
    for (cur = rooth; cur != NULL; cur = cur->pred) {
        seg = (align_stseg_t *) listelem_alloc (sizeof(align_stseg_t));

        seg->pid = cur->snode->pnode->pid;
        seg->state = cur->snode->state;
        if ((last == NULL) || (last->snode->pnode->id != cur->snode->pnode->id))
            seg->start = 1;
        else
            seg->start = 0;
        seg->score = cur->score - lastscr + score_scale[frm];
        seg->bsdiff = cur->score;
        seg->next = NULL;

        /* Append; the list is empty on the first pass, which is what sets tail */
        if (align_stseg == NULL)
            align_stseg = seg;
        else
            tail->next = seg;
        tail = seg;

        lastscr = cur->score;
        last = cur;
        frm++;
    }
}
/*
 * Add a pnode to a list of pnodes kept as a chain of plinks.  A new plink holding
 * "node" is allocated and placed in front of "list".
 * Return value: the new plink, i.e. the new head of the list.
 */
static plink_t *append_pnode (plink_t *list, pnode_t *node)
{
    plink_t *head = (plink_t *) listelem_alloc (sizeof(plink_t));

    head->next = list;
    head->node = node;

    return head;
}
Пример #5
0
/*
 * Create a degenerate DAG (linear sequence of nodes) for the given hyp line.
 * The DAG contains a terminal sentinel silwid node.
 */
static dag_t *hypline2dag (char *ref_uttid, char *line)
{
    char junk1[4096], junk2[4096], uttid[4096];
    s3wid_t wid[MAX_UTT_LEN];
    int32 i, n;
    dag_t *dag;
    dagnode_t *d;
    
    if ((n = line2wid (dict, line, wid, MAX_UTT_LEN-1, 0, uttid)) < 0)
	E_FATAL("Error in parsing hyp line: %s\n", line);
    
    /* Verify uttid with ref_uttid */
    if (strcmp (uttid, ref_uttid) != 0) {
	strcpy (junk1, uttid);
	ucase (junk1);
	strcpy (junk2, ref_uttid);
	ucase (junk2);
	if (strcmp (junk1, junk2) != 0)
	    E_FATAL("Uttid mismatch: %s(ref), %s(hyp)\n", ref_uttid, uttid);
    }

    /* Build DAG from word sequence */
    dag = ckd_calloc (1, sizeof(dag_t));
    dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *));
    dag->nnode = 0;
    dag->nfrm = 0;
    dag->nlink = 0;

    for (i = 0; i < n; i++) {
	if ((NOT_WID(wid[i])) || (wid[i] >= oovbegin))
	    E_FATAL("%s: Unknown word in line: %s\n", uttid, line);
	
	/* Create DAG node for word */
	d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t));
	wid2dagnode (d, i, wid[i]);
	
	dag->node_sf[i] = d;
	if (i > 0) {
	    dag_link (dag->node_sf[i-1], d);
	    dag->nlink++;
	}
	
	dag->nnode++;
    }
    dag->nfrm = dag->nnode;
    
    dag->entry.src = NULL;
    dag->entry.dst = dag->node_sf[0];
    dag->entry.next = NULL;

    dag->exit.src = NULL;
    dag->exit.dst = dag->node_sf[dag->nnode - 1];
    dag->exit.next = NULL;
    
    return dag;
}
Пример #6
0
/*
 * Parse one sentence line of an N-best file into a list of hyp_t entries.
 *
 * As parsed here, the line consists of three tagged integers, in the order
 * "T <int> A <int> L <int>", followed by any number of "<start-frame> <word>" pairs.
 * The T and A values are read and discarded; the L value is stored as lscr in every
 * entry of the returned list.
 *
 * Return value: head of the hyp list, in line order.  NULL is returned, after logging
 * an error, if the line is malformed; NULL is also returned without an error for a
 * line that has a valid header but no word pairs.  Each entry's word is a copy made
 * with ckd_salloc().
 *
 * All %s conversions are limited to the size of the word buffer so that an overlong
 * token cannot overrun it; such a token instead fails one of the checks below.
 */
static hyp_t *nbestfile_parseline (char *sent)
{
    char *lp;
    hyp_t *head, *tail, *hyp;
    char word[1024];
    int32 k, sf, len, lscr;

    head = tail = NULL;

    lp = sent;
    /* Parse T <score>; %n reports how many characters were consumed */
    if ((sscanf (lp, "%1023s%d%n", word, &k, &len) != 2) || (strcmp (word, "T") != 0)) {
        E_ERROR("Bad sentence: %s\n", sent);
        return NULL;
    }
    lp += len;

    /* Parse A <score> */
    if ((sscanf (lp, "%1023s%d%n", word, &k, &len) != 2) || (strcmp (word, "A") != 0)) {
        E_ERROR("Bad sentence: %s\n", sent);
        return NULL;
    }
    lp += len;

    /* Parse L <score> */
    if ((sscanf (lp, "%1023s%d%n", word, &lscr, &len) != 2) || (strcmp (word, "L") != 0)) {
        E_ERROR("Bad sentence: %s\n", sent);
        return NULL;
    }
    lp += len;

    /* Parse each hyp word */
    while ((k = sscanf (lp, "%d%1023s%n", &sf, word, &len)) == 2) {
        lp += len;

        hyp = (hyp_t *) listelem_alloc (sizeof(hyp_t));
        hyp->word = (char *) ckd_salloc (word);
        hyp->sf = sf;
        hyp->lscr = lscr;       /* HACK!! Every entry has the TOTAL LM score */
        hyp->next = NULL;
        if (! head)
            head = hyp;
        else
            tail->next = hyp;
        tail = hyp;
    }

    /*
     * The loop must have stopped at the end of the line: k > 0 means a start frame
     * was read without a word, and a successful %s means unparsed text remains.
     */
    if ((k > 0) || (sscanf (lp, "%1023s", word) > 0)) {
        E_ERROR("Bad sentence: %s\n", sent);
        hyp_free (head);
        return NULL;
    }

    return head;
}
/*
 * Add an snode to a list of snodes kept as a chain of slinks.  A new slink holding
 * "node" and "prob" is allocated and placed in front of "list".
 * Return value: the new slink, i.e. the new head of the list.
 */
static slink_t *append_snode (slink_t *list, snode_t *node, int32 prob)
{
    slink_t *head = (slink_t *) listelem_alloc (sizeof(slink_t));

    head->prob = prob;
    head->next = list;
    head->node = node;

    return head;
}
/*
 * Record a history entry for state node "s".  The entry captures the node itself,
 * its newscore as the entry score and its newhist as the predecessor entry, and is
 * pushed onto the head of the global hist_head list (chained through alloc_next).
 * Return value: the newly allocated history entry.
 */
static history_t *lat_entry (snode_t *s)
{
    history_t *entry = (history_t *) listelem_alloc (sizeof(history_t));

    entry->pred = s->newhist;
    entry->score = s->newscore;
    entry->snode = s;

    /* Register on the global allocation list */
    entry->alloc_next = hist_head;
    hist_head = entry;

    return entry;
}
Пример #9
0
/*
 * Load a homophones file into the global homlist.
 *
 * Each line must hold two whitespace-separated words, w1 and w2.  If w1 is not in the
 * dictionary it is added, with a single dummy CI phone as its pronunciation.  w2 must
 * already be in the dictionary with an id below oovbegin.  For every line a hom_t
 * holding the two word ids is pushed onto the head of homlist.
 *
 * Failure to open the file, a line without two words, a failed dictionary addition
 * or an unknown w2 are all fatal (E_FATAL).
 *
 * The %s conversions are limited to the size of w1/w2: a line can be much longer than
 * either buffer, so an unbounded %s could overrun them.
 */
static void homfile_load (char *file)
{
    FILE *fp;
    char line[16380], w1[4096], w2[4096];
    int32 n;
    s3wid_t wid1, wid2;
    s3cipid_t ci[1];
    hom_t *h;

    E_INFO("Reading homophones file %s\n", file);
    if ((fp = fopen(file, "r")) == NULL)
        E_FATAL("fopen(%s,r) failed\n", file);

    ci[0] = (s3cipid_t) 0;      /* Dummy */

    n = 0;
    while (fgets (line, sizeof(line), fp) != NULL) {
        if (sscanf (line, "%4095s %4095s", w1, w2) == 2) {
            /* First word: add it to the dictionary if it is not there yet */
            wid1 = dict_wordid (dict, w1);
            if (NOT_WID(wid1)) {
                E_INFO("Adding %s to dictionary\n", w1);
                wid1 = dict_add_word (dict, w1, ci, 1);
                if (NOT_WID(wid1))
                    E_FATAL("dict_add_word(%s) failed\n", w1);
            }

            /* Second word: must already be a known, non-OOV dictionary word */
            wid2 = dict_wordid (dict, w2);
            if ((NOT_WID(wid2)) || (wid2 >= oovbegin))
                E_FATAL("%s not in dictionary\n", w2);

            h = (hom_t *) listelem_alloc (sizeof(hom_t));
            h->w1 = wid1;
            h->w2 = wid2;
            h->next = homlist;
            homlist = h;

            n++;
        } else
            E_FATAL("Bad homophones line: %s\n", line);
    }

    E_INFO("%d homophone pairs read\n", n);

    fclose (fp);
}
/*
 * Build the global align_wdseg list (word segmentation) from a history chain.
 *
 * The chain is walked from rooth along the pred pointers, with f counting the entries
 * visited.  Entry scores and score_scale[f] values are accumulated until a word end
 * is found: either the chain ends, or the following entry belongs to a different
 * pnode (by id) whose pos is 0.  At that point a segment is appended with:
 *   wid    - word id of the current entry's pnode;
 *   sf, ef - first and last walk index covered by the segment;
 *   score  - current entry score minus the score at the previous word end, plus the
 *            accumulated scale;
 *   bsdiff - sum of the entry scores over the segment.
 *
 * align_wdseg must be NULL on entry (asserted).
 */
static void build_wdseg (history_t *rooth)
{
    history_t *cur, *nxt;
    align_wdseg_t *seg, *tail;
    int32 frm, lastfrm, lastscr, scalesum, scoresum;

    assert (align_wdseg == NULL);

    lastfrm = -1;
    lastscr = 0;
    scalesum = 0;
    scoresum = 0;

    for (frm = 0, cur = rooth; cur != NULL; cur = cur->pred, frm++) {
        scoresum += cur->score;
        scalesum += score_scale[frm];

        /* Keep accumulating unless the current word ends at this entry */
        nxt = cur->pred;
        if ((nxt != NULL) &&
            ((nxt->snode->pnode->id == cur->snode->pnode->id) ||
             (nxt->snode->pnode->pos != 0)))
            continue;

        seg = (align_wdseg_t *) listelem_alloc (sizeof(align_wdseg_t));

        seg->wid = cur->snode->pnode->wid;
        seg->sf = lastfrm + 1;
        seg->ef = frm;
        seg->score = cur->score - lastscr + scalesum;
        seg->bsdiff = scoresum;
        seg->next = NULL;

        /* Append; the list is empty the first time, which is what sets tail */
        if (align_wdseg == NULL)
            align_wdseg = seg;
        else
            tail->next = seg;
        tail = seg;

        /* Start accumulating for the next word */
        scoresum = 0;
        scalesum = 0;
        lastscr = cur->score;
        lastfrm = frm;
    }
}
/*
 * Build the global align_phseg list (phone segmentation) from a history chain.
 *
 * The chain is walked from rooth along the pred pointers, with f counting the entries
 * visited.  Entry scores and score_scale[f] values are accumulated until the chain
 * ends or the following entry belongs to a different pnode (by id).  At that point a
 * segment is appended with:
 *   pid    - pid of the current entry's pnode;
 *   sf, ef - first and last walk index covered by the segment;
 *   score  - current entry score minus the score at the previous segment end, plus
 *            the accumulated scale;
 *   bsdiff - sum of the entry scores over the segment.
 *
 * align_phseg must be NULL on entry (asserted).
 */
static void build_phseg (history_t *rooth)
{
    history_t *cur, *nxt;
    align_phseg_t *seg, *tail;
    int32 frm, lastfrm, lastscr, scalesum, scoresum;
    int32 at_boundary;

    assert (align_phseg == NULL);

    lastfrm = -1;
    lastscr = 0;
    scalesum = 0;
    scoresum = 0;

    frm = 0;
    cur = rooth;
    while (cur != NULL) {
        scoresum += cur->score;
        scalesum += score_scale[frm];

        nxt = cur->pred;
        at_boundary = (nxt == NULL) || (nxt->snode->pnode->id != cur->snode->pnode->id);

        if (at_boundary) {
            seg = (align_phseg_t *) listelem_alloc (sizeof(align_phseg_t));

            /* Append; the list is empty the first time, which is what sets tail */
            if (align_phseg == NULL)
                align_phseg = seg;
            else
                tail->next = seg;
            tail = seg;
            seg->next = NULL;

            seg->pid = cur->snode->pnode->pid;
            seg->sf = lastfrm + 1;
            seg->ef = frm;
            seg->score = cur->score - lastscr + scalesum;
            seg->bsdiff = scoresum;

            /* Start accumulating for the next phone */
            scoresum = 0;
            scalesum = 0;
            lastscr = cur->score;
            lastfrm = frm;
        }

        cur = cur->pred;
        frm++;
    }
}
Пример #12
0
/*
 * Remove all filler-word nodes from the global lattice (latnode_list), replacing every
 * path "from -> filler -> to" by a direct link "from -> to".
 *
 * The work is done in four passes over latnode_list:
 *   1. For every link that ends in a filler node, record a reverse link on that
 *      filler node so its incoming links can be enumerated.
 *   2. For every filler node, connect each predecessor directly to each non-filler
 *      successor via link_latnodes().  The score of the new link is the incoming
 *      link score, plus sil_pen (if the filler is sil_wid) or filler_pen (otherwise),
 *      plus the outgoing link score; it carries the end frame of the incoming link.
 *   3. Free every filler node together with its outgoing links and its reverse
 *      links.  Each incoming link is marked dead by setting its "to" to NULL.
 *   4. Unlink and free the dead links (to == NULL) from the remaining nodes.
 */
static void bypass_filler_nodes ( void )
{
    latnode_t *node, *to, *from, *prev_node, *t_node;
    latlink_t *link, *f_link, *t_link, *prev_link;
    rev_latlink_t *revlink, *t_revlink;
    int32 score;
    
    /* Create reverse links for all links pointing to filler nodes */
    for (node = latnode_list; node; node = node->next) {
	for (link = node->links; link; link = link->next) {
	    to = link->to;
	    if (ISA_FILLER_WORD(to->wid)) {
		/* Push a back-reference to this link onto the filler's revlinks list */
		revlink = (rev_latlink_t *) listelem_alloc (sizeof (rev_latlink_t));
		revlink->link = link;
		revlink->next = to->revlinks;
		to->revlinks = revlink;
	    }
	}
    }

    /* Bypass filler nodes */
    for (node = latnode_list; node; node = node->next) {
	if (! ISA_FILLER_WORD(node->wid))
	    continue;
	
	/* Replace each link entering filler node with links to all its successors */
	for (revlink = node->revlinks; revlink; revlink = revlink->next) {
	    link = revlink->link;	/* link entering filler node */
	    from = link->from;
	    
	    /* Penalty for skipping this filler, plus the score of the incoming link */
	    score = (node->wid == sil_wid) ? sil_pen : filler_pen;
	    score += link->link_scr;
	    
	    /*
	     * Make links from predecessor of filler (from) to successors of filler.
	     * But if successor is a filler, it has already been eliminated since it
	     * appears earlier in latnode_list (see build...).  So it can be skipped.
	     * Likewise, no reverse links needed for the new links; none of them
	     * points to a filler node.
	     */
	    for (f_link = node->links; f_link; f_link = f_link->next) {
		if (! ISA_FILLER_WORD(f_link->to->wid))
		    link_latnodes (from, f_link->to, score + f_link->link_scr, link->ef);
	    }
	}
    }

    /* Delete filler nodes and all links and reverse links from it */
    prev_node = NULL;
    for (node = latnode_list; node; node = t_node) {
	t_node = node->next;	/* saved now: node may be freed below */
	if (ISA_FILLER_WORD(node->wid)) {
	    for (revlink = node->revlinks; revlink; revlink = t_revlink) {
		t_revlink = revlink->next;
		/* Mark the incoming link as dead; it is reclaimed in the final pass */
		revlink->link->to = NULL;
		listelem_free (revlink, sizeof(rev_latlink_t));
	    }

	    for (link = node->links; link; link = t_link) {
		t_link = link->next;
		listelem_free (link, sizeof(latlink_t));
	    }

	    /* Unlink the node from latnode_list before freeing it */
	    if (prev_node)
		prev_node->next = t_node;
	    else
		latnode_list = t_node;
		
	    listelem_free (node, sizeof(latnode_t));
	} else
	    prev_node = node;
    }

    /* Reclaim links pointing nowhere */
    for (node = latnode_list; node; node = node->next) {
	prev_link = NULL;
	for (link = node->links; link; link = t_link) {
	    t_link = link->next;	/* saved now: link may be freed below */
	    if (link->to == NULL) {
		if (prev_link)
		    prev_link->next = t_link;
		else
		    node->links = t_link;
		listelem_free (link, sizeof(latlink_t));
	    } else
		prev_link = link;
	}
    }
}
Пример #13
0
/*
 * Load a DAG from a file: each unique <word-id,start-frame> is a node, i.e. with
 * a single start time but it can represent several end times.  Links are created
 * whenever nodes are adjacent in time.
 * Return value: ptr to DAG structure if successful; NULL otherwise.
 */
dag_t *dag_load (char *file)
{
    FILE *fp;
    dag_t *dag;
    int32 seqid, sf, fef, lef, ef;
    char line[16384], wd[4096];
    int32 i, j, k;
    dagnode_t *d, *d2, **darray;
    s3wid_t w;
    int32 fudge, min_ef_range;
    
    E_INFO("Reading DAG file: %s\n", file);
    if ((fp = fopen (file, "r")) == NULL) {
	E_ERROR("fopen(%s,r) failed\n", file);
	return NULL;
    }

    dag = ckd_calloc (1, sizeof(dag_t));
    dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *));
    dag->nnode = 0;
    dag->nlink = 0;
    dag->nfrm = 0;
    
    /* Read Frames parameter */
    if ((dag->nfrm = dag_param_read (fp, "Frames")) <= 0)
	E_FATAL("%s: Frames parameter missing or invalid\n", file);
    
    /* Read Nodes parameter */
    if ((dag->nnode = dag_param_read (fp, "Nodes")) <= 0)
	E_FATAL("%s: Nodes parameter missing or invalid\n", file);
    
    /* Read nodes */
    darray = (dagnode_t **) ckd_calloc (dag->nnode, sizeof(dagnode_t *));
    for (i = 0; i < dag->nnode; i++) {
	if (fgets (line, sizeof(line), fp) == NULL)
	    E_FATAL("%s: Premature EOF\n", file);
	
	if ((k = sscanf (line, "%d %s %d %d %d", &seqid, wd, &sf, &fef, &lef)) != 5)
	    E_FATAL("%s: Bad line: %s\n", file, line);
	if ((sf < 0) || (sf >= dag->nfrm) ||
	    (fef < 0) || ( fef >= dag->nfrm) ||
	    (lef < 0) || ( lef >= dag->nfrm))
	    E_FATAL("%s: Bad frame info: %s\n", file, line);
	
	w = dict_wordid (dict, wd);
	if (NOT_WID(w))
	    E_FATAL("%s: Unknown word: %s\n", file, line);
	
	if (seqid != i)
	    E_FATAL("%s: Seqno error: %s\n", file, line);
	
	d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t));
	darray[i] = d;
	
	d->wid = w;
	d->seqid = seqid;
	d->reachable = 0;
	d->sf = sf;
	d->fef = fef;
	d->lef = lef;
	d->succlist = NULL;
	d->predlist = NULL;
	d->next = dag->node_sf[sf];
	dag->node_sf[sf] = d;
    }

    /* Read initial node ID */
    if (((k = dag_param_read (fp, "Initial")) < 0) || (k >= dag->nnode))
	E_FATAL("%s: Initial node parameter missing or invalid\n", file);
    dag->entry.src = NULL;
    dag->entry.dst = darray[k];
    dag->entry.next = NULL;

    /* Read final node ID */
    if (((k = dag_param_read (fp, "Final")) < 0) || (k >= dag->nnode))
	E_FATAL("%s: Final node parameter missing or invalid\n", file);
    dag->exit.src = NULL;
    dag->exit.dst = darray[k];
    dag->exit.next = NULL;
    
    ckd_free (darray);	/* That's all I need darray for??? */

    /* Read bestsegscore entries; just to make sure all nodes have been read */
    if ((k = dag_param_read (fp, "BestSegAscr")) < 0)
	E_FATAL("%s: BestSegAscr parameter missing\n", file);
    fclose (fp);
    
    /*
     * Build edges based on time-adjacency.
     * min_ef_range = min. endframes that a node must persist for it to be not ignored.
     * fudge = #frames to be fudged around word begin times
     */
    min_ef_range = *((int32 *) cmd_ln_access ("-min_endfr"));
    fudge = *((int32 *) cmd_ln_access ("-dagfudge"));
    if (min_ef_range <= 0)
	E_FATAL("Bad min_endfr argument: %d\n", min_ef_range);
    if ((fudge < 0) || (fudge > 2))
	E_FATAL("Bad dagfudge argument: %d\n", fudge);

    dag->nlink = 0;
    for (sf = 0; sf < dag->nfrm; sf++) {
	for (d = dag->node_sf[sf]; d; d = d->next) {
	    if ((d->lef - d->fef < min_ef_range - 1) && (d != dag->entry.dst))
		continue;
	    if (d->wid == finishwid)
		continue;
	    
	    for (ef = d->fef - fudge + 1; ef <= d->lef + 1; ef++) {
		for (d2 = dag->node_sf[ef]; d2; d2 = d2->next) {
		    if ((d2->lef - d2->fef < min_ef_range - 1) && (d2 != dag->exit.dst))
			continue;
		    
		    dag_link (d, d2);
		    dag->nlink++;
		}
	    }
	}
    }
    
    return dag;
}