/*
 * Connect "from" to "to" with a directed link.  At most one link is kept per
 * (from, to) pair: if one is already present it is reused, and its link_scr/ef are
 * overwritten only when the new score is strictly better (larger).  Otherwise a new
 * link is allocated and pushed onto the head of from->links.
 */
static void link_latnodes (latnode_t *from, latnode_t *to, int32 score, int32 ef)
{
    latlink_t *cur;
    latlink_t *fresh;

    assert (to->reachable);

    /* Scan the outgoing links of "from" for one that already targets "to" */
    cur = from->links;
    while (cur != NULL) {
        if (cur->to == to) {
            /* Existing link: keep whichever score is higher, along with its ef */
            if (score > cur->link_scr) {
                cur->link_scr = score;
                cur->ef = ef;
            }
            return;
        }
        cur = cur->next;
    }

    /* No such link yet; build one and insert it at the front of the list */
    fresh = (latlink_t *) listelem_alloc (sizeof(latlink_t));
    fresh->from = from;
    fresh->to = to;
    fresh->link_scr = score;
    fresh->ef = ef;
    fresh->best_prev = NULL;

    fresh->next = from->links;
    from->links = fresh;
}
/*
 * Allocate a pnode, fill in the supplied attributes, and push it onto the head of
 * the global allocation list (pnode_list).  The pid is obtained from
 * mdef_phone_id_nearest() for the given ci/lc/rc/wpos, and the id is the next value
 * of the global counter n_pnode.  All list pointers inside the node start out NULL.
 * Returns the newly allocated node.
 */
static pnode_t *alloc_pnode (s3wid_t w, int32 pos,
                             s3cipid_t ci, s3cipid_t lc, s3cipid_t rc,
                             word_posn_t wpos)
{
    pnode_t *node = (pnode_t *) listelem_alloc (sizeof(pnode_t));

    /* Identity of the phone and its context */
    node->wid = w;
    node->pos = pos;
    node->ci = ci;
    node->lc = lc;
    node->rc = rc;
    node->pid = mdef_phone_id_nearest (mdef, ci, lc, rc, wpos);

    /* Not yet connected to anything */
    node->startstate = NULL;
    node->succlist = NULL;
    node->predlist = NULL;
    node->next = NULL;

    /* Sequential id, then registration on the global allocation list */
    node->id = n_pnode++;
    node->alloc_next = pnode_list;
    pnode_list = node;

    return node;
}
/*
 * Build the global state segmentation list (align_stseg) by following the pred
 * chain that starts at rooth.  One entry is appended per history record, in chain
 * order; the running index of the record is used to look up score_scale[].
 * align_stseg must be empty (NULL) on entry.
 */
static void build_stseg (history_t *rooth)
{
    history_t *cur, *prior;
    align_stseg_t *seg, *last;
    int32 frm, prior_scr;

    assert (align_stseg == NULL);

    last = NULL;
    prior = NULL;
    prior_scr = 0;

    frm = 0;
    cur = rooth;
    while (cur != NULL) {
        seg = (align_stseg_t *) listelem_alloc (sizeof(align_stseg_t));

        /* Append at the tail; the first entry becomes the list head */
        if (align_stseg == NULL)
            align_stseg = seg;
        else
            last->next = seg;
        last = seg;
        seg->next = NULL;

        seg->pid = cur->snode->pnode->pid;
        seg->state = cur->snode->state;

        /* Flag the entry as a start when it is the first, or its pnode id changed */
        if (prior == NULL)
            seg->start = 1;
        else
            seg->start = (prior->snode->pnode->id != cur->snode->pnode->id);

        /* Score relative to the previous record, plus this index's scale */
        seg->score = cur->score - prior_scr + score_scale[frm];
        seg->bsdiff = cur->score;

        prior_scr = cur->score;
        prior = cur;

        cur = cur->pred;
        frm++;
    }
}
/*
 * Add a pnode to a list of pnodes (kept as a chain of plinks).  A new plink
 * referring to node is allocated and placed in front of the existing list; the
 * return value is the new head of the list.
 */
static plink_t *append_pnode (plink_t *list, pnode_t *node)
{
    plink_t *cell = (plink_t *) listelem_alloc (sizeof(plink_t));

    cell->next = list;
    cell->node = node;

    return cell;
}
/* * Create a degenerate DAG (linear sequence of nodes) for the given hyp line. * The DAG contains a terminal sentinel silwid node. */ static dag_t *hypline2dag (char *ref_uttid, char *line) { char junk1[4096], junk2[4096], uttid[4096]; s3wid_t wid[MAX_UTT_LEN]; int32 i, n; dag_t *dag; dagnode_t *d; if ((n = line2wid (dict, line, wid, MAX_UTT_LEN-1, 0, uttid)) < 0) E_FATAL("Error in parsing hyp line: %s\n", line); /* Verify uttid with ref_uttid */ if (strcmp (uttid, ref_uttid) != 0) { strcpy (junk1, uttid); ucase (junk1); strcpy (junk2, ref_uttid); ucase (junk2); if (strcmp (junk1, junk2) != 0) E_FATAL("Uttid mismatch: %s(ref), %s(hyp)\n", ref_uttid, uttid); } /* Build DAG from word sequence */ dag = ckd_calloc (1, sizeof(dag_t)); dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *)); dag->nnode = 0; dag->nfrm = 0; dag->nlink = 0; for (i = 0; i < n; i++) { if ((NOT_WID(wid[i])) || (wid[i] >= oovbegin)) E_FATAL("%s: Unknown word in line: %s\n", uttid, line); /* Create DAG node for word */ d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t)); wid2dagnode (d, i, wid[i]); dag->node_sf[i] = d; if (i > 0) { dag_link (dag->node_sf[i-1], d); dag->nlink++; } dag->nnode++; } dag->nfrm = dag->nnode; dag->entry.src = NULL; dag->entry.dst = dag->node_sf[0]; dag->entry.next = NULL; dag->exit.src = NULL; dag->exit.dst = dag->node_sf[dag->nnode - 1]; dag->exit.next = NULL; return dag; }
static hyp_t *nbestfile_parseline (char *sent) { char *lp; hyp_t *head, *tail, *hyp; char word[1024]; int32 k, sf, len, lscr; head = tail = NULL; lp = sent; /* Parse T <score> */ if ((sscanf (lp, "%s%d%n", word, &k, &len) != 2) || (strcmp (word, "T") != 0)) { E_ERROR("Bad sentence: %s\n", sent); return NULL; } lp += len; /* Parse A <score> */ if ((sscanf (lp, "%s%d%n", word, &k, &len) != 2) || (strcmp (word, "A") != 0)) { E_ERROR("Bad sentence: %s\n", sent); return NULL; } lp += len; /* Parse L <score> */ if ((sscanf (lp, "%s%d%n", word, &lscr, &len) != 2) || (strcmp (word, "L") != 0)) { E_ERROR("Bad sentence: %s\n", sent); return NULL; } lp += len; /* Parse each hyp word */ while ((k = sscanf (lp, "%d%s%n", &sf, word, &len)) == 2) { lp += len; hyp = (hyp_t *) listelem_alloc (sizeof(hyp_t)); hyp->word = (char *) ckd_salloc (word); hyp->sf = sf; hyp->lscr = lscr; /* HACK!! Every entry has the TOTAL LM score */ hyp->next = NULL; if (! head) head = hyp; else tail->next = hyp; tail = hyp; } if ((k > 0) || (sscanf (lp, "%s", word) > 0)) { E_ERROR("Bad sentence: %s\n", sent); hyp_free (head); return NULL; } return head; }
/*
 * Add an snode to a list of snodes (kept as a chain of slinks).  A new slink
 * carrying node and prob is allocated and placed in front of the existing list;
 * the return value is the new head of the list.
 */
static slink_t *append_snode (slink_t *list, snode_t *node, int32 prob)
{
    slink_t *cell = (slink_t *) listelem_alloc (sizeof(slink_t));

    cell->prob = prob;
    cell->next = list;
    cell->node = node;

    return cell;
}
/*
 * Create a history record for snode s, capturing its newscore as the score and
 * its newhist as the predecessor.  The record is also pushed onto the head of the
 * global allocation list (hist_head).  Returns the new record.
 */
static history_t *lat_entry (snode_t *s)
{
    history_t *rec = (history_t *) listelem_alloc (sizeof(history_t));

    /* Snapshot of the state node's pending score and history */
    rec->pred = s->newhist;
    rec->score = s->newscore;
    rec->snode = s;

    /* Register on the global list of allocated history records */
    rec->alloc_next = hist_head;
    hist_head = rec;

    return rec;
}
static void homfile_load (char *file) { FILE *fp; char line[16380], w1[4096], w2[4096]; int32 k, n; s3wid_t wid1, wid2; s3cipid_t ci[1]; hom_t *h; E_INFO("Reading homophones file %s\n", file); if ((fp = fopen(file, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", file); ci[0] = (s3cipid_t) 0; /* Dummy */ n = 0; while (fgets (line, sizeof(line), fp) != NULL) { if ((k = sscanf (line, "%s %s", w1, w2)) == 2) { wid1 = dict_wordid (dict, w1); if (NOT_WID(wid1)) { E_INFO("Adding %s to dictionary\n", w1); wid1 = dict_add_word (dict, w1, ci, 1); if (NOT_WID(wid1)) E_FATAL("dict_add_word(%s) failed\n", w1); } wid2 = dict_wordid (dict, w2); if ((NOT_WID(wid2)) || (wid2 >= oovbegin)) E_FATAL("%s not in dictionary\n", w2); h = (hom_t *) listelem_alloc (sizeof(hom_t)); h->w1 = wid1; h->w2 = wid2; h->next = homlist; homlist = h; n++; } else E_FATAL("Bad homophones line: %s\n", line); } E_INFO("%d homophone pairs read\n", n); fclose (fp); }
/*
 * Build the global word segmentation list (align_wdseg) by following the pred
 * chain that starts at rooth, treating each history record as one frame.  Scores
 * and scale factors are accumulated until a word boundary is found, at which
 * point one segment is appended covering the frames since the previous boundary.
 * align_wdseg must be empty (NULL) on entry.
 */
static void build_wdseg (history_t *rooth)
{
    history_t *cur, *succ;
    align_wdseg_t *seg, *last;
    int32 frm, prev_ef, base_scr, scale_sum, bs_sum;
    int word_done;

    assert (align_wdseg == NULL);

    last = NULL;
    base_scr = 0;
    bs_sum = 0;
    scale_sum = 0;
    prev_ef = -1;

    frm = 0;
    cur = rooth;
    while (cur != NULL) {
        bs_sum += cur->score;
        scale_sum += score_scale[frm];

        succ = cur->pred;

        /*
         * The current word ends here if the chain ends, or if the next record
         * belongs to a different pnode that sits at position 0.
         */
        if (succ == NULL)
            word_done = 1;
        else if (succ->snode->pnode->id == cur->snode->pnode->id)
            word_done = 0;
        else
            word_done = (succ->snode->pnode->pos == 0);

        if (word_done) {
            seg = (align_wdseg_t *) listelem_alloc (sizeof(align_wdseg_t));

            /* Append at the tail; the first segment becomes the list head */
            if (align_wdseg == NULL)
                align_wdseg = seg;
            else
                last->next = seg;
            last = seg;
            seg->next = NULL;

            seg->wid = cur->snode->pnode->wid;
            seg->sf = prev_ef + 1;
            seg->ef = frm;
            seg->score = cur->score - base_scr + scale_sum;
            seg->bsdiff = bs_sum;

            /* Restart the accumulators for the next word */
            bs_sum = 0;
            scale_sum = 0;
            base_scr = cur->score;
            prev_ef = frm;
        }

        cur = succ;
        frm++;
    }
}
/*
 * Build the global phone segmentation list (align_phseg) by following the pred
 * chain that starts at rooth, treating each history record as one frame.  Scores
 * and scale factors are accumulated until the pnode id changes (or the chain
 * ends), at which point one segment is appended covering the frames since the
 * previous boundary.  align_phseg must be empty (NULL) on entry.
 */
static void build_phseg (history_t *rooth)
{
    history_t *cur, *succ;
    align_phseg_t *seg, *last;
    int32 frm, prev_ef, base_scr, scale_sum, bs_sum;

    assert (align_phseg == NULL);

    last = NULL;
    base_scr = 0;
    bs_sum = 0;
    scale_sum = 0;
    prev_ef = -1;

    frm = 0;
    cur = rooth;
    while (cur != NULL) {
        bs_sum += cur->score;
        scale_sum += score_scale[frm];

        succ = cur->pred;

        /* Phone boundary: end of chain, or the next record is in another pnode */
        if ((succ == NULL) || (succ->snode->pnode->id != cur->snode->pnode->id)) {
            seg = (align_phseg_t *) listelem_alloc (sizeof(align_phseg_t));

            /* Append at the tail; the first segment becomes the list head */
            if (align_phseg == NULL)
                align_phseg = seg;
            else
                last->next = seg;
            last = seg;
            seg->next = NULL;

            seg->pid = cur->snode->pnode->pid;
            seg->sf = prev_ef + 1;
            seg->ef = frm;
            seg->score = cur->score - base_scr + scale_sum;
            seg->bsdiff = bs_sum;

            /* Restart the accumulators for the next phone */
            bs_sum = 0;
            scale_sum = 0;
            base_scr = cur->score;
            prev_ef = frm;
        }

        cur = succ;
        frm++;
    }
}
static void bypass_filler_nodes ( void ) { latnode_t *node, *to, *from, *prev_node, *t_node; latlink_t *link, *f_link, *t_link, *prev_link; rev_latlink_t *revlink, *t_revlink; int32 score; /* Create reverse links for all links pointing to filler nodes */ for (node = latnode_list; node; node = node->next) { for (link = node->links; link; link = link->next) { to = link->to; if (ISA_FILLER_WORD(to->wid)) { revlink = (rev_latlink_t *) listelem_alloc (sizeof (rev_latlink_t)); revlink->link = link; revlink->next = to->revlinks; to->revlinks = revlink; } } } /* Bypass filler nodes */ for (node = latnode_list; node; node = node->next) { if (! ISA_FILLER_WORD(node->wid)) continue; /* Replace each link entering filler node with links to all its successors */ for (revlink = node->revlinks; revlink; revlink = revlink->next) { link = revlink->link; /* link entering filler node */ from = link->from; score = (node->wid == sil_wid) ? sil_pen : filler_pen; score += link->link_scr; /* * Make links from predecessor of filler (from) to successors of filler. * But if successor is a filler, it has already been eliminated since it * appears earlier in latnode_list (see build...). So it can be skipped. * Likewise, no reverse links needed for the new links; none of them * points to a filler node. */ for (f_link = node->links; f_link; f_link = f_link->next) { if (! 
ISA_FILLER_WORD(f_link->to->wid)) link_latnodes (from, f_link->to, score + f_link->link_scr, link->ef); } } } /* Delete filler nodes and all links and reverse links from it */ prev_node = NULL; for (node = latnode_list; node; node = t_node) { t_node = node->next; if (ISA_FILLER_WORD(node->wid)) { for (revlink = node->revlinks; revlink; revlink = t_revlink) { t_revlink = revlink->next; revlink->link->to = NULL; listelem_free (revlink, sizeof(rev_latlink_t)); } for (link = node->links; link; link = t_link) { t_link = link->next; listelem_free (link, sizeof(latlink_t)); } if (prev_node) prev_node->next = t_node; else latnode_list = t_node; listelem_free (node, sizeof(latnode_t)); } else prev_node = node; } /* Reclaim links pointing nowhere */ for (node = latnode_list; node; node = node->next) { prev_link = NULL; for (link = node->links; link; link = t_link) { t_link = link->next; if (link->to == NULL) { if (prev_link) prev_link->next = t_link; else node->links = t_link; listelem_free (link, sizeof(latlink_t)); } else prev_link = link; } } }
/* * Load a DAG from a file: each unique <word-id,start-frame> is a node, i.e. with * a single start time but it can represent several end times. Links are created * whenever nodes are adjacent in time. * Return value: ptr to DAG structure if successful; NULL otherwise. */ dag_t *dag_load (char *file) { FILE *fp; dag_t *dag; int32 seqid, sf, fef, lef, ef; char line[16384], wd[4096]; int32 i, j, k; dagnode_t *d, *d2, **darray; s3wid_t w; int32 fudge, min_ef_range; E_INFO("Reading DAG file: %s\n", file); if ((fp = fopen (file, "r")) == NULL) { E_ERROR("fopen(%s,r) failed\n", file); return NULL; } dag = ckd_calloc (1, sizeof(dag_t)); dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *)); dag->nnode = 0; dag->nlink = 0; dag->nfrm = 0; /* Read Frames parameter */ if ((dag->nfrm = dag_param_read (fp, "Frames")) <= 0) E_FATAL("%s: Frames parameter missing or invalid\n", file); /* Read Nodes parameter */ if ((dag->nnode = dag_param_read (fp, "Nodes")) <= 0) E_FATAL("%s: Nodes parameter missing or invalid\n", file); /* Read nodes */ darray = (dagnode_t **) ckd_calloc (dag->nnode, sizeof(dagnode_t *)); for (i = 0; i < dag->nnode; i++) { if (fgets (line, sizeof(line), fp) == NULL) E_FATAL("%s: Premature EOF\n", file); if ((k = sscanf (line, "%d %s %d %d %d", &seqid, wd, &sf, &fef, &lef)) != 5) E_FATAL("%s: Bad line: %s\n", file, line); if ((sf < 0) || (sf >= dag->nfrm) || (fef < 0) || ( fef >= dag->nfrm) || (lef < 0) || ( lef >= dag->nfrm)) E_FATAL("%s: Bad frame info: %s\n", file, line); w = dict_wordid (dict, wd); if (NOT_WID(w)) E_FATAL("%s: Unknown word: %s\n", file, line); if (seqid != i) E_FATAL("%s: Seqno error: %s\n", file, line); d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t)); darray[i] = d; d->wid = w; d->seqid = seqid; d->reachable = 0; d->sf = sf; d->fef = fef; d->lef = lef; d->succlist = NULL; d->predlist = NULL; d->next = dag->node_sf[sf]; dag->node_sf[sf] = d; } /* Read initial node ID */ if (((k = dag_param_read (fp, "Initial")) < 
0) || (k >= dag->nnode)) E_FATAL("%s: Initial node parameter missing or invalid\n", file); dag->entry.src = NULL; dag->entry.dst = darray[k]; dag->entry.next = NULL; /* Read final node ID */ if (((k = dag_param_read (fp, "Final")) < 0) || (k >= dag->nnode)) E_FATAL("%s: Final node parameter missing or invalid\n", file); dag->exit.src = NULL; dag->exit.dst = darray[k]; dag->exit.next = NULL; ckd_free (darray); /* That's all I need darray for??? */ /* Read bestsegscore entries; just to make sure all nodes have been read */ if ((k = dag_param_read (fp, "BestSegAscr")) < 0) E_FATAL("%s: BestSegAscr parameter missing\n", file); fclose (fp); /* * Build edges based on time-adjacency. * min_ef_range = min. endframes that a node must persist for it to be not ignored. * fudge = #frames to be fudged around word begin times */ min_ef_range = *((int32 *) cmd_ln_access ("-min_endfr")); fudge = *((int32 *) cmd_ln_access ("-dagfudge")); if (min_ef_range <= 0) E_FATAL("Bad min_endfr argument: %d\n", min_ef_range); if ((fudge < 0) || (fudge > 2)) E_FATAL("Bad dagfudge argument: %d\n", fudge); dag->nlink = 0; for (sf = 0; sf < dag->nfrm; sf++) { for (d = dag->node_sf[sf]; d; d = d->next) { if ((d->lef - d->fef < min_ef_range - 1) && (d != dag->entry.dst)) continue; if (d->wid == finishwid) continue; for (ef = d->fef - fudge + 1; ef <= d->lef + 1; ef++) { for (d2 = dag->node_sf[ef]; d2; d2 = d2->next) { if ((d2->lef - d2->fef < min_ef_range - 1) && (d2 != dag->exit.dst)) continue; dag_link (d, d2); dag->nlink++; } } } } return dag; }