/** * returns NULL if there are no ways to parse, or returns a pointer * to a set structure representing all the ways to parse. * * This code is similar to do_count() in count.c -- for a good reason: * the do_count() function did a full parse, but didn't actually * allocate an memory structures to hold the parse. This also does * a full parse, but it also allocates and fills out the various * parse structures. */ static Parse_set * mk_parse_set(Sentence sent, fast_matcher_t *mchxt, count_context_t * ctxt, Disjunct *ld, Disjunct *rd, int lw, int rw, Connector *le, Connector *re, unsigned int null_count, bool islands_ok, Parse_info pi) { Disjunct * d, * dis; int start_word, end_word, w; bool Lmatch, Rmatch; unsigned int lnull_count, rnull_count; int i, j; Parse_set *ls[4], *rs[4], *lset, *rset; Parse_choice * a_choice; Match_node * m, *m1; X_table_connector *xt; s64 count; assert(null_count < 0x7fff, "mk_parse_set() called with null_count < 0."); count = table_lookup(ctxt, lw, rw, le, re, null_count); /* assert(count >= 0, "mk_parse_set() called on params that were not in the table."); Actually, we can't assert this, because of the pseudocount technique that's used in count(). It's not the case that every call to mk_parse_set() has already been put into the table. */ if ((count == 0) || (count == -1)) return NULL; xt = x_table_pointer(lw, rw, le, re, null_count, pi); if (xt != NULL) return xt->set; /* we've already computed it */ /* Start it out with the empty set of options. */ /* This entry must be updated before we return. 
*/ xt = x_table_store(lw, rw, le, re, null_count, pi); xt->set->count = count; /* the count we already computed */ /* this count is non-zero */ if (rw == 1 + lw) return xt->set; if ((le == NULL) && (re == NULL)) { if (!islands_ok && (lw != -1)) return xt->set; if (null_count == 0) return xt->set; w = lw + 1; for (dis = sent->word[w].d; dis != NULL; dis = dis->next) { if (dis->left == NULL) { rs[0] = mk_parse_set(sent, mchxt, ctxt, dis, NULL, w, rw, dis->right, NULL, null_count-1, islands_ok, pi); if (rs[0] == NULL) continue; a_choice = make_choice(dummy_set(), lw, w, NULL, NULL, rs[0], w, rw, NULL, NULL, NULL, NULL, NULL); put_choice_in_set(xt->set, a_choice); } } rs[0] = mk_parse_set(sent, mchxt, ctxt, NULL, NULL, w, rw, NULL, NULL, null_count-1, islands_ok, pi); if (rs[0] != NULL) { a_choice = make_choice(dummy_set(), lw, w, NULL, NULL, rs[0], w, rw, NULL, NULL, NULL, NULL, NULL); put_choice_in_set(xt->set, a_choice); } return xt->set; } if (le == NULL) { start_word = lw + 1; } else { start_word = le->word; } if (re == NULL) { end_word = rw; } else { end_word = re->word + 1; } /* This condition can never be true here. It is included so GCC will be able * to optimize the loop over "null_count". Without this check, GCC thinks this * loop may be an infinite loop and it may omit some optimizations. */ if (UINT_MAX == null_count) return NULL; for (w = start_word; w < end_word; w++) { m1 = m = form_match_list(mchxt, w, le, lw, re, rw); for (; m!=NULL; m=m->next) { d = m->d; for (lnull_count = 0; lnull_count <= null_count; lnull_count++) { rnull_count = null_count-lnull_count; /* now lnull_count and rnull_count are the null_counts we're assigning to * those parts respectively */ /* Now, we determine if (based on table only) we can see that the current range is not parsable. 
*/ Lmatch = (le != NULL) && (d->left != NULL) && do_match(le, d->left, lw, w); Rmatch = (d->right != NULL) && (re != NULL) && do_match(d->right, re, w, rw); for (i=0; i<4; i++) { ls[i] = rs[i] = NULL; } if (Lmatch) { ls[0] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le->next, d->left->next, lnull_count, islands_ok, pi); if (le->multi) ls[1] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le, d->left->next, lnull_count, islands_ok, pi); if (d->left->multi) ls[2] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le->next, d->left, lnull_count, islands_ok, pi); if (le->multi && d->left->multi) ls[3] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le, d->left, lnull_count, islands_ok, pi); } if (Rmatch) { rs[0] = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right->next, re->next, rnull_count, islands_ok, pi); if (d->right->multi) rs[1] = mk_parse_set(sent, mchxt, ctxt, d, rd, w,rw,d->right,re->next, rnull_count, islands_ok, pi); if (re->multi) rs[2] = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right->next, re, rnull_count, islands_ok, pi); if (d->right->multi && re->multi) rs[3] = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right, re, rnull_count, islands_ok, pi); } for (i=0; i<4; i++) { /* this ordering is probably not consistent with that * needed to use list_links */ if (ls[i] == NULL) continue; for (j=0; j<4; j++) { if (rs[j] == NULL) continue; a_choice = make_choice(ls[i], lw, w, le, d->left, rs[j], w, rw, d->right, re, ld, d, rd); put_choice_in_set(xt->set, a_choice); } } if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL) { /* evaluate using the left match, but not the right */ rset = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right, re, rnull_count, islands_ok, pi); if (rset != NULL) { for (i=0; i<4; i++) { if (ls[i] == NULL) continue; /* this ordering is probably not consistent with * that needed to use list_links */ a_choice = make_choice(ls[i], lw, w, le, d->left, rset, w, rw, NULL /* d->right */, re, /* the NULL 
indicates no link*/ ld, d, rd); put_choice_in_set(xt->set, a_choice); } } } if ((le == NULL) && (rs[0] != NULL || rs[1] != NULL || rs[2] != NULL || rs[3] != NULL)) { /* evaluate using the right match, but not the left */ lset = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le, d->left, lnull_count, islands_ok, pi); if (lset != NULL) { for (i=0; i<4; i++) { if (rs[i] == NULL) continue; /* this ordering is probably not consistent with * that needed to use list_links */ a_choice = make_choice(lset, lw, w, NULL /* le */, d->left, /* NULL indicates no link */ rs[i], w, rw, d->right, re, ld, d, rd); put_choice_in_set(xt->set, a_choice); } } } } } put_match_list(mchxt, m1); } xt->set->current = xt->set->first; return xt->set; }
Parse_set * parse_set(Disjunct *ld, Disjunct *rd, int lw, int rw, Connector *le, Connector *re, int cost, Parse_info * pi) { /* returns NULL if there are no ways to parse, or returns a pointer to a set structure representing all the ways to parse */ Disjunct * d, * dis; int start_word, end_word, w; int lcost, rcost, Lmatch, Rmatch; int i, j; Parse_set *ls[4], *rs[4], *lset, *rset; Parse_choice * a_choice; Match_node * m, *m1; X_table_connector *xt; int count; assert(cost >= 0, "parse_set() called with cost < 0."); count = table_lookup(lw, rw, le, re, cost); /* assert(count >= 0, "parse_set() called on params that were not in the table."); Actually, we can't assert this, because of the pseudocount technique that's used in count(). It's not the case that every call to parse_set() has already been put into the table. */ if ((count == 0) || (count == -1)) return NULL; xt = x_table_pointer(lw, rw, le, re, cost, pi); if (xt == NULL) { xt = x_table_store(lw, rw, le, re, cost, empty_set(), pi); /* start it out with the empty set of options */ /* this entry must be updated before we return */ } else { return xt->set; /* we've already computed it */ } xt->set->count = count; /* the count we already computed */ /* this count is non-zero */ if (rw == 1+lw) return xt->set; if ((le == NULL) && (re == NULL)) { if (!islands_ok && (lw != -1)) { return xt->set; } if (cost == 0) { return xt->set; } else { w = lw+1; for (dis = local_sent[w].d; dis != NULL; dis = dis->next) { if (dis->left == NULL) { rs[0] = parse_set(dis, NULL, w, rw, dis->right, NULL, cost-1, pi); if (rs[0] == NULL) continue; a_choice = make_choice(dummy_set(), lw, w, NULL, NULL, rs[0], w, rw, NULL, NULL, NULL, NULL, NULL); put_choice_in_set(xt->set, a_choice); } } rs[0] = parse_set(NULL, NULL, w, rw, NULL, NULL, cost-1, pi); if (rs[0] != NULL) { a_choice = make_choice(dummy_set(), lw, w, NULL, NULL, rs[0], w, rw, NULL, NULL, NULL, NULL, NULL); put_choice_in_set(xt->set, a_choice); } return xt->set; } } if (le == 
NULL) { start_word = lw+1; } else { start_word = le->word; } if (re == NULL) { end_word = rw-1; } else { end_word = re->word; } for (w=start_word; w <= end_word; w++) { m1 = m = form_match_list(w, le, lw, re, rw); for (; m!=NULL; m=m->next) { d = m->d; for (lcost = 0; lcost <= cost; lcost++) { rcost = cost-lcost; /* now lcost and rcost are the costs we're assigning to those parts respectively */ /* Now, we determine if (based on table only) we can see that the current range is not parsable. */ Lmatch = (le != NULL) && (d->left != NULL) && match(le, d->left, lw, w); Rmatch = (d->right != NULL) && (re != NULL) && match(d->right, re, w, rw); for (i=0; i<4; i++) {ls[i] = rs[i] = NULL;} if (Lmatch) { ls[0] = parse_set(ld, d, lw, w, le->next, d->left->next, lcost, pi); if (le->multi) ls[1] = parse_set(ld, d, lw, w, le, d->left->next, lcost, pi); if (d->left->multi) ls[2] = parse_set(ld, d, lw, w, le->next, d->left, lcost, pi); if (le->multi && d->left->multi) ls[3] = parse_set(ld, d, lw, w, le, d->left, lcost, pi); } if (Rmatch) { rs[0] = parse_set(d, rd, w, rw, d->right->next, re->next, rcost, pi); if (d->right->multi) rs[1] = parse_set(d, rd, w,rw,d->right,re->next, rcost, pi); if (re->multi) rs[2] = parse_set(d, rd, w, rw, d->right->next, re, rcost, pi); if (d->right->multi && re->multi) rs[3] = parse_set(d, rd, w, rw, d->right, re, rcost, pi); } for (i=0; i<4; i++) { /* this ordering is probably not consistent with that needed to use list_links */ if (ls[i] == NULL) continue; for (j=0; j<4; j++) { if (rs[j] == NULL) continue; a_choice = make_choice(ls[i], lw, w, le, d->left, rs[j], w, rw, d->right, re, ld, d, rd); put_choice_in_set(xt->set, a_choice); } } if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL) { /* evaluate using the left match, but not the right */ rset = parse_set(d, rd, w, rw, d->right, re, rcost, pi); if (rset != NULL) { for (i=0; i<4; i++) { if (ls[i] == NULL) continue; /* this ordering is probably not consistent with that needed 
to use list_links */ a_choice = make_choice(ls[i], lw, w, le, d->left, rset, w, rw, NULL /* d->right */, re, /* the NULL indicates no link*/ ld, d, rd); put_choice_in_set(xt->set, a_choice); } } } if ((le == NULL) && (rs[0] != NULL || rs[1] != NULL || rs[2] != NULL || rs[3] != NULL)) { /* evaluate using the right match, but not the left */ lset = parse_set(ld, d, lw, w, le, d->left, lcost, pi); if (lset != NULL) { for (i=0; i<4; i++) { if (rs[i] == NULL) continue; /* this ordering is probably not consistent with that needed to use list_links */ a_choice = make_choice(lset, lw, w, NULL /* le */, d->left, /* NULL indicates no link */ rs[i], w, rw, d->right, re, ld, d, rd); put_choice_in_set(xt->set, a_choice); } } } } } put_match_list(m1); } xt->set->current = xt->set->first; return xt->set; }
/**
 * Mark as useful all disjuncts involved in some way to complete the
 * structure within the current region. Note that only disjuncts
 * strictly between lw and rw will be marked. If it so happens that
 * this region itself is not valid, then this fact will be recorded
 * in the table, and nothing else happens.
 *
 * region_valid() reports 0 (not fillable), 1 (fillable, not yet marked)
 * or 2 (fillable and already marked); work is done only for status 1,
 * after which the table entry is updated to 2.
 */
static void mark_region(Sentence sent, int lw, int rw,
                        Connector *le, Connector *re)
{
    count_context_t *ctxt = sent->count_ctxt;
    Disjunct *dj;
    Match_node *head, *node;
    int status;
    int left_ok, right_ok;
    int first_word, last_word, mid;

    status = region_valid(sent, lw, rw, le, re);
    if ((status == 0) || (status == 2)) return;

    /* Only a valid, still-unmarked region (status 1) gets this far. */
    table_update(ctxt, lw, rw, le, re, 0, 2);

    if ((le == NULL) && (re == NULL) && (ctxt->null_links) && (rw != 1+lw))
    {
        mid = lw + 1;
        for (dj = ctxt->local_sent[mid].d; dj != NULL; dj = dj->next)
        {
            if (dj->left != NULL) continue;
            if (!region_valid(sent, mid, rw, dj->right, NULL)) continue;

            dj->marked = TRUE;
            mark_region(sent, mid, rw, dj->right, NULL);
        }
        mark_region(sent, mid, rw, NULL, NULL);
        return;
    }

    first_word = lw + 1;
    if (le != NULL) first_word = le->word;

    last_word = rw - 1;
    if (re != NULL) last_word = re->word;

    for (mid = first_word; mid <= last_word; mid++)
    {
        head = form_match_list(sent, mid, le, lw, re, rw);

        for (node = head; node != NULL; node = node->next)
        {
            dj = node->d;

            /* Can le link to dj->left with the remainder fillable? */
            left_ok = 0;
            if ((le != NULL) && (dj->left != NULL) &&
                x_prune_match(ctxt, le, dj->left, lw, mid))
            {
                left_ok =
                    region_valid(sent, lw, mid, le->next, dj->left->next) ||
                    (le->multi &&
                     region_valid(sent, lw, mid, le, dj->left->next)) ||
                    (dj->left->multi &&
                     region_valid(sent, lw, mid, le->next, dj->left)) ||
                    (le->multi && dj->left->multi &&
                     region_valid(sent, lw, mid, le, dj->left));
            }

            /* Can dj->right link to re with the remainder fillable? */
            right_ok = 0;
            if ((dj->right != NULL) && (re != NULL) &&
                x_prune_match(ctxt, dj->right, re, mid, rw))
            {
                right_ok =
                    region_valid(sent, mid, rw, dj->right->next, re->next) ||
                    (dj->right->multi &&
                     region_valid(sent, mid, rw, dj->right, re->next)) ||
                    (re->multi &&
                     region_valid(sent, mid, rw, dj->right->next, re)) ||
                    (dj->right->multi && re->multi &&
                     region_valid(sent, mid, rw, dj->right, re));
            }

            /* The three cases below could be restructured to avoid
             * superfluous calls to mark_region. It did not seem a high
             * priority, so this was left unoptimized. */

            /* Linked on the left only. */
            if (left_ok && region_valid(sent, mid, rw, dj->right, re))
            {
                dj->marked = TRUE;
                mark_region(sent, mid, rw, dj->right, re);
                mark_region(sent, lw, mid, le->next, dj->left->next);
                if (le->multi)
                    mark_region(sent, lw, mid, le, dj->left->next);
                if (dj->left->multi)
                    mark_region(sent, lw, mid, le->next, dj->left);
                if (le->multi && dj->left->multi)
                    mark_region(sent, lw, mid, le, dj->left);
            }

            /* Linked on the right only. */
            if (right_ok && region_valid(sent, lw, mid, le, dj->left))
            {
                dj->marked = TRUE;
                mark_region(sent, lw, mid, le, dj->left);
                mark_region(sent, mid, rw, dj->right->next, re->next);
                if (dj->right->multi)
                    mark_region(sent, mid, rw, dj->right, re->next);
                if (re->multi)
                    mark_region(sent, mid, rw, dj->right->next, re);
                if (dj->right->multi && re->multi)
                    mark_region(sent, mid, rw, dj->right, re);
            }

            /* Linked on both sides. */
            if (left_ok && right_ok)
            {
                dj->marked = TRUE;
                mark_region(sent, lw, mid, le->next, dj->left->next);
                if (le->multi)
                    mark_region(sent, lw, mid, le, dj->left->next);
                if (dj->left->multi)
                    mark_region(sent, lw, mid, le->next, dj->left);
                if (le->multi && dj->left->multi)
                    mark_region(sent, lw, mid, le, dj->left);
                mark_region(sent, mid, rw, dj->right->next, re->next);
                if (dj->right->multi)
                    mark_region(sent, mid, rw, dj->right, re->next);
                if (re->multi)
                    mark_region(sent, mid, rw, dj->right->next, re);
                if (dj->right->multi && re->multi)
                    mark_region(sent, mid, rw, dj->right, re);
            }
        }
        put_match_list(sent, head);
    }
}
/**
 * Returns 0 if this range cannot be successfully filled in with
 * links. Returns 1 if it can, and it's not been marked, and returns
 * 2 if it can and it has been marked.
 *
 * A non-negative value already present in the table is returned as-is;
 * otherwise the answer (0 or 1) is computed, stored in the table and
 * returned.
 */
static int region_valid(Sentence sent, int lw, int rw,
                        Connector *le, Connector *re)
{
    count_context_t *ctxt = sent->count_ctxt;
    Disjunct *dj;
    Match_node *head, *node;
    int cached;
    int left_ok, right_ok;
    int first_word, last_word, mid;
    int found = 0;

    cached = table_lookup(sent, lw, rw, le, re, 0);
    if (cached >= 0) return cached;

    if ((le == NULL) && (re == NULL) && ctxt->deletable[lw][rw])
    {
        table_store(ctxt, lw, rw, le, re, 0, 1);
        return 1;
    }

    first_word = lw + 1;
    if (le != NULL) first_word = le->word;

    last_word = rw - 1;
    if (re != NULL) last_word = re->word;

    for (mid = first_word; mid <= last_word; mid++)
    {
        head = form_match_list(sent, mid, le, lw, re, rw);

        for (node = head; node != NULL; node = node->next)
        {
            dj = node->d;

            /* The expressions below rely on 0 meaning FALSE, so the
             * 0/1/2 result of region_valid() is used as a truth value. */
            left_ok = 0;
            if ((le != NULL) && (dj->left != NULL) &&
                x_prune_match(ctxt, le, dj->left, lw, mid))
            {
                left_ok =
                    region_valid(sent, lw, mid, le->next, dj->left->next) ||
                    (le->multi &&
                     region_valid(sent, lw, mid, le, dj->left->next)) ||
                    (dj->left->multi &&
                     region_valid(sent, lw, mid, le->next, dj->left)) ||
                    (le->multi && dj->left->multi &&
                     region_valid(sent, lw, mid, le, dj->left));
            }

            /* Left link plus an unlinked (but fillable) right side. */
            if (left_ok && region_valid(sent, mid, rw, dj->right, re))
            {
                found = 1;
                break;
            }

            right_ok = 0;
            if ((dj->right != NULL) && (re != NULL) &&
                x_prune_match(ctxt, dj->right, re, mid, rw))
            {
                right_ok =
                    region_valid(sent, mid, rw, dj->right->next, re->next) ||
                    (dj->right->multi &&
                     region_valid(sent, mid, rw, dj->right, re->next)) ||
                    (re->multi &&
                     region_valid(sent, mid, rw, dj->right->next, re)) ||
                    (dj->right->multi && re->multi &&
                     region_valid(sent, mid, rw, dj->right, re));
            }

            /* Right link, with the left side either linked as well or
             * unlinked but fillable. */
            if (right_ok &&
                (left_ok || region_valid(sent, lw, mid, le, dj->left)))
            {
                found = 1;
                break;
            }
        }

        put_match_list(sent, head);
        if (found != 0) break;
    }

    table_store(ctxt, lw, rw, le, re, 0, found);
    return found;
}
/**
 * Count the number of ways the region between words lw and rw can be
 * parsed, given the pending connector lists le and re, at exactly the
 * given cost. Results are memoized in the count table: a region that
 * already has an entry returns the stored count immediately.
 *
 * pseudocount() is consulted first for each candidate split; the real
 * (recursive) count is computed only when the pseudo-total is non-zero,
 * since a zero pseudo-total implies a zero true total.
 */
static s64 do_count(Sentence sent, int lw, int rw,
                    Connector *le, Connector *re, int cost)
{
    count_context_t *ctxt = sent->count_ctxt;
    Table_connector *slot;
    Match_node *head, *node;
    Disjunct *dj;
    s64 sum, pseudo_sum;
    s64 left_n, right_n;
    int first_word, last_word, mid;
    int lcost, rcost;
    int link_left, link_right;

    if (cost < 0) return 0;  /* will we ever call it with cost<0 ? */

    slot = find_table_pointer(ctxt, lw, rw, le, re, cost);
    if (slot != NULL) return slot->count;

    /* Create the table entry with a tentative count of 0.
     * It must be updated before we return. */
    slot = table_store(ctxt, lw, rw, le, re, cost, 0);

    if (rw == 1+lw)
    {
        /* lw and rw are neighboring words.
         * You can't have a linkage here with cost > 0. */
        if ((le == NULL) && (re == NULL) && (cost == 0))
            slot->count = 1;
        else
            slot->count = 0;
        return slot->count;
    }

    if ((le == NULL) && (re == NULL))
    {
        if (!ctxt->islands_ok && (lw != -1))
        {
            /* If we don't allow islands (a set of words linked together
             * but separate from the rest of the sentence) then the cost
             * of skipping n words is just n, counted in blocks: if
             * null_block=4 then the cost of 1,2,3,4 nulls is 1; and
             * 5,6,7,8 is 2 etc. */
            if (cost == ((rw-lw-1) + ctxt->null_block-1)/ctxt->null_block)
                slot->count = 1;
            else
                slot->count = 0;
            return slot->count;
        }

        if (cost == 0)
        {
            /* There is no zero-cost solution in this case. Separating
             * out the cost=0 case is a slight efficiency hack, not
             * necessary for correctness. */
            slot->count = 0;
            return slot->count;
        }

        sum = 0;
        mid = lw + 1;
        for (dj = ctxt->local_sent[mid].d; dj != NULL; dj = dj->next)
        {
            if (dj->left != NULL) continue;
            sum += do_count(sent, mid, rw, dj->right, NULL, cost-1);
        }
        sum += do_count(sent, mid, rw, NULL, NULL, cost-1);
        slot->count = sum;
        return slot->count;
    }

    first_word = lw + 1;
    if (le != NULL) first_word = le->word;

    last_word = rw - 1;
    if (re != NULL) last_word = re->word;

    sum = 0;
    for (mid = first_word; mid <= last_word; mid++)
    {
        head = form_match_list(sent, mid, le, lw, re, rw);

        for (node = head; node != NULL; node = node->next)
        {
            dj = node->d;

            for (lcost = 0; lcost <= cost; lcost++)
            {
                /* lcost and rcost are the costs assigned to the left
                 * and right sub-regions respectively. */
                rcost = cost - lcost;

                link_left = (le != NULL) && (dj->left != NULL) &&
                            do_match(sent, le, dj->left, lw, mid);
                link_right = (dj->right != NULL) && (re != NULL) &&
                             do_match(sent, dj->right, re, mid, rw);

                /* First pass: table-only pseudo-counts. */
                left_n = 0;
                right_n = 0;

                if (link_left)
                {
                    left_n = pseudocount(sent, lw, mid,
                                         le->next, dj->left->next, lcost);
                    if (le->multi)
                        left_n += pseudocount(sent, lw, mid,
                                         le, dj->left->next, lcost);
                    if (dj->left->multi)
                        left_n += pseudocount(sent, lw, mid,
                                         le->next, dj->left, lcost);
                    if (le->multi && dj->left->multi)
                        left_n += pseudocount(sent, lw, mid,
                                         le, dj->left, lcost);
                }

                if (link_right)
                {
                    right_n = pseudocount(sent, mid, rw,
                                          dj->right->next, re->next, rcost);
                    if (dj->right->multi)
                        right_n += pseudocount(sent, mid, rw,
                                          dj->right, re->next, rcost);
                    if (re->multi)
                        right_n += pseudocount(sent, mid, rw,
                                          dj->right->next, re, rcost);
                    if (dj->right->multi && re->multi)
                        right_n += pseudocount(sent, mid, rw,
                                          dj->right, re, rcost);
                }

                /* Total number where links are used on both sides. */
                pseudo_sum = left_n * right_n;

                if (left_n > 0)
                {
                    /* Left match used, but not the right. */
                    pseudo_sum += left_n *
                        pseudocount(sent, mid, rw, dj->right, re, rcost);
                }
                if ((le == NULL) && (right_n > 0))
                {
                    /* Right match used, but not the left. */
                    pseudo_sum += right_n *
                        pseudocount(sent, lw, mid, le, dj->left, lcost);
                }

                /* A zero pseudo-total means the true total is zero. */
                if (pseudo_sum == 0) continue;

                /* Second pass: the real counts. */
                left_n = 0;
                right_n = 0;

                if (link_left)
                {
                    left_n = do_count(sent, lw, mid,
                                      le->next, dj->left->next, lcost);
                    if (le->multi)
                        left_n += do_count(sent, lw, mid,
                                      le, dj->left->next, lcost);
                    if (dj->left->multi)
                        left_n += do_count(sent, lw, mid,
                                      le->next, dj->left, lcost);
                    if (le->multi && dj->left->multi)
                        left_n += do_count(sent, lw, mid,
                                      le, dj->left, lcost);
                }

                if (link_right)
                {
                    right_n = do_count(sent, mid, rw,
                                       dj->right->next, re->next, rcost);
                    if (dj->right->multi)
                        right_n += do_count(sent, mid, rw,
                                       dj->right, re->next, rcost);
                    if (re->multi)
                        right_n += do_count(sent, mid, rw,
                                       dj->right->next, re, rcost);
                    if (dj->right->multi && re->multi)
                        right_n += do_count(sent, mid, rw,
                                       dj->right, re, rcost);
                }

                /* Total number where links are used on both sides. */
                sum += left_n * right_n;

                if (left_n > 0)
                {
                    /* Left match used, but not the right. */
                    sum += left_n *
                        do_count(sent, mid, rw, dj->right, re, rcost);
                }
                if ((le == NULL) && (right_n > 0))
                {
                    /* Right match used, but not the left. */
                    sum += right_n *
                        do_count(sent, lw, mid, le, dj->left, lcost);
                }
            }
        }
        put_match_list(sent, head);
    }

    slot->count = sum;
    return sum;
}
/**
 * Count (as a Count_bin) the ways the region between words lw and rw
 * can be parsed, given the pending connector lists le and re, with
 * exactly null_count null links. Results are memoized in the count
 * table: a region that already has an entry returns its stored count.
 *
 * For each candidate split, pseudocount() is consulted first; the real
 * recursive count is performed only when the pseudo-total is true,
 * because a false pseudo-total implies the true total is zero.
 * If the running total exceeds INT_MAX it is clamped to INT_MAX,
 * stored, and returned immediately.
 */
static Count_bin do_count(fast_matcher_t *mchxt, count_context_t *ctxt,
                          int lw, int rw, Connector *le, Connector *re,
                          int null_count)
{
    Count_bin zero = hist_zero();
    Count_bin sum;
    Table_connector *slot;
    int first_word, stop_word, mid;
    unsigned int null_limit;  /* null_count + 1; avoids a gcc warning
                               * about unsafe loop optimizations */

    assert (0 <= null_count, "Bad null count");

    slot = find_table_pointer(ctxt, lw, rw, le, re, null_count);
    if (slot) return slot->count;

    /* Create the table entry; its count must be set before we return. */
    slot = table_store(ctxt, lw, rw, le, re, null_count);

    if (rw == 1+lw)
    {
        /* lw and rw are neighboring words.
         * You can't have a linkage here with null_count > 0. */
        if ((le == NULL) && (re == NULL) && (null_count == 0))
        {
            slot->count = hist_one();
        }
        else
        {
            slot->count = zero;
        }
        return slot->count;
    }

    /* The left and right connectors are null, but the two words are
     * NOT next to each other. */
    if ((le == NULL) && (re == NULL))
    {
        Disjunct *cand;
        int next_word;

        if (!ctxt->islands_ok && (lw != -1))
        {
            /* If we don't allow islands (a set of words linked together
             * but separate from the rest of the sentence) then the
             * null_count of skipping n words is just n. */
            if (null_count == (rw-lw-1))
            {
                slot->count = hist_one();
            }
            else
            {
                slot->count = zero;
            }
            return slot->count;
        }

        /* With null_count == 0 there is no solution without nulls.
         * Separating this case out is a slight efficiency hack, not
         * necessary for correctness. */
        slot->count = zero;
        if (null_count == 0) return slot->count;

        next_word = lw + 1;
        for (cand = ctxt->local_sent[next_word].d; cand != NULL;
             cand = cand->next)
        {
            if (cand->left != NULL) continue;
            hist_accumv(&slot->count, cand->cost,
                do_count(mchxt, ctxt, next_word, rw,
                         cand->right, NULL, null_count-1));
        }
        hist_accumv(&slot->count, 0.0,
            do_count(mchxt, ctxt, next_word, rw,
                     NULL, NULL, null_count-1));
        return slot->count;
    }

    first_word = lw + 1;
    if (le != NULL) first_word = le->word;

    stop_word = rw;
    if (re != NULL) stop_word = re->word + 1;

    null_limit = null_count + 1;
    sum = zero;

    for (mid = first_word; mid < stop_word; mid++)
    {
        Match_node *head = form_match_list(mchxt, mid, le, lw, re, rw);
        Match_node *node;

        for (node = head; node != NULL; node = node->next)
        {
            Disjunct *dj = node->d;
            unsigned int lnulls;

            for (lnulls = 0; lnulls < null_limit; lnulls++)
            {
                /* lnulls and rnulls are the null counts assigned to
                 * the left and right sub-regions respectively. */
                unsigned int rnulls = null_count - lnulls;
                bool link_left, link_right;
                bool left_possible = false;
                bool right_possible = false;
                bool worth_counting = false;
                Count_bin left_n;
                Count_bin right_n;

                link_left = (le != NULL) && (dj->left != NULL) &&
                            do_match(le, dj->left, lw, mid);
                link_right = (dj->right != NULL) && (re != NULL) &&
                             do_match(dj->right, re, mid, rw);

                /* First, perform pseudocounting as an optimization.
                 * If the pseudocount is zero, then the true count is
                 * zero too, and counting is skipped entirely. */
                if (link_left)
                {
                    left_possible = pseudocount(ctxt, lw, mid,
                                        le->next, dj->left->next, lnulls);
                    if (!left_possible && le->multi)
                        left_possible = pseudocount(ctxt, lw, mid,
                                        le, dj->left->next, lnulls);
                    if (!left_possible && dj->left->multi)
                        left_possible = pseudocount(ctxt, lw, mid,
                                        le->next, dj->left, lnulls);
                    if (!left_possible && le->multi && dj->left->multi)
                        left_possible = pseudocount(ctxt, lw, mid,
                                        le, dj->left, lnulls);
                }

                if (link_right)
                {
                    right_possible = pseudocount(ctxt, mid, rw,
                                        dj->right->next, re->next, rnulls);
                    if (!right_possible && dj->right->multi)
                        right_possible = pseudocount(ctxt, mid, rw,
                                        dj->right, re->next, rnulls);
                    if (!right_possible && re->multi)
                        right_possible = pseudocount(ctxt, mid, rw,
                                        dj->right->next, re, rnulls);
                    if (!right_possible && dj->right->multi && re->multi)
                        right_possible = pseudocount(ctxt, mid, rw,
                                        dj->right, re, rnulls);
                }

                /* Links used on both sides. */
                worth_counting = left_possible && right_possible;

                if (!worth_counting && left_possible)
                {
                    /* Left match used, but not the right. */
                    worth_counting = pseudocount(ctxt, mid, rw,
                                        dj->right, re, rnulls);
                }
                if (!worth_counting && (le == NULL) && right_possible)
                {
                    /* Right match used, but not the left. */
                    worth_counting = pseudocount(ctxt, lw, mid,
                                        le, dj->left, lnulls);
                }

                /* A false pseudo-total means the true total is zero,
                 * so there is nothing to count for this split. */
                if (!worth_counting) continue;

                left_n = zero;
                right_n = zero;

                if (link_left)
                {
                    left_n = do_count(mchxt, ctxt, lw, mid,
                                      le->next, dj->left->next, lnulls);
                    if (le->multi)
                        hist_accumv(&left_n, dj->cost,
                            do_count(mchxt, ctxt, lw, mid,
                                     le, dj->left->next, lnulls));
                    if (dj->left->multi)
                        hist_accumv(&left_n, dj->cost,
                            do_count(mchxt, ctxt, lw, mid,
                                     le->next, dj->left, lnulls));
                    if (le->multi && dj->left->multi)
                        hist_accumv(&left_n, dj->cost,
                            do_count(mchxt, ctxt, lw, mid,
                                     le, dj->left, lnulls));
                }

                if (link_right)
                {
                    right_n = do_count(mchxt, ctxt, mid, rw,
                                       dj->right->next, re->next, rnulls);
                    if (dj->right->multi)
                        hist_accumv(&right_n, dj->cost,
                            do_count(mchxt, ctxt, mid, rw,
                                     dj->right, re->next, rnulls));
                    if (re->multi)
                        hist_accumv(&right_n, dj->cost,
                            do_count(mchxt, ctxt, mid, rw,
                                     dj->right->next, re, rnulls));
                    if (dj->right->multi && re->multi)
                        hist_accumv(&right_n, dj->cost,
                            do_count(mchxt, ctxt, mid, rw,
                                     dj->right, re, rnulls));
                }

                /* Total number where links are used on both sides. */
                hist_muladd(&sum, &left_n, 0.0, &right_n);

                if (0 < hist_total(&left_n))
                {
                    /* Left match used, but not the right. */
                    hist_muladdv(&sum, &left_n, dj->cost,
                        do_count(mchxt, ctxt, mid, rw,
                                 dj->right, re, rnulls));
                }
                if ((le == NULL) && (0 < hist_total(&right_n)))
                {
                    /* Right match used, but not the left. */
                    hist_muladdv(&sum, &right_n, dj->cost,
                        do_count(mchxt, ctxt, lw, mid,
                                 le, dj->left, lnulls));
                }

                /* Sigh. Overflows can and do occur, esp for the ANY
                 * language. Clamp, record, and bail out. */
                if (INT_MAX < hist_total(&sum))
                {
#ifdef PERFORM_COUNT_HISTOGRAMMING
                    sum.total = INT_MAX;
#else
                    sum = INT_MAX;
#endif /* PERFORM_COUNT_HISTOGRAMMING */
                    slot->count = sum;
                    put_match_list(mchxt, head);
                    return sum;
                }
            }
        }
        put_match_list(mchxt, head);
    }

    slot->count = sum;
    return sum;
}