/**
 * Memoized recursive count for the span between words lw and rw.
 *
 * The result for every distinct (lw, rw, le, re, cost) key is kept in the
 * count context's table, so each sub-problem is evaluated only once.
 *
 * @param sent  Sentence being counted; sent->count_ctxt supplies the table
 *              and per-word disjunct lists, and sent is forwarded to the
 *              matching helpers.
 * @param lw    Index of the left boundary word. The no-islands shortcut
 *              below is not taken when this is -1.
 * @param rw    Index of the right boundary word.
 * @param le    Connector list still open on the left boundary (may be NULL).
 * @param re    Connector list still open on the right boundary (may be NULL).
 * @param cost  Cost that must be spent inside the span; the no-islands
 *              branch treats it as a count of skipped-word blocks.
 * @return      The count recorded in the table for this key, or 0 when
 *              cost is negative.
 */
static s64 do_count(Sentence sent, int lw, int rw,
                    Connector *le, Connector *re, int cost)
{
    count_context_t *ctxt = sent->count_ctxt;
    Table_connector *entry;
    int first_word, last_word, w;
    s64 sum;

    /* A negative cost can never be met. */
    if (cost < 0) return 0;

    /* Already solved?  Then the table holds the answer. */
    entry = find_table_pointer(ctxt, lw, rw, le, re, cost);
    if (entry != NULL) return entry->count;

    /* Reserve the slot with a tentative count of 0; every path below
     * overwrites it with the real value before returning. */
    entry = table_store(ctxt, lw, rw, le, re, cost, 0);

    /* Adjacent words: the only solution is the empty one, which needs
     * no open connectors and no cost. */
    if (rw == lw + 1)
    {
        entry->count = ((le == NULL) && (re == NULL) && (cost == 0)) ? 1 : 0;
        return entry->count;
    }

    /* Nothing to connect on either side, yet there are words in between. */
    if ((le == NULL) && (re == NULL))
    {
        Disjunct *dj;

        if (!ctxt->islands_ok && (lw != -1))
        {
            /* Without islands every in-between word must be skipped, so
             * the cost is fixed: the number of skipped words divided by
             * null_block, rounded up (with null_block=4, skipping 1..4
             * words costs 1, 5..8 costs 2, and so on). */
            entry->count =
                (cost == ((rw - lw - 1) + ctxt->null_block - 1) / ctxt->null_block)
                ? 1 : 0;
            return entry->count;
        }

        /* No zero-cost solution exists here.  Handling it separately is
         * only a shortcut; the general code would give the same answer. */
        if (cost == 0)
        {
            entry->count = 0;
            return entry->count;
        }

        /* Spend one unit of cost on word lw+1: either it starts a new
         * group through a disjunct with no left connectors, or it is
         * left out altogether. */
        sum = 0;
        w = lw + 1;
        for (dj = ctxt->local_sent[w].d; dj != NULL; dj = dj->next)
        {
            if (dj->left != NULL) continue;
            sum += do_count(sent, w, rw, dj->right, NULL, cost - 1);
        }
        sum += do_count(sent, w, rw, NULL, NULL, cost - 1);

        entry->count = sum;
        return entry->count;
    }

    /* Range of words that may be linked to the boundary connectors. */
    first_word = (le == NULL) ? lw + 1 : le->word;
    last_word  = (re == NULL) ? rw - 1 : re->word;

    sum = 0;
    for (w = first_word; w <= last_word; w++)
    {
        Match_node *match_list = form_match_list(sent, w, le, lw, re, rw);
        Match_node *node;

        for (node = match_list; node != NULL; node = node->next)
        {
            Disjunct *dj = node->d;
            int left_cost;

            /* Try every split of the cost between the two halves. */
            for (left_cost = 0; left_cost <= cost; left_cost++)
            {
                int right_cost = cost - left_cost;
                int left_ok, right_ok;
                s64 left_est = 0, right_est = 0, estimate;
                s64 left_ways = 0, right_ways = 0;

                left_ok = (le != NULL) && (dj->left != NULL) &&
                          do_match(sent, le, dj->left, lw, w);
                right_ok = (dj->right != NULL) && (re != NULL) &&
                           do_match(sent, dj->right, re, w, rw);

                /* Cheap pass: consult the table only, to see whether this
                 * split is already known to be unparsable. */
                if (left_ok)
                {
                    left_est = pseudocount(sent, lw, w, le->next, dj->left->next, left_cost);
                    if (le->multi)
                        left_est += pseudocount(sent, lw, w, le, dj->left->next, left_cost);
                    if (dj->left->multi)
                        left_est += pseudocount(sent, lw, w, le->next, dj->left, left_cost);
                    if (le->multi && dj->left->multi)
                        left_est += pseudocount(sent, lw, w, le, dj->left, left_cost);
                }

                if (right_ok)
                {
                    right_est = pseudocount(sent, w, rw, dj->right->next, re->next, right_cost);
                    if (dj->right->multi)
                        right_est += pseudocount(sent, w, rw, dj->right, re->next, right_cost);
                    if (re->multi)
                        right_est += pseudocount(sent, w, rw, dj->right->next, re, right_cost);
                    if (dj->right->multi && re->multi)
                        right_est += pseudocount(sent, w, rw, dj->right, re, right_cost);
                }

                /* Links used on both sides ... */
                estimate = left_est * right_est;
                if (left_est > 0)
                {
                    /* ... on the left only ... */
                    estimate += left_est *
                        pseudocount(sent, w, rw, dj->right, re, right_cost);
                }
                if ((le == NULL) && (right_est > 0))
                {
                    /* ... or on the right only. */
                    estimate += right_est *
                        pseudocount(sent, lw, w, le, dj->left, left_cost);
                }

                /* A zero estimate means the true count is known to be zero. */
                if (estimate == 0) continue;

                /* Full pass: the same combinations, really counted. */
                if (left_ok)
                {
                    left_ways = do_count(sent, lw, w, le->next, dj->left->next, left_cost);
                    if (le->multi)
                        left_ways += do_count(sent, lw, w, le, dj->left->next, left_cost);
                    if (dj->left->multi)
                        left_ways += do_count(sent, lw, w, le->next, dj->left, left_cost);
                    if (le->multi && dj->left->multi)
                        left_ways += do_count(sent, lw, w, le, dj->left, left_cost);
                }

                if (right_ok)
                {
                    right_ways = do_count(sent, w, rw, dj->right->next, re->next, right_cost);
                    if (dj->right->multi)
                        right_ways += do_count(sent, w, rw, dj->right, re->next, right_cost);
                    if (re->multi)
                        right_ways += do_count(sent, w, rw, dj->right->next, re, right_cost);
                    if (dj->right->multi && re->multi)
                        right_ways += do_count(sent, w, rw, dj->right, re, right_cost);
                }

                /* Links used on both sides. */
                sum += left_ways * right_ways;

                if (left_ways > 0)
                {
                    /* Left match used, right side resolved without it. */
                    sum += left_ways *
                        do_count(sent, w, rw, dj->right, re, right_cost);
                }
                if ((le == NULL) && (right_ways > 0))
                {
                    /* Right match used, left side resolved without it. */
                    sum += right_ways *
                        do_count(sent, lw, w, le, dj->left, left_cost);
                }
            }
        }

        put_match_list(sent, match_list);
    }

    entry->count = sum;
    return sum;
}
/**
 * Memoized recursive count for the span between words lw and rw.
 *
 * Each distinct (lw, rw, le, re, null_count) key is evaluated once and its
 * result kept in the count context's table.
 *
 * @param mchxt       Matcher handed to form_match_list()/put_match_list().
 * @param ctxt        Counting context: memo table, per-word disjunct lists
 *                    and the islands_ok flag.
 * @param lw          Index of the left boundary word. The no-islands
 *                    shortcut below is not taken when this is -1.
 * @param rw          Index of the right boundary word.
 * @param le          Connector list still open on the left (may be NULL).
 * @param re          Connector list still open on the right (may be NULL).
 * @param null_count  Number of nulls to be used inside the span; must not
 *                    be negative (asserted).
 * @return            The count recorded for this key. Once the running
 *                    total exceeds INT_MAX it is clamped to INT_MAX,
 *                    stored, and returned immediately.
 */
static Count_bin do_count(fast_matcher_t *mchxt, count_context_t *ctxt,
                          int lw, int rw, Connector *le, Connector *re,
                          int null_count)
{
    Count_bin no_count = hist_zero();
    Count_bin sum;
    int first_word, stop_word, w;
    Table_connector *entry;

    assert (0 <= null_count, "Bad null count");

    /* Already solved?  Then the table holds the answer. */
    entry = find_table_pointer(ctxt, lw, rw, le, re, null_count);
    if (entry) return entry->count;

    /* Reserve the slot now; its count must be filled in on every path
     * below before returning. */
    entry = table_store(ctxt, lw, rw, le, re, null_count);

    /* Adjacent words: only the empty solution exists, and it needs no
     * open connectors and no nulls. */
    if (rw == lw + 1)
    {
        if ((le == NULL) && (re == NULL) && (null_count == 0))
            entry->count = hist_one();
        else
            entry->count = no_count;
        return entry->count;
    }

    /* Both connector lists are empty, but the words are NOT neighbors. */
    if ((le == NULL) && (re == NULL))
    {
        if (!ctxt->islands_ok && (lw != -1))
        {
            /* Without islands (words linked to each other but cut off
             * from the rest of the sentence), skipping n words takes
             * exactly n nulls. */
            if (null_count == (rw - lw - 1))
                entry->count = hist_one();
            else
                entry->count = no_count;
            return entry->count;
        }

        /* With no nulls to spend there is no solution; the count simply
         * stays at zero.  (Skipping the loop in that case is only a
         * shortcut, not needed for correctness.) */
        entry->count = no_count;
        if (null_count != 0)
        {
            Disjunct *dj;
            int next_word = lw + 1;

            /* Spend one null on word lw+1: either it opens a new group
             * via a disjunct with no left connectors ... */
            for (dj = ctxt->local_sent[next_word].d; dj != NULL; dj = dj->next)
            {
                if (dj->left != NULL) continue;
                hist_accumv(&entry->count, dj->cost,
                    do_count(mchxt, ctxt, next_word, rw, dj->right, NULL, null_count-1));
            }

            /* ... or it is dropped altogether. */
            hist_accumv(&entry->count, 0.0,
                do_count(mchxt, ctxt, next_word, rw, NULL, NULL, null_count-1));
        }
        return entry->count;
    }

    /* Half-open range [first_word, stop_word) of candidate middle words. */
    first_word = (le == NULL) ? lw + 1 : le->word;
    stop_word  = (re == NULL) ? rw : re->word + 1;

    sum = no_count;
    for (w = first_word; w < stop_word; w++)
    {
        Match_node *matches = form_match_list(mchxt, w, le, lw, re, rw);
        Match_node *mn;

        for (mn = matches; mn != NULL; mn = mn->next)
        {
            Disjunct *dj = mn->d;
            unsigned int lnulls, rnulls;
            /* The "+1" temporary avoids a gcc warning about unsafe
             * loop optimization. */
            unsigned int null_limit = null_count + 1;

            /* Try every split of the nulls between the two halves. */
            for (lnulls = 0; lnulls < null_limit; lnulls++)
            {
                bool left_links, right_links;
                bool left_maybe = false;
                bool right_maybe = false;
                bool worth_counting = false;
                Count_bin leftcount = no_count;
                Count_bin rightcount = no_count;

                rnulls = null_count - lnulls;

                left_links = (le != NULL) && (dj->left != NULL) &&
                             do_match(le, dj->left, lw, w);
                right_links = (dj->right != NULL) && (re != NULL) &&
                              do_match(dj->right, re, w, rw);

                /* Pseudocounting is an optimization: it looks at the
                 * table only.  If it reports zero, the true count is
                 * known to be zero and the real counting is skipped.
                 * Each chain stops at the first non-zero answer. */
                if (left_links)
                {
                    left_maybe =
                        pseudocount(ctxt, lw, w, le->next, dj->left->next, lnulls)
                        || (le->multi &&
                            pseudocount(ctxt, lw, w, le, dj->left->next, lnulls))
                        || (dj->left->multi &&
                            pseudocount(ctxt, lw, w, le->next, dj->left, lnulls))
                        || (le->multi && dj->left->multi &&
                            pseudocount(ctxt, lw, w, le, dj->left, lnulls));
                }

                if (right_links)
                {
                    right_maybe =
                        pseudocount(ctxt, w, rw, dj->right->next, re->next, rnulls)
                        || (dj->right->multi &&
                            pseudocount(ctxt, w, rw, dj->right, re->next, rnulls))
                        || (re->multi &&
                            pseudocount(ctxt, w, rw, dj->right->next, re, rnulls))
                        || (dj->right->multi && re->multi &&
                            pseudocount(ctxt, w, rw, dj->right, re, rnulls));
                }

                worth_counting =
                    /* links used on both sides, */
                    (left_maybe && right_maybe)
                    /* or the left match without the right, */
                    || (left_maybe &&
                        pseudocount(ctxt, w, rw, dj->right, re, rnulls))
                    /* or the right match without the left. */
                    || ((le == NULL) && right_maybe &&
                        pseudocount(ctxt, lw, w, le, dj->left, lnulls));

                if (!worth_counting) continue;

                /* Real counting of the same combinations. */
                if (left_links)
                {
                    leftcount = do_count(mchxt, ctxt, lw, w, le->next, dj->left->next, lnulls);
                    if (le->multi)
                        hist_accumv(&leftcount, dj->cost,
                            do_count(mchxt, ctxt, lw, w, le, dj->left->next, lnulls));
                    if (dj->left->multi)
                        hist_accumv(&leftcount, dj->cost,
                            do_count(mchxt, ctxt, lw, w, le->next, dj->left, lnulls));
                    if (le->multi && dj->left->multi)
                        hist_accumv(&leftcount, dj->cost,
                            do_count(mchxt, ctxt, lw, w, le, dj->left, lnulls));
                }

                if (right_links)
                {
                    rightcount = do_count(mchxt, ctxt, w, rw, dj->right->next, re->next, rnulls);
                    if (dj->right->multi)
                        hist_accumv(&rightcount, dj->cost,
                            do_count(mchxt, ctxt, w, rw, dj->right, re->next, rnulls));
                    if (re->multi)
                        hist_accumv(&rightcount, dj->cost,
                            do_count(mchxt, ctxt, w, rw, dj->right->next, re, rnulls));
                    if (dj->right->multi && re->multi)
                        hist_accumv(&rightcount, dj->cost,
                            do_count(mchxt, ctxt, w, rw, dj->right, re, rnulls));
                }

                /* Links used on both sides. */
                hist_muladd(&sum, &leftcount, 0.0, &rightcount);

                if (0 < hist_total(&leftcount))
                {
                    /* Left match used, right side resolved without it. */
                    hist_muladdv(&sum, &leftcount, dj->cost,
                        do_count(mchxt, ctxt, w, rw, dj->right, re, rnulls));
                }
                if ((le == NULL) && (0 < hist_total(&rightcount)))
                {
                    /* Right match used, left side resolved without it. */
                    hist_muladdv(&sum, &rightcount, dj->cost,
                        do_count(mchxt, ctxt, lw, w, le, dj->left, lnulls));
                }

                /* Overflows can and do occur, especially for the ANY
                 * language: clamp, record, release the match list and
                 * stop right away. */
                if (INT_MAX < hist_total(&sum))
                {
#ifdef PERFORM_COUNT_HISTOGRAMMING
                    sum.total = INT_MAX;
#else
                    sum = INT_MAX;
#endif /* PERFORM_COUNT_HISTOGRAMMING */
                    entry->count = sum;
                    put_match_list(mchxt, matches);
                    return sum;
                }
            }
        }

        put_match_list(mchxt, matches);
    }

    entry->count = sum;
    return sum;
}