/**
 * Remove disjuncts that cannot contribute to any linkage of a sentence
 * that contains a conjunction.
 *
 * The caller is expected to have already built the sentence disjuncts,
 * pruned them and power_pruned(GENTLE) them, and to have initialized
 * deletable[][] so that it indicates the ranges which may be deleted
 * in the final linkage.
 *
 * The routine works in three steps: every disjunct is first flagged as
 * irrelevant (unmarked), then the ones that might be usable are marked,
 * and finally the disjuncts that are still unmarked are deleted.
 *
 * @param sent  Sentence whose disjuncts are pruned in place.
 * @param opts  Parse options; supplies the resources handle and
 *              min_null_count (null links are enabled when it is > 0).
 */
void conjunction_prune(Sentence sent, Parse_Options opts)
{
	count_context_t *ctxt = sent->count_ctxt;
	Disjunct *dj;
	int start;
	int w;

	/* Install the per-run state in the counting context. */
	ctxt->current_resources = opts->resources;
	ctxt->deletable = sent->deletable;
	count_set_effective_distance(sent);

	/* Clear the mark on every disjunct of every word. This would be
	 * unnecessary if the marked field were cleared wherever a disjunct
	 * is created, but those places were never tracked down.
	 * XXX FIXME, someday ... */
	for (w = 0; w < sent->length; w++)
	{
		dj = sent->word[w].d;
		while (dj != NULL)
		{
			dj->marked = FALSE;
			dj = dj->next;
		}
	}

	init_fast_matcher(sent);
	ctxt->local_sent = sent->word;
	ctxt->null_links = (opts->min_null_count > 0);

	if (ctxt->null_links)
	{
		/* With null links allowed, mark starting from the region that
		 * spans the whole sentence, with no pending connectors. */
		mark_region(sent, -1, sent->length, NULL, NULL);
	}
	else
	{
		for (start = 0; start < sent->length; start++)
		{
			/* Consider removing the words [0,start-1] from the
			 * beginning of the sentence; skip if not deletable. */
			if (!ctxt->deletable[-1][start]) continue;

			for (dj = sent->word[start].d; dj != NULL; dj = dj->next)
			{
				/* Only disjuncts without left connectors can
				 * begin the remaining sentence. */
				if (dj->left != NULL) continue;
				if (!region_valid(sent, start, sent->length, dj->right, NULL)) continue;

				mark_region(sent, start, sent->length, dj->right, NULL);
				dj->marked = TRUE;
			}
		}
	}

	delete_unmarked_disjuncts(sent);

	/* Tear down everything that was set up above. */
	free_fast_matcher(sent);
	ctxt->local_sent = NULL;
	ctxt->current_resources = NULL;
	ctxt->deletable = NULL;
	count_unset_effective_distance(sent);
}
/**
 * Mark as useful every disjunct that is involved in some way in
 * completing the structure within the region (lw, rw) whose pending
 * connectors are le (on the left) and re (on the right).
 *
 * Only disjuncts on words strictly between lw and rw are marked.
 * If region_valid() reports the region as invalid (0) or as already
 * marked (2), nothing is done. Otherwise the region's table entry is
 * updated to 2 and the sub-regions that make it valid are marked
 * recursively.
 */
static void mark_region(Sentence sent, int lw, int rw, Connector *le, Connector *re)
{
	count_context_t *ctxt = sent->count_ctxt;
	Match_node *matches, *node;
	Disjunct *dj;
	int first_word, last_word;
	int links_left, links_right;
	int status;
	int w;

	status = region_valid(sent, lw, rw, le, re);
	if (status == 0 || status == 2) return;

	/* From here on the region is valid and unmarked (status 1).
	 * Record it as marked before recursing into sub-regions. */
	table_update(ctxt, lw, rw, le, re, 0, 2);

	if (le == NULL && re == NULL && ctxt->null_links && rw != lw + 1)
	{
		/* No pending connectors and null links are allowed: step to
		 * the next word, trying each of its disjuncts that has no
		 * left connectors, and also the connector-less region. */
		w = lw + 1;
		dj = ctxt->local_sent[w].d;
		while (dj != NULL)
		{
			if (dj->left == NULL && region_valid(sent, w, rw, dj->right, NULL))
			{
				dj->marked = TRUE;
				mark_region(sent, w, rw, dj->right, NULL);
			}
			dj = dj->next;
		}
		mark_region(sent, w, rw, NULL, NULL);
		return;
	}

	/* Range of candidate words: bounded by the connectors' word
	 * fields when present, otherwise by the region itself. */
	first_word = (le == NULL) ? lw + 1 : le->word;
	last_word = (re == NULL) ? rw - 1 : re->word;

	for (w = first_word; w <= last_word; w++)
	{
		matches = form_match_list(sent, w, le, lw, re, rw);

		for (node = matches; node != NULL; node = node->next)
		{
			dj = node->d;

			/* Can le link to this disjunct's left connector, with
			 * a valid remainder between lw and w? The four
			 * alternatives cover each side either advancing to
			 * its next connector or, if multi, staying put. */
			links_left = 0;
			if (le != NULL && dj->left != NULL &&
			    x_prune_match(ctxt, le, dj->left, lw, w))
			{
				links_left =
					region_valid(sent, lw, w, le->next, dj->left->next) ||
					(le->multi &&
					 region_valid(sent, lw, w, le, dj->left->next)) ||
					(dj->left->multi &&
					 region_valid(sent, lw, w, le->next, dj->left)) ||
					(le->multi && dj->left->multi &&
					 region_valid(sent, lw, w, le, dj->left));
			}

			/* Same question for the disjunct's right connector
			 * and re, between w and rw. */
			links_right = 0;
			if (dj->right != NULL && re != NULL &&
			    x_prune_match(ctxt, dj->right, re, w, rw))
			{
				links_right =
					region_valid(sent, w, rw, dj->right->next, re->next) ||
					(dj->right->multi &&
					 region_valid(sent, w, rw, dj->right, re->next)) ||
					(re->multi &&
					 region_valid(sent, w, rw, dj->right->next, re)) ||
					(dj->right->multi && re->multi &&
					 region_valid(sent, w, rw, dj->right, re));
			}

			/* The three cases below could be restructured to
			 * avoid superfluous calls to mark_region; that was
			 * never considered a priority. */

			/* Linked on the left only; right side stays pending. */
			if (links_left && region_valid(sent, w, rw, dj->right, re))
			{
				dj->marked = TRUE;
				mark_region(sent, w, rw, dj->right, re);
				mark_region(sent, lw, w, le->next, dj->left->next);
				if (le->multi)
					mark_region(sent, lw, w, le, dj->left->next);
				if (dj->left->multi)
					mark_region(sent, lw, w, le->next, dj->left);
				if (le->multi && dj->left->multi)
					mark_region(sent, lw, w, le, dj->left);
			}

			/* Linked on the right only; left side stays pending. */
			if (links_right && region_valid(sent, lw, w, le, dj->left))
			{
				dj->marked = TRUE;
				mark_region(sent, lw, w, le, dj->left);
				mark_region(sent, w, rw, dj->right->next, re->next);
				if (dj->right->multi)
					mark_region(sent, w, rw, dj->right, re->next);
				if (re->multi)
					mark_region(sent, w, rw, dj->right->next, re);
				if (dj->right->multi && re->multi)
					mark_region(sent, w, rw, dj->right, re);
			}

			/* Linked on both sides. */
			if (links_left && links_right)
			{
				dj->marked = TRUE;
				mark_region(sent, lw, w, le->next, dj->left->next);
				if (le->multi)
					mark_region(sent, lw, w, le, dj->left->next);
				if (dj->left->multi)
					mark_region(sent, lw, w, le->next, dj->left);
				if (le->multi && dj->left->multi)
					mark_region(sent, lw, w, le, dj->left);
				mark_region(sent, w, rw, dj->right->next, re->next);
				if (dj->right->multi)
					mark_region(sent, w, rw, dj->right, re->next);
				if (re->multi)
					mark_region(sent, w, rw, dj->right->next, re);
				if (dj->right->multi && re->multi)
					mark_region(sent, w, rw, dj->right, re);
			}
		}

		put_match_list(sent, matches);
	}
}
/**
 * Decide whether the region (lw, rw) with pending connectors le and re
 * can be successfully filled in with links.
 *
 * The answer is memoized: a table entry, when present, is returned
 * as is; otherwise the computed result is stored before returning.
 *
 * @return 0 if the range cannot be filled in with links,
 *         1 if it can and it has not been marked,
 *         2 if it can and it has been marked.
 */
static int region_valid(Sentence sent, int lw, int rw, Connector *le, Connector *re)
{
	count_context_t *ctxt = sent->count_ctxt;
	Match_node *matches, *node;
	Disjunct *dj;
	int first_word, last_word;
	int links_left, links_right;
	int cached;
	int found = 0;
	int w;

	/* A non-negative lookup result is a previously recorded answer. */
	cached = table_lookup(sent, lw, rw, le, re, 0);
	if (cached >= 0) return cached;

	/* With no pending connectors, a deletable range is valid outright. */
	if (le == NULL && re == NULL && ctxt->deletable[lw][rw])
	{
		table_store(ctxt, lw, rw, le, re, 0, 1);
		return 1;
	}

	/* Range of candidate words: bounded by the connectors' word
	 * fields when present, otherwise by the region itself. */
	first_word = (le == NULL) ? lw + 1 : le->word;
	last_word = (re == NULL) ? rw - 1 : re->word;

	for (w = first_word; w <= last_word; w++)
	{
		matches = form_match_list(sent, w, le, lw, re, rw);

		for (node = matches; node != NULL; node = node->next)
		{
			dj = node->d;

			/* The tests below rely on 0 meaning FALSE; that could
			 * be made explicit by always writing
			 * "region_valid(...) != 0". */

			/* Can le link to this disjunct's left connector, with
			 * a valid remainder between lw and w? */
			links_left = 0;
			if (le != NULL && dj->left != NULL &&
			    x_prune_match(ctxt, le, dj->left, lw, w))
			{
				links_left =
					region_valid(sent, lw, w, le->next, dj->left->next) ||
					(le->multi &&
					 region_valid(sent, lw, w, le, dj->left->next)) ||
					(dj->left->multi &&
					 region_valid(sent, lw, w, le->next, dj->left)) ||
					(le->multi && dj->left->multi &&
					 region_valid(sent, lw, w, le, dj->left));
			}

			/* Left link plus a valid pending right side suffices. */
			if (links_left && region_valid(sent, w, rw, dj->right, re))
			{
				found = 1;
				break;
			}

			/* Same question for the disjunct's right connector
			 * and re, between w and rw. */
			links_right = 0;
			if (dj->right != NULL && re != NULL &&
			    x_prune_match(ctxt, dj->right, re, w, rw))
			{
				links_right =
					region_valid(sent, w, rw, dj->right->next, re->next) ||
					(dj->right->multi &&
					 region_valid(sent, w, rw, dj->right, re->next)) ||
					(re->multi &&
					 region_valid(sent, w, rw, dj->right->next, re)) ||
					(dj->right->multi && re->multi &&
					 region_valid(sent, w, rw, dj->right, re));
			}

			/* A right link works together with either a left link
			 * or a valid pending left side. */
			if (links_right &&
			    (links_left || region_valid(sent, lw, w, le, dj->left)))
			{
				found = 1;
				break;
			}
		}

		/* Release the match list on the early-exit path too. */
		put_match_list(sent, matches);
		if (found) break;
	}

	table_store(ctxt, lw, rw, le, re, 0, found);
	return found;
}