示例#1
0
文件: count.c 项目: luyi326/linguo
/**
 * We've already built the sentence disjuncts, and we've pruned them
 * and power_pruned(GENTLE) them also.  The sentence contains a
 * conjunction.  deletable[][] has been initialized to indicate the
 * ranges which may be deleted in the final linkage.
 *
 * This routine deletes irrelevant disjuncts.  It finds them by first
 * marking them all as irrelevant, and then marking the ones that
 * might be useable.  Finally, the unmarked ones are removed.
 */
void conjunction_prune(Sentence sent, Parse_Options opts)
{
	Disjunct * d;
	int w;
	count_context_t *ctxt = sent->count_ctxt;

	ctxt->current_resources = opts->resources;
	ctxt->deletable = sent->deletable;
	count_set_effective_distance(sent);

	/* We begin by unmarking all disjuncts.  This would not be necessary if
	   whenever we created a disjunct we cleared its marked field.
	   I didn't want to search the program for all such places, so
	   I did this way. XXX FIXME, someday ... 
	   */
	for (w=0; w<sent->length; w++) {
		for (d=sent->word[w].d; d != NULL; d=d->next) {
			d->marked = FALSE;
		}
	}

	init_fast_matcher(sent);
	ctxt->local_sent = sent->word;
	ctxt->null_links = (opts->min_null_count > 0);
	/*
	for (d = sent->word[0].d; d != NULL; d = d->next) {
		if ((d->left == NULL) && region_valid(sent, 0, sent->length, d->right, NULL)) {
			mark_region(sent, 0, sent->length, d->right, NULL);
			d->marked = TRUE;
		}
	}
	mark_region(sent, 0, sent->length, NULL, NULL);
	*/

	if (ctxt->null_links) {
		mark_region(sent, -1, sent->length, NULL, NULL);
	} else {
		for (w=0; w<sent->length; w++) {
		  /* consider removing the words [0,w-1] from the beginning
			 of the sentence */
			if (ctxt->deletable[-1][w]) {
				for (d = sent->word[w].d; d != NULL; d = d->next) {
					if ((d->left == NULL) && region_valid(sent, w, sent->length, d->right, NULL)) {
						mark_region(sent, w, sent->length, d->right, NULL);
						d->marked = TRUE;
					}
				}
			}
		}
	}

	delete_unmarked_disjuncts(sent);

	free_fast_matcher(sent);

	ctxt->local_sent = NULL;
	ctxt->current_resources = NULL;
	ctxt->deletable = NULL;
	count_unset_effective_distance(sent);
}
示例#2
0
/**
 * Mark as useful all disjuncts involved in some way to complete the
 * structure within the current region.  Note that only disjuncts
 * strictly between lw and rw will be marked.  If it so happens that
 * this region itself is not valid, then this fact will be recorded
 * in the table, and nothing else happens.
 */
static void mark_region(Sentence sent,
                        int lw, int rw, Connector *le, Connector *re)
{

	Disjunct * d;
	int left_valid, right_valid, i;
	int start_word, end_word;
	int w;
	Match_node * m, *m1;
	count_context_t *ctxt = sent->count_ctxt;

	i = region_valid(sent, lw, rw, le, re);
	if ((i==0) || (i==2)) return;
	/* we only reach this point if it's a valid unmarked region, i=1 */
	table_update(ctxt, lw, rw, le, re, 0, 2);

	if ((le == NULL) && (re == NULL) && (ctxt->null_links) && (rw != 1+lw)) {
		w = lw+1;
		for (d = ctxt->local_sent[w].d; d != NULL; d = d->next) {
			if ((d->left == NULL) && region_valid(sent, w, rw, d->right, NULL)) {
				d->marked = TRUE;
				mark_region(sent, w, rw, d->right, NULL);
			}
		}
		mark_region(sent, w, rw, NULL, NULL);
		return;
	}

	if (le == NULL) {
		start_word = lw+1;
	} else {
		start_word = le->word;
	}
	if (re == NULL) {
		end_word = rw-1;
	} else {
		end_word = re->word;
	}

	for (w=start_word; w < end_word+1; w++) {
		m1 = m = form_match_list(sent, w, le, lw, re, rw);
		for (; m!=NULL; m=m->next) {
			d = m->d;
			/* mark_cost++;*/
			left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
						  ((region_valid(sent, lw, w, le->next, d->left->next)) ||
						   ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
						   ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
						   ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
			right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
						   ((region_valid(sent, w, rw, d->right->next,re->next)) ||
							((d->right->multi) && region_valid(sent, w,rw,d->right,re->next))  ||
							((re->multi) && region_valid(sent, w, rw, d->right->next, re)) ||
							((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));

			/* The following if statements could be restructured to avoid superfluous calls
			   to mark_region.  It didn't seem a high priority, so I didn't optimize this.
			   */

			if (left_valid && region_valid(sent, w, rw, d->right, re)) {
				d->marked = TRUE;
				mark_region(sent, w, rw, d->right, re);
				mark_region(sent, lw, w, le->next, d->left->next);
				if (le->multi) mark_region(sent, lw, w, le, d->left->next);
				if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
				if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
			}

			if (right_valid && region_valid(sent, lw, w, le, d->left)) {
				d->marked = TRUE;
				mark_region(sent, lw, w, le, d->left);
				mark_region(sent, w, rw, d->right->next,re->next);
				if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
				if (re->multi) mark_region(sent, w, rw, d->right->next, re);
				if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
			}

			if (left_valid && right_valid) {
				d->marked = TRUE;
				mark_region(sent, lw, w, le->next, d->left->next);
				if (le->multi) mark_region(sent, lw, w, le, d->left->next);
				if (d->left->multi) mark_region(sent, lw, w, le->next, d->left);
				if (le->multi && d->left->multi) mark_region(sent, lw, w, le, d->left);
				mark_region(sent, w, rw, d->right->next,re->next);
				if (d->right->multi) mark_region(sent, w,rw,d->right,re->next);
				if (re->multi) mark_region(sent, w, rw, d->right->next, re);
				if (d->right->multi && re->multi) mark_region(sent, w, rw, d->right, re);
			}
		}
		put_match_list(sent, m1);
	}
}
示例#3
0
/**
 * Returns 0 if this range cannot be successfully filled in with
 * links.  Returns 1 if it can, and it's not been marked, and returns
 * 2 if it can and it has been marked.
 */
static int region_valid(Sentence sent, int lw, int rw, Connector *le, Connector *re)
{
	Disjunct * d;
	int left_valid, right_valid, found;
	int i, start_word, end_word;
	int w;
	Match_node * m, *m1;

	count_context_t *ctxt = sent->count_ctxt;

	i = table_lookup(sent, lw, rw, le, re, 0);
	if (i >= 0) return i;

	if ((le == NULL) && (re == NULL) && ctxt->deletable[lw][rw]) {
		table_store(ctxt, lw, rw, le, re, 0, 1);
		return 1;
	}

	if (le == NULL) {
		start_word = lw+1;
	} else {
		start_word = le->word;
	}
	if (re == NULL) {
		end_word = rw-1;
	} else {
		end_word = re->word;
	}

	found = 0;

	for (w=start_word; w < end_word+1; w++) {
		m1 = m = form_match_list(sent, w, le, lw, re, rw);
		for (; m!=NULL; m=m->next) {
			d = m->d;
			/* mark_cost++;*/
			/* in the following expressions we use the fact that 0=FALSE. Could eliminate
			   by always saying "region_valid(...) != 0"  */
			left_valid = (((le != NULL) && (d->left != NULL) && x_prune_match(ctxt, le, d->left, lw, w)) &&
						  ((region_valid(sent, lw, w, le->next, d->left->next)) ||
						   ((le->multi) && region_valid(sent, lw, w, le, d->left->next)) ||
						   ((d->left->multi) && region_valid(sent, lw, w, le->next, d->left)) ||
						   ((le->multi && d->left->multi) && region_valid(sent, lw, w, le, d->left))));
			if (left_valid && region_valid(sent, w, rw, d->right, re)) {
				found = 1;
				break;
			}
			right_valid = (((d->right != NULL) && (re != NULL) && x_prune_match(ctxt, d->right, re, w, rw)) &&
						   ((region_valid(sent, w, rw, d->right->next,re->next))	||
							((d->right->multi) && region_valid(sent, w,rw,d->right,re->next))  ||
							((re->multi) && region_valid(sent, w, rw, d->right->next, re))  ||
							((d->right->multi && re->multi) && region_valid(sent, w, rw, d->right, re))));
			if ((left_valid && right_valid) || (right_valid && region_valid(sent, lw, w, le, d->left))) {
				found = 1;
				break;
			}
		}
		put_match_list(sent, m1);
		if (found != 0) break;
	}
	table_store(ctxt, lw, rw, le, re, 0, found);
	return found;
}