Ejemplo n.º 1
0
/**
 * Prune useless disjuncts from the sentence.
 *
 * A single power table is allocated and initialized, used for one
 * power_prune() round, and then reused for a second power_prune() round
 * only when the intervening pp_prune() call returns a positive value.
 * The table is deleted before returning.
 *
 * @param sent  The sentence handed to the pruning functions.
 * @param opts  The parse options handed to the pruning functions.
 */
void pp_and_power_prune(Sentence sent, Parse_Options opts)
{
	power_table table;

	power_table_alloc(sent, &table);
	power_table_init(sent, &table);

	/* First round of power pruning. */
	power_prune(sent, opts, &table);

	/* Repeat power pruning only if pp_prune() reported a positive result. */
	if (pp_prune(sent, opts) > 0)
	{
		power_prune(sent, opts, &table);
	}

	/* Further pp_prune() / power_prune() rounds are currently of no
	 * benefit: additional deletions are very rare, and when they do
	 * happen it is mostly a single disjunct that gets deleted. */

	power_table_delete(&table);
}
Ejemplo n.º 2
0
/**
 * Repeatedly sweep the sentence and unlink disjuncts that cannot be used.
 *
 * Left-to-right and right-to-left passes alternate until a pass ends
 * with no recorded change (N_changed of the prune context) and no
 * deleted disjunct.
 *
 * Implementation notes:
 * Normally all the identical disjunct-jets are memory shared.
 * The suffix_id of each connector serves as its reference count in the
 * power table. Whenever a connector that cannot match is discovered,
 * its reference count is decreased and its nearest_word field is
 * assigned BAD_WORD. Because of the memory sharing, every such
 * assignment immediately affects all the identical disjunct-jets.
 *
 * @return The number of disjuncts deleted.
 */
static int power_prune(Sentence sent, Parse_Options opts)
{
	power_table table;
	prune_context ctx;
	int fresh = 0;        /* Per pass: deleted after the update call failed. */
	int dups = 0;         /* Per pass: deleted because already BAD_WORD. */
	int pass_total;
	int grand_total = 0;

	power_table_alloc(sent, &table);
	power_table_init(sent, &table);

	ctx.sent = sent;
	ctx.pt = &table;
	ctx.null_links = (opts->min_null_count > 0);
	ctx.power_cost = 0;
	ctx.N_changed = 1;  /* Guarantees that at least two passes are made. */

	for (;;)
	{
		/* Pass over the words from left to right, checking the left
		 * connector list of every disjunct. */
		for (WordIdx w = 0; w < sent->length; w++)
		{
			/* "slot" always addresses the pointer that refers to the
			 * disjunct under examination, so unlinking is one store. */
			Disjunct **slot = &sent->word[w].d;

			while (*slot != NULL)
			{
				Disjunct *cur = *slot;

				if (cur->left != NULL)
				{
					bool known_bad = (cur->left->nearest_word == BAD_WORD);
					bool drop = known_bad;

					/* The update is attempted only for connectors that
					 * are not already marked BAD_WORD. */
					if (!drop)
						drop = (left_connector_list_update(&ctx, cur->left, w, true) < 0);

					if (drop)
					{
						mark_connector_sequence_for_dequeue(cur->left, true);
						mark_connector_sequence_for_dequeue(cur->right, false);

						/* Unlink the disjunct; the slot now refers to
						 * its successor, so do not advance. */
						*slot = cur->next;
						if (known_bad)
							dups++;
						else
							fresh++;
						continue;
					}
				}

				/* Disjunct kept: advance to the next one. */
				slot = &cur->next;
			}

			clean_table(table.r_table_size[w], table.r_table[w]);
		}

		pass_total = fresh + dups;
		grand_total += pass_total;
		lgdebug(D_PRUNE, "Debug: l->r pass changed %d and deleted %d (%d+%d)\n",
		        ctx.N_changed, pass_total, fresh, dups);

		if (ctx.N_changed == 0 && fresh == 0 && dups == 0) break;
		ctx.N_changed = 0;
		fresh = 0;
		dups = 0;

		/* Pass over the words from right to left, checking the right
		 * connector list of every disjunct. */
		for (WordIdx w = sent->length-1; w != (WordIdx) -1; w--)
		{
			Disjunct **slot = &sent->word[w].d;

			while (*slot != NULL)
			{
				Disjunct *cur = *slot;

				if (cur->right != NULL)
				{
					bool known_bad = (cur->right->nearest_word == BAD_WORD);
					bool drop = known_bad;

					/* As above: no update call for a BAD_WORD connector. */
					if (!drop)
						drop = (right_connector_list_update(&ctx, cur->right, w, true) >= sent->length);

					if (drop)
					{
						mark_connector_sequence_for_dequeue(cur->right, true);
						mark_connector_sequence_for_dequeue(cur->left, false);

						/* Unlink the disjunct; do not advance. */
						*slot = cur->next;
						if (known_bad)
							dups++;
						else
							fresh++;
						continue;
					}
				}

				/* Disjunct kept: advance to the next one. */
				slot = &cur->next;
			}

			clean_table(table.l_table_size[w], table.l_table[w]);
		}

		pass_total = fresh + dups;
		grand_total += pass_total;
		lgdebug(D_PRUNE, "Debug: r->l pass changed %d and deleted %d (%d+%d)\n",
		        ctx.N_changed, pass_total, fresh, dups);

		if (ctx.N_changed == 0 && fresh == 0 && dups == 0) break;
		ctx.N_changed = 0;
		fresh = 0;
		dups = 0;
	}

	power_table_delete(&table);

	lgdebug(D_PRUNE, "Debug: power prune cost: %d\n", ctx.power_cost);

	print_time(opts, "power pruned");
	if (verbosity_level(D_PRUNE))
	{
		prt_error("\n\\");
		prt_error("Debug: After power_pruning:\n\\");
		print_disjunct_counts(sent);
	}

#ifdef DEBUG
	/* Sanity check: no surviving disjunct may still carry a connector
	 * that is marked BAD_WORD. */
	for (WordIdx w = 0; w < sent->length; w++)
	{
		for (Disjunct *cur = sent->word[w].d; cur != NULL; cur = cur->next)
		{
			for (Connector *conn = cur->left; conn != NULL; conn = conn->next)
				assert(conn->nearest_word != BAD_WORD);
			for (Connector *conn = cur->right; conn != NULL; conn = conn->next)
				assert(conn->nearest_word != BAD_WORD);
		}
	}
#endif

	return grand_total;
}