Example #1
0
/**
 * Prune disjuncts that cannot satisfy the post-processing
 * "contains one" rules.
 *
 * A multiset of the connector strings of all the still-marked disjuncts
 * is maintained. A disjunct is dropped when one of its connectors matches
 * the selector of a non-wildcard rule for which rule_satisfiable() fails
 * on that multiset. Dropping a disjunct withdraws its connector strings
 * from the multiset, so another pass is made whenever
 * delete_from_cms_table() reported a change.
 *
 * @param sent  Sentence whose disjuncts are examined.
 * @param opts  Parse options; perform_pp_prune enables this step.
 * @return The number of disjuncts deleted; 0 if the sentence has no
 *         postprocessor or the step is disabled.
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	if ((NULL == sent->postprocessor) || !opts->perform_pp_prune) return 0;

	pp_knowledge *kn = sent->postprocessor->knowledge;
	multiset_table *table = cms_table_new();

	/* Mark every disjunct as alive and record all of its connector
	 * strings (right-going connectors first, then left-going ones). */
	for (size_t wi = 0; wi < sent->length; wi++)
	{
		for (Disjunct *dj = sent->word[wi].d; NULL != dj; dj = dj->next)
		{
			dj->marked = true;

			for (Connector *rc = dj->right; NULL != rc; rc = rc->next)
				insert_in_cms_table(table, connector_string(rc));

			for (Connector *lc = dj->left; NULL != lc; lc = lc->next)
				insert_in_cms_table(table, connector_string(lc));
		}
	}

	int total = 0;
	bool table_changed;

	do
	{
		int pass_deleted = 0;
		table_changed = false;

		for (size_t wi = 0; wi < sent->length; wi++)
		{
			for (Disjunct *dj = sent->word[wi].d; NULL != dj; dj = dj->next)
			{
				if (!dj->marked) continue; /* Dropped in an earlier pass. */

				/* Connector chains in the order they are scanned. */
				Connector *const chain[2] = { dj->right, dj->left };
				bool doomed = false;

				for (size_t r = 0; !doomed && (r < kn->n_contains_one_rules); r++)
				{
					pp_rule *rule = &kn->contains_one_rules[r];

					/* Rules with a wildcard selector are not used here. */
					if (rule->selector_has_wildcard) continue;

					const char *trigger = rule->selector;
					pp_linkset *criteria = rule->link_set;

					for (int side = 0; !doomed && (side < 2); side++)
					{
						for (Connector *c = chain[side]; NULL != c; c = c->next)
						{
							/* Only a connector matching the trigger link of
							 * the rule makes its criterion links relevant. */
							if (!post_process_match(trigger, connector_string(c)))
								continue;
							if (rule_satisfiable(table, criteria))
								continue;

							rule->use_count++;
							doomed = true;
							break;
						}
					}
				}

				if (!doomed) continue;

				pass_deleted++;
				total++;
				dj->marked = false; /* Physically removed after the loop. */

				/* Withdraw the connector strings of the dropped disjunct. */
				for (int side = 0; side < 2; side++)
				{
					for (Connector *c = chain[side]; NULL != c; c = c->next)
					{
						if (delete_from_cms_table(table, connector_string(c)))
							table_changed = true;
					}
				}
			}
		}

		lgdebug(D_PRUNE, "Debug: pp_prune pass deleted %d\n", pass_deleted);
	}
	while (table_changed);

	cms_table_delete(table);

	if (total > 0)
	{
		delete_unmarked_disjuncts(sent);
		if (verbosity_level(D_PRUNE))
		{
			prt_error("\n\\");
			prt_error("Debug: After pp_prune:\n\\");
			print_disjunct_counts(sent);
		}
	}

	print_time(opts, "pp pruning");

	return total;
}
Example #2
0
/**
 * Mark connectors that cannot satisfy the post-processing
 * "contains one" rules.
 *
 * All the connectors of the sentence are first inserted into a cms
 * table. Each non-wildcard rule is then checked against the table chain
 * selected by the hash of its selector: if a connector matches the
 * selector and rule_satisfiable() fails, the nearest_word of that table
 * entry's connector is set to BAD_WORD. Finally every connector is
 * handed to mark_bad_connectors().
 *
 * @param sent  Sentence whose connectors are examined.
 * @param opts  Parse options; perform_pp_prune enables this step.
 * @return The number of times mark_bad_connectors() returned true
 *         (reported as deleted disjuncts); 0 if the sentence has no
 *         postprocessor or the step is disabled.
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	pp_knowledge *knowledge;
	multiset_table *cmt;

	if (sent->postprocessor == NULL) return 0;
	if (!opts->perform_pp_prune) return 0;

	knowledge = sent->postprocessor->knowledge;
	cmt = cms_table_new();

	/* Populate the cms table, either from the jet-sharing tables (when
	 * they exist) or by walking the disjuncts of each word. */
	jet_sharing_t *js = &sent->jet_sharing;
	if (js->table[0] != NULL)
	{
		for (int dir = 0; dir < 2; dir++)
		{
			/* Entries are numbered 1..entries[dir], inclusive. */
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					insert_in_cms_table(cmt, c);
				}
			}
		}
	}
	else
	{
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						insert_in_cms_table(cmt, c);
					}
				}
			}
		}
	}

	int D_deleted = 0;       /* Number of deleted disjuncts */
	int Cname_deleted = 0;   /* Number of deleted connector names */

	/* Since the cms table is unchanged, after applying a rule once we
	 * know if it will be TRUE or FALSE if we need to apply it again.
	 * Values: 0xFF (the memset of -1 below): Undecided yet;
	 * 0: Rule unsatisfiable; 1: Rule satisfiable.
	 * The buffer size is derived from the element type of rule_ok, so
	 * the allocation and the memset always agree with the declaration. */
	uint8_t *rule_ok;
	const size_t rule_ok_size =
		knowledge->n_contains_one_rules * sizeof *rule_ok;
	rule_ok = alloca(rule_ok_size);
	memset(rule_ok, -1, rule_ok_size);

	for (size_t i = 0; i < knowledge->n_contains_one_rules; i++)
	{
		/* Each rule is visited once and starts as "undecided", so this
		 * check cannot fire here; it is retained as in the original. */
		if (rule_ok[i] == 1) continue;

		pp_rule* rule = &knowledge->contains_one_rules[i]; /* The ith rule */
		const char *selector = rule->selector;  /* Selector string for this rule */
		pp_linkset *link_set = rule->link_set;  /* The set of criterion links */
		unsigned int hash = cms_hash(selector);

		if (rule->selector_has_wildcard)
		{
			rule_ok[i] = 1;
			continue;  /* If it has a * forget it */
		}

		/* Only the table chain selected by the selector hash is scanned. */
		for (Cms *cms = cmt->cms_table[hash]; cms != NULL; cms = cms->next)
		{
			Connector *c = cms->c;
			if (!post_process_match(selector, connector_string(c))) continue;

			ppdebug("Rule %zu: Selector %s, Connector %s\n",
			        i, selector, connector_string(c));
			/* We know c matches the trigger link of the rule. */
			/* Now check the criterion links. Once the rule is known to be
			 * unsatisfiable, rule_satisfiable() is not called again. */
			if ((rule_ok[i] == 0) || !rule_satisfiable(cmt, link_set))
			{
				rule_ok[i] = 0;
				ppdebug("DELETE %s refcount %d\n", connector_string(c), c->refcount);
				c->nearest_word = BAD_WORD;
				Cname_deleted++;
				rule->use_count++;
			}
			else
			{
				/* Satisfiable: no connector can be rejected by this rule. */
				rule_ok[i] = 1;
				break;
			}
		}
	}

	/* Iterate over all connectors and mark the bad trigger connectors.
	 * If the marked connector is not the shallow one, note that the
	 * shallow one on the same disjunct cannot be marked too (this could
	 * facilitate faster detection by power_prune()) because this would be
	 * wrongly reflected through the cms table. */

	if (js->table[0] != NULL)
	{
		for (int dir = 0; dir < 2; dir++)
		{
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					if (mark_bad_connectors(cmt, c))
					{
						D_deleted++;
						break;
					}
				}
			}
		}
	}
	else
	{
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				/* NOTE(review): the break below leaves only the connector
				 * loop of one direction, so a disjunct may be counted once
				 * per direction - confirm that this is intended. */
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						if (mark_bad_connectors(cmt, c))
						{
							D_deleted++;
							break;
						}
					}
				}

			}
		}
	}

	lgdebug(+D_PRUNE, "Deleted %d (%d connector names)\n",
	        D_deleted, Cname_deleted);

	cms_table_delete(cmt);

	print_time(opts, "pp pruning");

	return D_deleted;
}