Esempio n. 1
0
/**
 * Make sure the table holds an entry for connector c, and that this entry
 * heads its hash chain.
 *
 * The chain is selected by cms_hash() of the connector string. An existing
 * entry is one whose connector has a desc that compares equal to c->desc.
 * If none exists, a new entry pointing at c is allocated. An existing
 * entry that is not already first is unlinked and moved to the front
 * (most-recently-used order).
 */
static void insert_in_cms_table(multiset_table *cmt, Connector *c)
{
	unsigned int bucket = cms_hash(connector_string(c));
	Cms *before = NULL;
	Cms *entry = cmt->cms_table[bucket];

	/* Walk the chain, remembering the predecessor of the current entry. */
	while ((entry != NULL) && (entry->c->desc != c->desc))
	{
		before = entry;
		entry = entry->next;
	}

	if (entry == NULL)
	{
		/* Not present yet: create a fresh entry for this connector. */
		entry = (Cms *) xalloc(sizeof(Cms));
		entry->c = c;
	}
	else if (before != NULL)
	{
		/* Present further down the chain: unlink it first. */
		before->next = entry->next;
	}
	else
	{
		/* Present and already at the head: nothing to do. */
		return;
	}

	/* Push the (new or unlinked) entry onto the front of the chain. */
	entry->next = cmt->cms_table[bucket];
	cmt->cms_table[bucket] = entry;
}
Esempio n. 2
0
/**
 * Find the table entry for the string str.
 *
 * Scans the hash chain selected by cms_hash(str) and returns the first
 * entry for which string_set_cmp(str, entry->name) is true, or NULL when
 * the chain holds no such entry.
 */
static Cms * lookup_in_cms_table(multiset_table *cmt, const char * str)
{
	Cms *entry = cmt->cms_table[cms_hash(str)];

	while (entry != NULL)
	{
		if (string_set_cmp(str, entry->name)) break;
		entry = entry->next;
	}

	/* Either the matching entry, or NULL after running off the chain. */
	return entry;
}
Esempio n. 3
0
/**
 * Check whether the table contains a connector name C for which
 * post_process_match(pp_match_name, C) is TRUE.
 *
 * Only the hash chain selected by cms_hash(pp_match_name) is examined.
 */
static bool match_in_cms_table(multiset_table *cmt, const char * pp_match_name)
{
	Cms *entry = cmt->cms_table[cms_hash(pp_match_name)];

	while (entry != NULL)
	{
		if (post_process_match(pp_match_name, entry->name))
		{
			return true;
		}
		entry = entry->next;
	}

	return false;
}
Esempio n. 4
0
/**
 * Find the table entry whose connector has a desc equal to c->desc.
 *
 * The hash chain is selected by cms_hash() of the connector string of c.
 * Returns the matching entry, or NULL if the chain has none.
 *
 * FIXME? The chain scan duplicates the one in insert_in_cms_table(),
 * but sharing it seems cumbersome.
 */
static Cms *lookup_in_cms_table(multiset_table *cmt, Connector *c)
{
	Cms *entry = cmt->cms_table[cms_hash(connector_string(c))];

	/* Advance until the descriptors agree or the chain is exhausted. */
	while ((entry != NULL) && (entry->c->desc != c->desc))
	{
		entry = entry->next;
	}

	return entry;
}
Esempio n. 5
0
/**
 * Count one more occurrence of the string str in the table.
 *
 * If lookup_in_cms_table() already finds an entry for str, its count is
 * incremented. Otherwise a new entry with count 1 is allocated and pushed
 * onto the front of the hash chain selected by cms_hash(str).
 */
static void insert_in_cms_table(multiset_table *cmt, const char * str)
{
	Cms *entry = lookup_in_cms_table(cmt, str);

	if (entry == NULL)
	{
		unsigned int bucket;

		entry = (Cms *) xalloc(sizeof(Cms));
		/* The string is not copied: only the caller's pointer is kept
		 * (per the original note, these strings are not freed later). */
		entry->name = str;
		entry->count = 1;

		bucket = cms_hash(str);
		entry->next = cmt->cms_table[bucket];
		cmt->cms_table[bucket] = entry;
		return;
	}

	entry->count++;
}
Esempio n. 6
0
/**
 * Check whether some connector in the table can, together with c, form a
 * link x for which post_process_match(pp_link, x) is TRUE.
 *
 * Only the hash chain selected by cms_hash(pp_link) is examined; each
 * entry's connector string is tested with can_form_link(). Returns TRUE
 * on the first entry that passes, FALSE if none does.
 */
static bool match_in_cms_table(multiset_table *cmt, const char *pp_link,
                               const char *c)
{
	Cms *entry;

	for (entry = cmt->cms_table[cms_hash(pp_link)]; entry != NULL;
	     entry = entry->next)
	{
		if (!can_form_link(pp_link, connector_string(entry->c), c))
		{
			ppdebug("NOT-MATCHED %s \n", connector_string(entry->c));
			continue;
		}

		ppdebug("MATCHED %s\n", connector_string(entry->c));
		return true;
	}

	return false;
}
Esempio n. 7
0
/*
 * Record one occurrence of `item` (`item_len` bytes) in the sketch.
 *
 * For each hash round 0 .. k_num-1 the counter selected by
 * cms_hash(...) % vector_entries is incremented, unless it has already
 * reached CMSCOUNT_MAX, in which case it is left untouched.
 * The first round is always performed, whatever the value of k_num.
 *
 * Returns 1 if at least one selected counter was already at CMSCOUNT_MAX,
 * 0 otherwise.
 */
_Bool
cms_incr(const CMS * const cms, const char * const item, const size_t item_len)
{
    uint64_t hashes[2];
    _Bool    saturated = 0;
    size_t   round = (size_t) 0U;

    do {
        const size_t slot =
            (size_t) (cms_hash(cms, hashes,
                               item, item_len, round) % cms->vector_entries);

        if (cms->vector[slot] < CMSCOUNT_MAX) {
            cms->vector[slot]++;
        } else {
            /* Counter is pinned at its maximum; report it to the caller. */
            saturated = 1;
        }
        round++;
    } while (round < cms->k_num);

    return saturated;
}
Esempio n. 8
0
/*
 * Estimate how many times `item` (`item_len` bytes) has been recorded.
 *
 * For each hash round 0 .. k_num-1 the counter selected by
 * cms_hash(...) % vector_entries is read; the smallest value seen is
 * returned. The first round is always performed, whatever the value of
 * k_num, so the result is always taken from at least one counter.
 */
CMSCount
cms_count(const CMS * const cms, const char * const item,
          const size_t item_len)
{
    uint64_t hashes[2];
    CMSCount lowest = 0;
    size_t   round = (size_t) 0U;

    do {
        const size_t   slot =
            (size_t) (cms_hash(cms, hashes,
                               item, item_len, round) % cms->vector_entries);
        const CMSCount current = cms->vector[slot];

        /* Round 0 seeds the minimum; later rounds can only lower it. */
        if (round == (size_t) 0U || current < lowest) {
            lowest = current;
        }
    } while (++round < cms->k_num);

    return lowest;
}
Esempio n. 9
0
/**
 * Prune connectors using the post-processing "contains one" rules.
 *
 * Steps:
 *  1. Insert every connector of the sentence into a temporary cms table
 *     (through the jet-sharing table when js->table[0] is set, skipping
 *     connectors with a zero refcount; otherwise by walking every
 *     disjunct of every word).
 *  2. For each contains-one rule whose selector has no wildcard, visit the
 *     table entries that match the selector. When rule_satisfiable()
 *     fails for the rule, every such entry's connector gets its
 *     nearest_word set to BAD_WORD and the rule's use_count is bumped.
 *  3. Walk all connectors again and call mark_bad_connectors() on them,
 *     counting each chain for which it returns true.
 *
 * Returns 0 immediately if the sentence has no postprocessor or if
 * opts->perform_pp_prune is false; otherwise returns the count from
 * step 3 (the number of deleted disjuncts).
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	/* Per-rule verdicts kept in rule_ok[]. RULE_UNDECIDED is the byte
	 * value that memset() stores in every element of the array. */
	enum
	{
		RULE_UNSATISFIABLE = 0,
		RULE_SATISFIABLE = 1,
		RULE_UNDECIDED = 0xFF
	};

	pp_knowledge *knowledge;
	multiset_table *cmt;

	if (sent->postprocessor == NULL) return 0;
	if (!opts->perform_pp_prune) return 0;

	knowledge = sent->postprocessor->knowledge;
	cmt = cms_table_new();

	jet_sharing_t *js = &sent->jet_sharing;
	if (js->table[0] != NULL)
	{
		for (int dir = 0; dir < 2; dir++)
		{
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					insert_in_cms_table(cmt, c);
				}
			}
		}
	}
	else
	{
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						insert_in_cms_table(cmt, c);
					}
				}
			}
		}
	}

	int D_deleted = 0;       /* Number of deleted disjuncts */
	int Cname_deleted = 0;   /* Number of deleted connector names */

	/* Since the cms table is unchanged, after applying a rule once we
	 * know if it will be TRUE or FALSE if we need to apply it again.
	 * The buffer is sized from its own element type, so allocation and
	 * initialization always agree with the declared array type. */
	uint8_t *rule_ok =
		alloca(knowledge->n_contains_one_rules * sizeof(*rule_ok));
	memset(rule_ok, RULE_UNDECIDED,
	       knowledge->n_contains_one_rules * sizeof(*rule_ok));

	for (size_t i = 0; i < knowledge->n_contains_one_rules; i++)
	{
		if (rule_ok[i] == RULE_SATISFIABLE) continue;

		pp_rule* rule = &knowledge->contains_one_rules[i]; /* The ith rule */
		const char *selector = rule->selector;  /* Selector string for this rule */
		pp_linkset *link_set = rule->link_set;  /* The set of criterion links */

		if (rule->selector_has_wildcard)
		{
			rule_ok[i] = RULE_SATISFIABLE;
			continue;  /* If it has a * forget it */
		}

		/* The hash is only needed once the wildcard case is ruled out. */
		unsigned int hash = cms_hash(selector);

		for (Cms *cms = cmt->cms_table[hash]; cms != NULL; cms = cms->next)
		{
			Connector *c = cms->c;
			if (!post_process_match(selector, connector_string(c))) continue;

			ppdebug("Rule %zu: Selector %s, Connector %s\n",
			        i, selector, connector_string(c));
			/* We know c matches the trigger link of the rule. */
			/* Now check the criterion links. Once the rule is known to be
			 * unsatisfiable, rule_satisfiable() is not called again. */
			if ((rule_ok[i] == RULE_UNSATISFIABLE) ||
			    !rule_satisfiable(cmt, link_set))
			{
				rule_ok[i] = RULE_UNSATISFIABLE;
				ppdebug("DELETE %s refcount %d\n", connector_string(c), c->refcount);
				c->nearest_word = BAD_WORD;
				Cname_deleted++;
				rule->use_count++;
			}
			else
			{
				rule_ok[i] = RULE_SATISFIABLE;
				break;
			}
		}
	}

	/* Iterate over all connectors and mark the bad trigger connectors.
	 * If the marked connector is not the shallow one, note that the
	 * shallow one on the same disjunct cannot be marked too (this could
	 * facilitate faster detection by power_prune()) because this would be
	 * wrongly reflected through the cms table. */

	if (js->table[0] != NULL)
	{
		for (int dir = 0; dir < 2; dir++)
		{
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					if (mark_bad_connectors(cmt, c))
					{
						D_deleted++;
						break;
					}
				}
			}
		}
	}
	else
	{
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						if (mark_bad_connectors(cmt, c))
						{
							D_deleted++;
							break;
						}
					}
				}

			}
		}
	}

	lgdebug(+D_PRUNE, "Deleted %d (%d connector names)\n",
	        D_deleted, Cname_deleted);

	cms_table_delete(cmt);

	print_time(opts, "pp pruning");

	return D_deleted;
}