예제 #1
0
/**
 * Build the left and right match tables for every word of the sentence.
 *
 * For each word two tables are allocated with xalloc(): one sized from
 * left_disjunct_list_length() and one from right_disjunct_list_length(),
 * each passed through next_power_of_two_up(). The sizes are recorded in
 * l_table_size[] / r_table_size[] and the tables in l_table[] / r_table[].
 * Every slot starts out NULL; then each disjunct that has a non-NULL left
 * (resp. right) connector list is entered with put_into_match_table(),
 * using direction -1 (resp. 1).
 *
 * As a side effect, match_cost is reset to 0.
 */
void init_fast_matcher(Sentence sent) {
	int word_idx;

	match_cost = 0;

	for (word_idx = 0; word_idx < sent->length; word_idx++) {
		Disjunct * dj;
		Match_node ** table;
		int count, n_slots, slot;

		/* ---- left table ---- */
		count = left_disjunct_list_length(sent->word[word_idx].d);
		n_slots = next_power_of_two_up(count);
		l_table_size[word_idx] = n_slots;
		table = (Match_node **) xalloc(n_slots * sizeof(Match_node *));
		l_table[word_idx] = table;

		slot = 0;
		while (slot < n_slots) {
			table[slot] = NULL;
			slot++;
		}

		dj = sent->word[word_idx].d;
		while (dj != NULL) {
			if (dj->left != NULL) {
				put_into_match_table(n_slots, table, dj, dj->left, -1);
			}
			dj = dj->next;
		}

		/* ---- right table ---- */
		count = right_disjunct_list_length(sent->word[word_idx].d);
		n_slots = next_power_of_two_up(count);
		r_table_size[word_idx] = n_slots;
		table = (Match_node **) xalloc(n_slots * sizeof(Match_node *));
		r_table[word_idx] = table;

		slot = 0;
		while (slot < n_slots) {
			table[slot] = NULL;
			slot++;
		}

		dj = sent->word[word_idx].d;
		while (dj != NULL) {
			if (dj->right != NULL) {
				put_into_match_table(n_slots, table, dj, dj->right, 1);
			}
			dj = dj->next;
		}
	}
}
예제 #2
0
/**
 * Allocate a fast-matcher context for the sentence and fill its tables.
 *
 * The context holds one left and one right match table per word. The two
 * size arrays share a single allocation (the right-side array starts
 * sent->length entries after the left-side one), and so do the two arrays
 * of table pointers. Each per-word table is sized by passing the
 * left/right disjunct list length through next_power_of_two_up(), zeroed,
 * and then populated with put_into_match_table() for every disjunct whose
 * connector list on that side is non-NULL (direction -1 for left, 1 for
 * right).
 *
 * @param sent  The sentence whose words' disjuncts are indexed.
 * @return      The newly allocated context, with match_cost set to 0 and
 *              mn_free_list set to NULL.
 */
fast_matcher_t* alloc_fast_matcher(const Sentence sent)
{
	fast_matcher_t *ctxt;
	Match_node ** table;
	Disjunct * dj;
	unsigned int n_slots;
	int count;
	size_t word;

	ctxt = (fast_matcher_t *) xalloc(sizeof(fast_matcher_t));
	ctxt->size = sent->length;

	/* One allocation serves both the left and the right size arrays. */
	ctxt->l_table_size = xalloc(2 * sent->length * sizeof(unsigned int));
	ctxt->r_table_size = ctxt->l_table_size + sent->length;

	/* Likewise for the arrays of per-word table pointers. */
	ctxt->l_table = xalloc(2 * sent->length * sizeof(Match_node **));
	ctxt->r_table = ctxt->l_table + sent->length;
	memset(ctxt->l_table, 0, 2 * sent->length * sizeof(Match_node **));

	ctxt->match_cost = 0;
	ctxt->mn_free_list = NULL;

	for (word = 0; word < sent->length; word++)
	{
		/* Left side. */
		count = left_disjunct_list_length(sent->word[word].d);
		n_slots = next_power_of_two_up(count);
		ctxt->l_table_size[word] = n_slots;
		table = (Match_node **) xalloc(n_slots * sizeof(Match_node *));
		ctxt->l_table[word] = table;
		memset(table, 0, n_slots * sizeof(Match_node *));

		for (dj = sent->word[word].d; dj != NULL; dj = dj->next)
		{
			if (NULL == dj->left) continue;
			put_into_match_table(n_slots, table, dj, dj->left, -1);
		}

		/* Right side. */
		count = right_disjunct_list_length(sent->word[word].d);
		n_slots = next_power_of_two_up(count);
		ctxt->r_table_size[word] = n_slots;
		table = (Match_node **) xalloc(n_slots * sizeof(Match_node *));
		ctxt->r_table[word] = table;
		memset(table, 0, n_slots * sizeof(Match_node *));

		for (dj = sent->word[word].d; dj != NULL; dj = dj->next)
		{
			if (NULL == dj->right) continue;
			put_into_match_table(n_slots, table, dj, dj->right, 1);
		}
	}

	return ctxt;
}
예제 #3
0
/**
 * Allocate a connector set and populate it from the expression e.
 *
 * The hash table size is next_power_of_two_up(size_of_expression(e)).
 * All slots are cleared to NULL before the set is filled by
 * build_connector_set_from_expression(). The set and its hash table are
 * both obtained from xalloc().
 */
Connector_set * connector_set_create(Exp *e) {
    Connector_set *cs;
    int slot;

    cs = (Connector_set *) xalloc(sizeof(Connector_set));
    cs->table_size = next_power_of_two_up(size_of_expression(e));
    cs->hash_table =
      (Connector **) xalloc(cs->table_size * sizeof(Connector *));

    /* Start with an empty table. */
    slot = 0;
    while (slot < cs->table_size) {
        cs->hash_table[slot] = NULL;
        slot++;
    }

    build_connector_set_from_expression(cs, e);
    return cs;
}
예제 #4
0
/**
 * Compute the aligned size for an element of the given size.
 *
 * Sizes below MAX_ALIGNMENT are aligned, via ALIGN(), to
 * next_power_of_two_up(element_size), unless the size already equals
 * that value, in which case it is returned unchanged.
 * Sizes of MAX_ALIGNMENT or more are aligned to MIN_ALIGNMENT.
 */
static size_t align_size(size_t element_size)
{
	if (element_size >= MAX_ALIGNMENT)
		return ALIGN(element_size, MIN_ALIGNMENT);

	size_t pow2 = next_power_of_two_up(element_size);

	return (pow2 == element_size) ? element_size : ALIGN(element_size, pow2);
}
예제 #5
0
/**
 * Remove duplicates from the disjunct list headed by d and return the
 * head of the resulting list. Eliminated disjuncts are freed.
 *
 * Each incoming disjunct is compared against the disjuncts already held
 * in its hash bucket of a temporary table. Whichever of a pair
 * disjunct_matches_alam() says is redundant gets freed; survivors stay in
 * the bucket. At the end all buckets are threaded back into one list.
 * Because that list is rebuilt by prepending, the order of the returned
 * disjuncts is not the order of the input.
 *
 * When verbosity > 2 and at least one disjunct was removed, the number
 * removed is printed.
 */
Disjunct * eliminate_duplicate_disjuncts(Disjunct * d)
{
	int killed = 0;
	int slot;
	Disjunct *result = NULL;
	disjunct_dup_table *dt;

	dt = disjunct_dup_table_new(next_power_of_two_up(2 * count_disjuncts(d)));

	while (d != NULL)
	{
		Disjunct *rest = d->next;       /* saved: d may be freed below */
		int bucket = hash_disjunct(d);
		Disjunct *kept = NULL;          /* rebuilt chain for this bucket */
		Disjunct *cur = dt->dup_table[bucket];

		while (cur != NULL)
		{
			Disjunct *cur_next = cur->next;

			if (disjunct_matches_alam(cur, d))
			{
				/* The incoming disjunct d is the one to discard. */
				d->next = NULL;
				free_disjuncts(d);
				killed++;
				/* cur still heads the unvisited tail; keep all of it. */
				kept = catenate_disjuncts(kept, cur);
				break;
			}

			if (disjunct_matches_alam(d, cur))
			{
				/* The bucket entry cur is the one to discard. */
				cur->next = NULL;
				free_disjuncts(cur);
				killed++;
			}
			else
			{
				/* Neither is discarded; cur stays in the bucket. */
				cur->next = kept;
				kept = cur;
			}

			cur = cur_next;
		}

		/* cur is NULL only if the scan ran to the end, i.e. d survived. */
		if (cur == NULL)
		{
			d->next = kept;
			kept = d;
		}
		dt->dup_table[bucket] = kept;

		d = rest;
	}

	/* Thread every bucket's chain onto the result list. */
	for (slot = 0; slot < dt->dup_table_size; slot++)
	{
		Disjunct *cur = dt->dup_table[slot];

		while (cur != NULL)
		{
			Disjunct *cur_next = cur->next;

			cur->next = result;
			result = cur;
			cur = cur_next;
		}
	}

	if ((verbosity > 2) && (killed != 0)) printf("killed %d duplicates\n", killed);

	disjunct_dup_table_delete(dt);
	return result;
}
예제 #6
0
파일: prune.c 프로젝트: ampli/link-grammar
/**
 * Allocates and builds the initial power hash tables.
 * Each word has 2 tables - for its left and right connectors.
 * In these tables, the connectors are hashed according to their
 * uppercase part.
 * In each hash slot, the shallow connectors appear first, so when
 * matching deep connectors to the connectors in a slot, the
 * match loop can stop when there are no more shallow connectors in that
 * slot (since if both are deep, they cannot be matched).
 *
 * The suffix_id of each connector serves as its reference count.
 * Hence, it should always be > 0.
 *
 * There are two code paths for initializing the power tables:
 * 1. When disjunct-jet sharing is not done. The words are then
 * directly scanned for their disjuncts and connectors. Each one
 * is inserted with a reference count (as suffix_id) set to 1.
 * 2. Using the disjunct-jet tables (left and right). Each slot
 * contains only a pointer to a disjunct-jet. The word number is
 * extracted from the deepest connector (that has been assigned to it by
 * setup_connectors()).
 *
 * In both paths the deep connectors (everything after the first connector
 * of a list) are inserted before the shallow ones (the first connector).
 *
 * This function also creates the C_list memory pool and stores it in
 * pt->memory_pool; the per-word tables themselves come from xalloc().
 *
 * FIXME: Find a way to not use a reference count (to increase
 * efficiency).
 */
static void power_table_init(Sentence sent, power_table *pt)
{
	unsigned int i;
	/* Upper bound on the connector count used when sizing a table. */
#define TOPSZ 32768
	size_t lr_table_max_usage = MIN(sent->dict->contable.num_con, TOPSZ);

	Pool_desc *mp = pt->memory_pool = pool_new(__func__, "C_list",
	                   /*num_elements*/2048, sizeof(C_list),
	                   /*zero_out*/false, /*align*/false, /*exact*/false);

	for (WordIdx w = 0; w < sent->length; w++)
	{
		size_t l_size, r_size;
		C_list **l_t, **r_t;
		size_t len;

		/* The below uses variable-sized hash tables. This seems to
		 * provide performance that is equal or better than the best
		 * fixed-size performance.
		 * The best fixed-size performance seems to come at about
		 * a 1K table size, for both English and Russian. (Both have
		 * about 100 fixed link-types, and many thousands of auto-genned
		 * link types: IDxxx idioms for both, LLxxx suffix links for
		 * Russian.)  Pluses and minuses:
		 * + small fixed tables are faster to initialize.
		 * - small fixed tables have more collisions
		 * - variable-size tables require counting connectors.
		 *   (and the more complex code to go with)
		 * CPU cache-size effects ...
		 */

		/* Left table: take the connector count from the jet-sharing data
		 * when it is present, else count the connectors directly. */
		if (sent->jet_sharing.num_cnctrs_per_word[0])
			len = sent->jet_sharing.num_cnctrs_per_word[0][w];
		else
			len = left_connector_count(sent->word[w].d);
		l_size = next_power_of_two_up(MIN(len, lr_table_max_usage));
		pt->l_table_size[w] = l_size;
		l_t = pt->l_table[w] = (C_list **) xalloc(l_size * sizeof(C_list *));
		for (i=0; i<l_size; i++) l_t[i] = NULL;

		/* Right table: same procedure, using index 1 of the jet-sharing
		 * counts or the right connector count. */
		if (sent->jet_sharing.num_cnctrs_per_word[1])
			len = sent->jet_sharing.num_cnctrs_per_word[1][w];
		else
			len = right_connector_count(sent->word[w].d);
		r_size = next_power_of_two_up(MIN(len, lr_table_max_usage));
		pt->r_table_size[w] = r_size;
		r_t = pt->r_table[w] = (C_list **) xalloc(r_size * sizeof(C_list *));
		for (i=0; i<r_size; i++) r_t[i] = NULL;

		/* Code path 1 (no jet sharing): fill this word's tables now. */
		if (!sent->jet_sharing.num_cnctrs_per_word[0])
		{
			/* Insert the deep connectors. */
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				Connector *c;

				c = d->right;
				if (c != NULL)
				{
					/* The first (shallow) connector only gets its reference
					 * count here; it is inserted in the second pass below. */
					c->suffix_id = 1;
					for (c = c->next; c != NULL; c = c->next)
					{
						c->suffix_id = 1;
						put_into_power_table(mp, r_size, r_t, c, false);
					}
				}

				c = d->left;
				if (c != NULL)
				{
					/* As above, for the left connector list. */
					c->suffix_id = 1;
					for (c = c->next; c != NULL; c = c->next)
					{
						c->suffix_id = 1;
						put_into_power_table(mp, l_size, l_t, c, false);
					}
				}
			}

			/* Insert the shallow connectors. */
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				Connector *c;

				c = d->right;
				if (c != NULL)
				{
					put_into_power_table(mp, r_size, r_t, c, true);
				}
				c = d->left;
				if (c != NULL)
				{
					put_into_power_table(mp, l_size, l_t, c, true);
				}
			}
		}
	}

	/* Code path 2 (jet sharing): all tables were allocated by the loop
	 * above; fill them from the jet tables. */
	if (sent->jet_sharing.num_cnctrs_per_word[0])
	{
		/* Bulk insertion with reference count. Note: IDs start from 1. */

		/* dir 0 fills the left tables, dir 1 the right tables. */
		for (int dir = 0; dir < 2; dir++)
		{
			C_list ***tp;
			unsigned int *sizep;

			if (dir== 0)
			{
				tp = pt->l_table;
				sizep = pt->l_table_size;
			}
			else
			{
				tp = pt->r_table;
				sizep = pt->r_table_size;
			}

			/* Insert the deep connectors. */
			for (unsigned int id = 1; id < sent->jet_sharing.entries[dir] + 1; id++)
			{
				Connector *htc = sent->jet_sharing.table[dir][id];
				Connector *deepest;

				/* Walk to the last connector of the jet. */
				for (deepest = htc; NULL != deepest->next; deepest = deepest->next)
					;
				/* The owning word is derived from the deepest connector's
				 * nearest_word: one more for dir 0, one less for dir 1. */
				int w = deepest->nearest_word + ((dir== 0) ? 1 : -1);

				unsigned int size = sizep[w];
				C_list **t = tp[w];
				int suffix_id = htc->suffix_id;

				/* Every connector after the first receives the first
				 * connector's suffix_id and is inserted as deep. */
				for (Connector *c = htc->next; NULL != c; c = c->next)
				{
					c->suffix_id = suffix_id;
					put_into_power_table(mp, size, t, c, false);
				}
			}

			/* Insert the shallow connectors. */
			for (unsigned int id = 1; id < sent->jet_sharing.entries[dir] + 1; id++)
			{
				Connector *htc = sent->jet_sharing.table[dir][id];
				Connector *deepest;

				/* Same word lookup as in the deep-connector pass. */
				for (deepest = htc; NULL != deepest->next; deepest = deepest->next)
					;
				int w = deepest->nearest_word + ((dir == 0) ? 1 : -1);

				unsigned int size = sizep[w];
				C_list **t = tp[w];

				put_into_power_table(mp, size, t, htc, true);
			}
		}
	}
}