Example #1
0
/*
 * Collects the disjuncts of word w that may link to lc, to rc, or to both,
 * and returns them as a chain of Match_nodes joined through ->next.
 * lw is the word lc belongs to; rw is the word rc belongs to.  Either
 * connector may be NULL, in which case that side contributes nothing.
 *
 * No disjunct appears twice in the result: a right-side node whose
 * disjunct is already on the left-side list is handed to put_match_list()
 * instead of being returned.  The surviving right-side nodes come first,
 * followed by the left-side nodes.
 *
 * Duplicates are found with a nested-loop (quadratic) scan, each step of
 * which bumps match_cost.  In practice match_cost stays below parse_cost
 * and the loop is tiny, so a smarter algorithm has not been worth it.
 */
Match_node * form_match_list
      (int w, Connector *lc, int lw, Connector *rc, int rw) {
    Match_node *left_bucket, *right_bucket;
    Match_node *left_list = NULL;
    Match_node *right_list = NULL;
    Match_node *kept = NULL;      /* right-side nodes that survive */
    Match_node *dropped = NULL;   /* right-side nodes duplicated on the left */
    Match_node *node, *copy, *probe, *following, *tail;

    /* Pick the hash bucket for each side; a NULL connector has none. */
    left_bucket = (lc != NULL)
        ? l_table[w][fast_match_hash(lc) & (l_table_size[w]-1)]
        : NULL;
    right_bucket = (rc != NULL)
        ? r_table[w][fast_match_hash(rc) & (r_table_size[w]-1)]
        : NULL;

    /* Copy left-bucket entries until one lies to the left of lw.
       Prepending means the copy ends up in reverse bucket order. */
    node = left_bucket;
    while (node != NULL && node->d->left->word >= lw) {
        copy = get_match_node();
        copy->d = node->d;
        copy->next = left_list;
        left_list = copy;
        node = node->next;
    }

    /* Same for the right bucket, stopping at an entry beyond rw. */
    node = right_bucket;
    while (node != NULL && node->d->right->word <= rw) {
        copy = get_match_node();
        copy->d = node->d;
        copy->next = right_list;
        right_list = copy;
        node = node->next;
    }

    /* Sort every right-side node into "kept" or "dropped" depending on
       whether its disjunct already sits on the left-side list. */
    node = right_list;
    while (node != NULL) {
        following = node->next;   /* node->next is overwritten below */
        match_cost++;
        probe = left_list;
        while (probe != NULL) {
            match_cost++;
            if (probe->d == node->d) break;
            probe = probe->next;
        }
        if (probe == NULL) {
            /* ran off the end: not on the left list, so keep it */
            node->next = kept;
            kept = node;
        } else {
            /* duplicate of a left-side entry */
            node->next = dropped;
            dropped = node;
        }
        node = following;
    }
    put_match_list(dropped);

    /* Append the left-side list after the surviving right-side nodes. */
    if (kept == NULL) return left_list;
    tail = kept;
    while (tail->next != NULL) {
        tail = tail->next;
    }
    tail->next = left_list;
    return kept;
}
Example #2
0
/**
 * Gathers the disjuncts of word w that are candidates for linking with
 * lc, with rc, or with both.  lw and rw are the words that lc and rc
 * were taken from; a NULL connector contributes no candidates.
 *
 * The result is a chain of Match_nodes linked through their next
 * pointers.  It holds no duplicates: a right-side node whose disjunct is
 * already present on the left-side list is passed to put_match_list()
 * rather than returned.  Surviving right-side nodes precede the
 * left-side nodes in the returned chain.
 *
 * Duplicate detection is a quadratic nested scan, counted step by step
 * in the context's match_cost.  In practice match_cost is smaller than
 * parse_cost (and the loop is tiny), so it has not been worth improving.
 */
Match_node *
form_match_list(Sentence sent, int w,
                Connector *lc, int lw, Connector *rc, int rw)
{
	match_context_t *ctxt = sent->match_ctxt;
	Match_node *lbucket = NULL;
	Match_node *rbucket = NULL;
	Match_node *lhead = NULL;
	Match_node *rhead = NULL;
	Match_node *uniq = NULL;     /* right-side nodes to return */
	Match_node *discard = NULL;  /* right-side nodes already on the left */
	Match_node *cur, *copy, *probe, *succ, *last;

	/* Look up the hash bucket for each connector that was supplied. */
	if (lc != NULL)
		lbucket = ctxt->l_table[w][connector_hash(lc) & (ctxt->l_table_size[w]-1)];
	if (rc != NULL)
		rbucket = ctxt->r_table[w][connector_hash(rc) & (ctxt->r_table_size[w]-1)];

	/* Left candidates: copy bucket entries up to the first one whose
	 * left connector's word is below lw.  Each copy is pushed on the
	 * front, so lhead is in reverse bucket order. */
	for (cur = lbucket; cur != NULL; cur = cur->next)
	{
		if (lw > cur->d->left->word) break;
		copy = get_match_node(ctxt);
		copy->d = cur->d;
		copy->next = lhead;
		lhead = copy;
	}

	/* Right candidates: likewise, up to the first entry whose right
	 * connector's word is above rw. */
	for (cur = rbucket; cur != NULL; cur = cur->next)
	{
		if (rw < cur->d->right->word) break;
		copy = get_match_node(ctxt);
		copy->d = cur->d;
		copy->next = rhead;
		rhead = copy;
	}

	/* Split the right candidates into those also found on the left
	 * (discard) and those that are new (uniq). */
	cur = rhead;
	while (cur != NULL)
	{
		int seen = 0;

		succ = cur->next;  /* saved: cur->next is relinked below */
		ctxt->match_cost++;
		for (probe = lhead; probe != NULL; probe = probe->next)
		{
			ctxt->match_cost++;
			if (probe->d == cur->d)
			{
				seen = 1;
				break;
			}
		}

		if (seen)
		{
			cur->next = discard;
			discard = cur;
		}
		else
		{
			cur->next = uniq;
			uniq = cur;
		}
		cur = succ;
	}
	put_match_list(sent, discard);

	/* Hang the left list off the tail of the surviving right list. */
	if (uniq == NULL) return lhead;
	last = uniq;
	while (last->next != NULL)
	{
		last = last->next;
	}
	last->next = lhead;
	return uniq;
}
Example #3
0
/**
 * Forms and returns a list of disjuncts coming from word w, that might
 * match lc or rc or both. The lw and rw are the words from which lc
 * and rc came respectively.  Either connector may be NULL, in which
 * case that side contributes no candidates.
 *
 * The list is returned as a linked list of Match_nodes chained through
 * their 'next' pointers: the right-hand candidates that are not also
 * left-hand candidates come first, followed by the left-hand candidates.
 * NULL is returned when neither side yields a candidate.
 *
 * The list contains no duplicates.  For short lists a quadratic
 * algorithm is used to eliminate duplicates.  In practice the match_cost
 * is less than the parse_cost (and the loop is tiny), so there's no
 * reason to bother to fix this.  The number of times through the loop is
 * counted with 'match_cost'; if verbosity>1, then this will be printed
 * at the end.
 *
 * Well, with one exception: for long sentences that have parse
 * overflows, this can sometimes get match lists that are hundreds of
 * elements long, dominating the total time spent in the algo; viz.
 * in excess of 50% of the time.  For that case a sort-and-merge path
 * is used instead (it does not update match_cost).
 */
Match_node *
form_match_list(fast_matcher_t *ctxt, int w,
                Connector *lc, int lw,
                Connector *rc, int rw)
{
	/* Number of nodes copied into the right and left lists below. */
	size_t rlen = 0, llen = 0;
	Match_node *ml, *mr, *mx, *my, *mz, *front, *free_later;

	/* Select the hash bucket for each connector that was supplied. */
	if (lc != NULL) {
		ml = ctxt->l_table[w][connector_hash(lc) & (ctxt->l_table_size[w]-1)];
	} else {
		ml = NULL;
	}
	if (rc != NULL) {
		mr = ctxt->r_table[w][connector_hash(rc) & (ctxt->r_table_size[w]-1)];
	} else {
		mr = NULL;
	}

	/* Copy the leading part of the left bucket into fresh nodes.  Each
	 * copy is pushed on the front, so the copy is in reverse bucket order.
	 * NOTE(review): stopping at the first failing entry (rather than
	 * skipping it) presumably relies on the bucket being ordered by
	 * word -- confirm against the code that builds l_table. */
	front = NULL;
	for (mx = ml; mx != NULL; mx = mx->next)
	{
		if (mx->d->left->word < lw) break;
		my = get_match_node(ctxt);
		my->d = mx->d;
		my->next = front;
		front = my;
		llen++;
	}
	ml = front;   /* ml is now the list of things that could match the left */

	/* Same for the right bucket, stopping at the first entry whose right
	 * connector's word is beyond rw. */
	front = NULL;
	for (mx = mr; mx != NULL; mx = mx->next)
	{
		if (mx->d->right->word > rw) break;
		my = get_match_node(ctxt);
		my->d = mx->d;
		my->next = front;
		front = my;
		rlen++;
	}
	mr = front;   /* mr is now the list of things that could match the right */

	/* If either side is empty there is nothing to de-duplicate.  Past
	 * this point both lists are non-empty, so llen > 0 and rlen > 0. */
	if (mr == NULL) return ml;
	if (ml == NULL) return mr;

	/* Now we want to eliminate duplicates from the lists. */
	/* If the left list is reasonably short, then just do a quadratic
	 * search for duplicates. But if the list is long, optimize the
	 * search.  Based on quickie measurements, the optimized version
	 * seems to dominate when 250 < llen and 8 < rlen. Roughly.
	 * (As coded, the optimized path is taken when llen >= 250 and
	 * rlen >= 9.)
	 */
	if (llen < 250 || rlen < 9)
	{
		/* Perform a simple quadratic-time search. viz two nested loops.
		 * Runtime blows up horribly for lengths over a few hundred. */
		free_later = NULL;
		front = NULL;
		for (mx = mr; mx != NULL; mx = mz)
		{
			/* See if mx is in the first (left) list; keep it if it's not. */
			/* Save the successor now: mx->next is relinked below. */
			mz = mx->next;
			ctxt->match_cost++;
			for (my=ml; my!=NULL; my=my->next) {
				ctxt->match_cost++;
				if (mx->d == my->d) break;
			}
			if (my != NULL) { /* mx was in the l list */
				mx->next = free_later;
				free_later = mx;
			} else {  /* It was not there. */
				mx->next = front;
				front = mx;
			}
		}
		mr = front;  /* mr is now the abbreviated right list */
		/* Hand the duplicate right-hand nodes to put_match_list(). */
		put_match_list(ctxt, free_later);
	}
	else
	{
		/* Perform an O(N log N) search, by sorting first, and then
		 * doing a linear run through the sorted arrays.
		 */
		size_t i,j;
		/* NOTE: this mx shadows the function-level mx inside this block. */
		Match_node* mx;
		/* Scratch arrays of node pointers, taken from the stack via
		 * alloca(); they are not explicitly freed in this function. */
		Match_node** mra = alloca(rlen * sizeof(Match_node*));
		Match_node** mla = alloca(llen * sizeof(Match_node*));

		/* Flatten each list into its array and sort it.
		 * NOTE(review): addr_compare is defined elsewhere; the merge
		 * below assumes it orders nodes by ascending ->d -- confirm. */
		i = 0;
		for (mx = mr; mx != NULL; mx = mx->next) mra[i++] = mx;
		qsort((void *) mra, rlen, sizeof(Match_node*), addr_compare);

		i = 0;
		for (mx = ml; mx != NULL; mx = mx->next) mla[i++] = mx;
		qsort((void *) mla, llen, sizeof(Match_node*), addr_compare);

		/* Compare addresses side-by-side in a linear loop.
		 * Be careful not to run past the array bounds.
		 * i indexes the right array (mra), j the left array (mla).
		 * On entry to each pass of the outer loop, i < rlen and
		 * j < llen. */
		free_later = NULL;
		front = NULL;
		i = 0;
		j = 0;
		while (i < rlen)
		{
			/* Right entries below the current left entry are kept. */
			while (i < rlen && mra[i]->d < mla[j]->d)
			{
				mra[i]->next = front;
				front = mra[i];
				i++;
			}
			if (i == rlen) break;

			/* Same disjunct on both sides: drop the right-hand node
			 * and step past the pair. */
			if (mra[i]->d == mla[j]->d)
			{
				mra[i]->next = free_later;
				free_later = mra[i];
				i++; j++;
			}
			if (i == rlen) break;

			/* Skip left entries below the current right entry. */
			while (j < llen && mra[i]->d > mla[j]->d)
				j++;

			/* Left array exhausted: drain the rest of the right-hand
			 * list, keeping every remaining node. */
			if (j == llen)
			{
				while (i < rlen)
				{
					mra[i]->next = front;
					front = mra[i];
					i++;
				}
				break;
			}
		}
		mr = front;  /* mr is now the abbreviated right list */
		/* Hand the duplicate right-hand nodes to put_match_list(). */
		put_match_list(ctxt, free_later);
	}

	/* Now catenate the two lists: walk to the tail of the right list
	 * and append the left list there. */
	if (mr == NULL) return ml;
	/* NOTE: ml was already checked non-NULL before the de-duplication
	 * step and is not reassigned after that check, so the next test
	 * cannot fire. */
	if (ml == NULL) return mr;
	for (mx = mr; mx->next != NULL; mx = mx->next)
	  ;
	mx->next = ml;
	return mr;
}