Exemple #1
0
/**
 * Build (and memoize) the set of all parses of the span between words
 * lw and rw, bounded on the left by connector list le and on the right
 * by connector list re.
 *
 * Returns NULL if there are no ways to parse, or returns a pointer
 * to a set structure representing all the ways to parse.
 *
 * This code is similar to do_count() in count.c -- for a good reason:
 * the do_count() function did a full parse, but didn't actually
 * allocate any memory structures to hold the parse.  This also does
 * a full parse, but it also allocates and fills out the various
 * parse structures.
 *
 * Parameters, as used by this function:
 *   sent        sentence; sent->word[w].d is walked as the disjunct
 *               list of word w.
 *   mchxt       handed to form_match_list() / put_match_list().
 *   ctxt        handed to table_lookup() to fetch the stored count.
 *   ld, rd      disjuncts recorded (via make_choice) for the left and
 *               right boundary words of this span.
 *   lw, rw      indices of the boundary words; lw == -1 is treated
 *               specially by the islands check below.
 *   le, re      remaining connectors on the left / right boundary;
 *               either may be NULL.
 *   null_count  number of nulls to distribute over this span; it is
 *               split between the two sub-spans in every possible way.
 *   islands_ok  when false, the "no connectors on either side" case is
 *               only expanded for lw == -1.
 *   pi          handed to x_table_pointer() / x_table_store().
 */
static
Parse_set * mk_parse_set(Sentence sent, fast_matcher_t *mchxt,
                         count_context_t * ctxt,
                         Disjunct *ld, Disjunct *rd, int lw, int rw,
                         Connector *le, Connector *re, unsigned int null_count,
                         bool islands_ok, Parse_info pi)
{
    Disjunct * d, * dis;
    int start_word, end_word, w;
    bool Lmatch, Rmatch;
    unsigned int lnull_count, rnull_count;
    int i, j;
    Parse_set *ls[4], *rs[4], *lset, *rset;
    Parse_choice * a_choice;

    Match_node * m, *m1;
    X_table_connector *xt;
    s64 count;

    /* null_count is unsigned, so a "negative" value passed by a caller
     * shows up here as a huge number; the bound check catches that. */
    assert(null_count < 0x7fff, "mk_parse_set() called with null_count < 0.");

    count = table_lookup(ctxt, lw, rw, le, re, null_count);

    /*
      assert(count >= 0, "mk_parse_set() called on params that were not in the table.");
      Actually, we can't assert this, because of the pseudocount technique that's
      used in count().  It's not the case that every call to mk_parse_set() has already
      been put into the table.
     */

    /* Nothing to build when the stored count is 0 or -1.
     * NOTE(review): -1 presumably means "no table entry" -- confirm
     * against table_lookup(). */
    if ((count == 0) || (count == -1)) return NULL;

    xt = x_table_pointer(lw, rw, le, re, null_count, pi);

    if (xt != NULL) return xt->set;  /* we've already computed it */

    /* Start it out with the empty set of options. */
    /* This entry must be updated before we return. */
    xt = x_table_store(lw, rw, le, re, null_count, pi);

    xt->set->count = count;  /* the count we already computed */
    /* this count is non-zero */

    /* Adjacent words: there is nothing between them to choose, so the
     * set is returned without any choices added. */
    if (rw == 1 + lw) return xt->set;

    /* No connectors on either side of a non-empty span: the only way to
     * proceed is to spend a null on word lw+1. */
    if ((le == NULL) && (re == NULL))
    {
        if (!islands_ok && (lw != -1)) return xt->set;

        if (null_count == 0) return xt->set;

        w = lw + 1;
        /* Try every disjunct of word w that has no left connectors,
         * continuing the parse to the right of w with one null fewer. */
        for (dis = sent->word[w].d; dis != NULL; dis = dis->next)
        {
            if (dis->left == NULL)
            {
                rs[0] = mk_parse_set(sent, mchxt, ctxt, dis, NULL, w, rw, dis->right,
                                     NULL, null_count-1, islands_ok, pi);
                if (rs[0] == NULL) continue;
                a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
                                       rs[0], w, rw, NULL, NULL,
                                       NULL, NULL, NULL);
                put_choice_in_set(xt->set, a_choice);
            }
        }
        /* Also try word w with no disjunct at all (no connectors). */
        rs[0] = mk_parse_set(sent, mchxt, ctxt, NULL, NULL, w, rw, NULL, NULL,
                             null_count-1, islands_ok, pi);
        if (rs[0] != NULL)
        {
            a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
                                   rs[0], w, rw, NULL, NULL,
                                   NULL, NULL, NULL);
            put_choice_in_set(xt->set, a_choice);
        }
        return xt->set;
    }

    /* Candidate split words run over [start_word, end_word): from
     * le->word (or lw+1 if there is no left connector) up to and
     * including re->word (or rw-1 if there is no right connector). */
    if (le == NULL)
    {
        start_word = lw + 1;
    }
    else
    {
        start_word = le->word;
    }

    if (re == NULL)
    {
        end_word = rw;
    }
    else
    {
        end_word = re->word + 1;
    }

    /* This condition can never be true here. It is included so GCC will be able
     * to optimize the loop over "null_count".  Without this check, GCC thinks this
     * loop may be an infinite loop and it may omit some optimizations. */
    if (UINT_MAX == null_count) return NULL;

    for (w = start_word; w < end_word; w++)
    {
        /* m1 keeps the list head so it can be handed back to
         * put_match_list() once the list has been walked. */
        m1 = m = form_match_list(mchxt, w, le, lw, re, rw);
        for (; m!=NULL; m=m->next)
        {
            d = m->d;
            for (lnull_count = 0; lnull_count <= null_count; lnull_count++)
            {
                rnull_count = null_count-lnull_count;
                /* now lnull_count and rnull_count are the null_counts we're assigning to
                 * those parts respectively */

                /* Now, we determine if (based on table only) we can see that
                   the current range is not parsable. */

                Lmatch = (le != NULL) && (d->left != NULL) && do_match(le, d->left, lw, w);
                Rmatch = (d->right != NULL) && (re != NULL) && do_match(d->right, re, w, rw);
                for (i=0; i<4; i++) {
                    ls[i] = rs[i] = NULL;
                }
                /* For a matching pair of connectors there are up to four
                 * continuations: slot 0 advances both connector lists; slots
                 * 1..3 keep a connector in place when it is flagged multi
                 * (left side: le, d->left, both; right side: d->right, re,
                 * both). */
                if (Lmatch)
                {
                    ls[0] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le->next, d->left->next, lnull_count, islands_ok, pi);
                    if (le->multi) ls[1] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le, d->left->next, lnull_count, islands_ok, pi);
                    if (d->left->multi) ls[2] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le->next, d->left, lnull_count, islands_ok, pi);
                    if (le->multi && d->left->multi) ls[3] = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le, d->left, lnull_count, islands_ok, pi);
                }
                if (Rmatch)
                {
                    rs[0] = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right->next, re->next, rnull_count, islands_ok, pi);
                    if (d->right->multi) rs[1] = mk_parse_set(sent, mchxt, ctxt, d, rd, w,rw,d->right,re->next, rnull_count, islands_ok, pi);
                    if (re->multi) rs[2] = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right->next, re, rnull_count, islands_ok, pi);
                    if (d->right->multi && re->multi) rs[3] = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right, re, rnull_count, islands_ok, pi);
                }

                /* Case 1: links are made on both sides of w.  Every non-NULL
                 * left set is paired with every non-NULL right set. */
                for (i=0; i<4; i++)
                {
                    /* this ordering is probably not consistent with that
                     *  needed to use list_links */
                    if (ls[i] == NULL) continue;
                    for (j=0; j<4; j++)
                    {
                        if (rs[j] == NULL) continue;
                        a_choice = make_choice(ls[i], lw, w, le, d->left,
                                               rs[j], w, rw, d->right, re,
                                               ld, d, rd);
                        put_choice_in_set(xt->set, a_choice);
                    }
                }

                /* Case 2: a link on the left only; the right sub-span is
                 * parsed with d->right and re both left unconsumed. */
                if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL)
                {
                    /* evaluate using the left match, but not the right */
                    rset = mk_parse_set(sent, mchxt, ctxt, d, rd, w, rw, d->right, re, rnull_count, islands_ok, pi);
                    if (rset != NULL)
                    {
                        for (i=0; i<4; i++)
                        {
                            if (ls[i] == NULL) continue;
                            /* this ordering is probably not consistent with
                             * that needed to use list_links */
                            a_choice = make_choice(ls[i], lw, w, le, d->left,
                                                   rset, w, rw, NULL /* d->right */,
                                                   re,  /* the NULL indicates no link*/
                                                   ld, d, rd);
                            put_choice_in_set(xt->set, a_choice);
                        }
                    }
                }
                /* Case 3: a link on the right only.  This is only tried when
                 * there is no left connector (le == NULL). */
                if ((le == NULL) && (rs[0] != NULL ||
                                     rs[1] != NULL || rs[2] != NULL || rs[3] != NULL))
                {
                    /* evaluate using the right match, but not the left */
                    lset = mk_parse_set(sent, mchxt, ctxt, ld, d, lw, w, le, d->left, lnull_count, islands_ok, pi);

                    if (lset != NULL)
                    {
                        for (i=0; i<4; i++)
                        {
                            if (rs[i] == NULL) continue;
                            /* this ordering is probably not consistent with
                             * that needed to use list_links */
                            a_choice = make_choice(lset, lw, w, NULL /* le */,
                                                   d->left,  /* NULL indicates no link */
                                                   rs[i], w, rw, d->right, re,
                                                   ld, d, rd);
                            put_choice_in_set(xt->set, a_choice);
                        }
                    }
                }
            }
        }
        put_match_list(mchxt, m1);
    }
    /* Point the set's cursor back at its first choice before returning.
     * The early-return paths above do not do this. */
    xt->set->current = xt->set->first;
    return xt->set;
}
Exemple #2
0
/**
 * Build (and memoize) the set of all parses of the span between words
 * lw and rw, bounded by connector lists le (left) and re (right), at
 * the given cost.
 *
 * ld and rd are the disjuncts recorded (via make_choice) for the two
 * boundary words; pi is handed to x_table_pointer() / x_table_store().
 *
 * NOTE(review): islands_ok and local_sent are not declared in this
 * function; presumably they are file-level variables -- confirm.
 */
Parse_set * parse_set(Disjunct *ld, Disjunct *rd, int lw, int rw, 
		      Connector *le, Connector *re, int cost, Parse_info * pi) {
    /* returns NULL if there are no ways to parse, or returns a pointer
       to a set structure representing all the ways to parse */

    Disjunct * d, * dis;
    int start_word, end_word, w;
    int lcost, rcost, Lmatch, Rmatch;
    int i, j;
    Parse_set *ls[4], *rs[4], *lset, *rset;
    Parse_choice * a_choice;

    Match_node * m, *m1;
    X_table_connector *xt;
    int count;

    assert(cost >= 0, "parse_set() called with cost < 0.");

    count = table_lookup(lw, rw, le, re, cost);

    /*
      assert(count >= 0, "parse_set() called on params that were not in the table.");
      Actually, we can't assert this, because of the pseudocount technique that's
      used in count().  It's not the case that every call to parse_set() has already
      been put into the table.
     */

    /* Nothing to build when the stored count is 0 or -1.
       NOTE(review): -1 presumably means "no table entry" -- confirm. */
    if ((count == 0) || (count == -1)) return NULL;
    
    xt = x_table_pointer(lw, rw, le, re, cost, pi);

    if (xt == NULL) {
	xt = x_table_store(lw, rw, le, re, cost, empty_set(), pi);
	/* start it out with the empty set of options */
	/* this entry must be updated before we return */
    } else {
	return xt->set;  /* we've already computed it */
    }

    xt->set->count = count;  /* the count we already computed */
    /* this count is non-zero */
    
    /* Adjacent words: the set is returned with no choices added. */
    if (rw == 1+lw) return xt->set;
    /* No connectors on either side of a non-empty span: the only way to
       proceed is to charge one unit of cost to word lw+1. */
    if ((le == NULL) && (re == NULL)) {
	if (!islands_ok && (lw != -1)) {
	    return xt->set;
	}
	if (cost == 0) {
	    return xt->set;
	} else {
	    w = lw+1;
	    /* Try every disjunct of word w that has no left connectors. */
	    for (dis = local_sent[w].d; dis != NULL; dis = dis->next) {
		if (dis->left == NULL) {
		    rs[0] = parse_set(dis, NULL, w, rw, dis->right, NULL, cost-1, pi);
		    if (rs[0] == NULL) continue;
		    a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
					   rs[0], w, rw, NULL, NULL,
					   NULL, NULL, NULL);
		    put_choice_in_set(xt->set, a_choice);
		}
	    }
	    /* Also try word w with no disjunct at all. */
	    rs[0] = parse_set(NULL, NULL, w, rw, NULL, NULL, cost-1, pi); 
	    if (rs[0] != NULL) {
		a_choice = make_choice(dummy_set(), lw, w, NULL, NULL,
				       rs[0], w, rw, NULL, NULL,
				       NULL, NULL, NULL);
		put_choice_in_set(xt->set, a_choice);
	    }
	    return xt->set;
	}
    }
    
    /* Candidate split words run over [start_word, end_word] inclusive. */
    if (le == NULL) {
	start_word = lw+1;
    } else {
	start_word = le->word;

    }

    if (re == NULL) {
	end_word = rw-1;
    } else {
	end_word = re->word;
    }
    
    for (w=start_word; w <= end_word; w++) {
	/* m1 keeps the list head so it can be handed back to put_match_list() */
	m1 = m = form_match_list(w, le, lw, re, rw); 
	for (; m!=NULL; m=m->next) {
	    d = m->d;
	    for (lcost = 0; lcost <= cost; lcost++) {
		rcost = cost-lcost;
		/* now lcost and rcost are the costs we're assigning to those parts respectively */

		/* Now, we determine if (based on table only) we can see that
		   the current range is not parsable. */

		Lmatch = (le != NULL) && (d->left != NULL) && match(le, d->left, lw, w);
		Rmatch = (d->right != NULL) && (re != NULL) && match(d->right, re, w, rw);
		for (i=0; i<4; i++) {ls[i] = rs[i] = NULL;}
		/* Slot 0 advances both connector lists; slots 1..3 keep a
		   connector in place when it is flagged multi. */
		if (Lmatch) {
		    ls[0] = parse_set(ld, d, lw, w, le->next, d->left->next, lcost, pi);
		    if (le->multi) ls[1] = parse_set(ld, d, lw, w, le, d->left->next, lcost, pi);
		    if (d->left->multi) ls[2] = parse_set(ld, d, lw, w, le->next, d->left, lcost, pi);
		    if (le->multi && d->left->multi) ls[3] = parse_set(ld, d, lw, w, le, d->left, lcost, pi);
		}
		if (Rmatch) {
		    rs[0] = parse_set(d, rd, w, rw, d->right->next, re->next, rcost, pi);
		    if (d->right->multi) rs[1] = parse_set(d, rd, w,rw,d->right,re->next, rcost, pi);
		    if (re->multi) rs[2] = parse_set(d, rd, w, rw, d->right->next, re, rcost, pi);
		    if (d->right->multi && re->multi) rs[3] = parse_set(d, rd, w, rw, d->right, re, rcost, pi);
		}

		/* Case 1: links on both sides of w; pair every non-NULL left
		   set with every non-NULL right set. */
		for (i=0; i<4; i++) {
		    /* this ordering is probably not consistent with that needed to use list_links */
		    if (ls[i] == NULL) continue;
		    for (j=0; j<4; j++) {
			if (rs[j] == NULL) continue;
			a_choice = make_choice(ls[i], lw, w, le, d->left,
					       rs[j], w, rw, d->right, re,
					       ld, d, rd);
			put_choice_in_set(xt->set, a_choice);
		    }
		}
		
		/* Case 2: a link on the left only. */
		if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL) {
		    /* evaluate using the left match, but not the right */
		    rset = parse_set(d, rd, w, rw, d->right, re, rcost, pi);
		    if (rset != NULL) {
			for (i=0; i<4; i++) {
			    if (ls[i] == NULL) continue;
			    /* this ordering is probably not consistent with that needed to use list_links */
			    a_choice = make_choice(ls[i], lw, w, le, d->left,
						   rset, w, rw, NULL /* d->right */, re,  /* the NULL indicates no link*/
						   ld, d, rd);
			    put_choice_in_set(xt->set, a_choice);
			}
		    }
		}
		/* Case 3: a link on the right only; tried only when there is
		   no left connector (le == NULL). */
		if ((le == NULL) && (rs[0] != NULL || rs[1] != NULL || rs[2] != NULL || rs[3] != NULL)) {
		    /* evaluate using the right match, but not the left */
		    lset = parse_set(ld, d, lw, w, le, d->left, lcost, pi);

		    if (lset != NULL) {
			for (i=0; i<4; i++) {
			    if (rs[i] == NULL) continue;
			    /* this ordering is probably not consistent with that needed to use list_links */
			    a_choice = make_choice(lset, lw, w, NULL /* le */, d->left,  /* NULL indicates no link */
						   rs[i], w, rw, d->right, re,
						   ld, d, rd);
			    put_choice_in_set(xt->set, a_choice);
			}
		    }
		}
	    }
	}
	put_match_list(m1);
    }
    /* Point the set's cursor back at its first choice before returning. */
    xt->set->current = xt->set->first;
    return xt->set;
}
Exemple #3
0
/**
 * Flag (d->marked = TRUE) every disjunct that takes part in some way of
 * completing the structure inside the region (lw, rw, le, re).  Only
 * disjuncts of words strictly between lw and rw are flagged.
 *
 * The region is first checked with region_valid().  A result of 0
 * (not valid) or 2 (already marked) ends the call at once; otherwise
 * the table entry is updated to 2 and the sub-regions are visited
 * recursively.
 */
static void mark_region(Sentence sent,
                        int lw, int rw, Connector *le, Connector *re)
{
	count_context_t *ctxt = sent->count_ctxt;
	Disjunct *d;
	int status, first_w, last_w, w;

	status = region_valid(sent, lw, rw, le, re);
	if ((status == 0) || (status == 2)) return;

	/* Valid and not yet marked: record it as marked before recursing. */
	table_update(ctxt, lw, rw, le, re, 0, 2);

	/* No connectors on either side, null links enabled, and at least
	 * one word in between: step over word lw+1. */
	if ((le == NULL) && (re == NULL) && (ctxt->null_links) && (rw != 1+lw))
	{
		w = lw + 1;
		for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
		{
			if (d->left != NULL) continue;
			if (!region_valid(sent, w, rw, d->right, NULL)) continue;

			d->marked = TRUE;
			mark_region(sent, w, rw, d->right, NULL);
		}
		mark_region(sent, w, rw, NULL, NULL);
		return;
	}

	/* Range of candidate words, inclusive on both ends. */
	first_w = lw + 1;
	if (le != NULL) first_w = le->word;
	last_w = rw - 1;
	if (re != NULL) last_w = re->word;

	for (w = first_w; w <= last_w; w++)
	{
		Match_node *match_list = form_match_list(sent, w, le, lw, re, rw);
		Match_node *node;

		for (node = match_list; node != NULL; node = node->next)
		{
			int left_ok, right_ok;

			d = node->d;

			/* A link le <-> d->left is usable if the connectors match
			 * and at least one continuation of the left region is valid. */
			left_ok =
				(le != NULL) && (d->left != NULL) &&
				x_prune_match(ctxt, le, d->left, lw, w) &&
				(region_valid(sent, lw, w, le->next, d->left->next) ||
				 (le->multi && region_valid(sent, lw, w, le, d->left->next)) ||
				 (d->left->multi && region_valid(sent, lw, w, le->next, d->left)) ||
				 (le->multi && d->left->multi && region_valid(sent, lw, w, le, d->left)));

			/* Likewise for a link d->right <-> re. */
			right_ok =
				(d->right != NULL) && (re != NULL) &&
				x_prune_match(ctxt, d->right, re, w, rw) &&
				(region_valid(sent, w, rw, d->right->next, re->next) ||
				 (d->right->multi && region_valid(sent, w, rw, d->right, re->next)) ||
				 (re->multi && region_valid(sent, w, rw, d->right->next, re)) ||
				 (d->right->multi && re->multi && region_valid(sent, w, rw, d->right, re)));

			/* The three cases below may visit the same region more than
			 * once; the repeated visits return early because the region
			 * is already recorded as marked. */

			/* Link on the left; right region completed without linking
			 * d->right to re. */
			if (left_ok && region_valid(sent, w, rw, d->right, re))
			{
				d->marked = TRUE;
				mark_region(sent, w, rw, d->right, re);
				mark_region(sent, lw, w, le->next, d->left->next);
				if (le->multi)
					mark_region(sent, lw, w, le, d->left->next);
				if (d->left->multi)
					mark_region(sent, lw, w, le->next, d->left);
				if (le->multi && d->left->multi)
					mark_region(sent, lw, w, le, d->left);
			}

			/* Link on the right; left region completed without linking
			 * le to d->left. */
			if (right_ok && region_valid(sent, lw, w, le, d->left))
			{
				d->marked = TRUE;
				mark_region(sent, lw, w, le, d->left);
				mark_region(sent, w, rw, d->right->next, re->next);
				if (d->right->multi)
					mark_region(sent, w, rw, d->right, re->next);
				if (re->multi)
					mark_region(sent, w, rw, d->right->next, re);
				if (d->right->multi && re->multi)
					mark_region(sent, w, rw, d->right, re);
			}

			/* Links on both sides. */
			if (left_ok && right_ok)
			{
				d->marked = TRUE;
				mark_region(sent, lw, w, le->next, d->left->next);
				if (le->multi)
					mark_region(sent, lw, w, le, d->left->next);
				if (d->left->multi)
					mark_region(sent, lw, w, le->next, d->left);
				if (le->multi && d->left->multi)
					mark_region(sent, lw, w, le, d->left);
				mark_region(sent, w, rw, d->right->next, re->next);
				if (d->right->multi)
					mark_region(sent, w, rw, d->right, re->next);
				if (re->multi)
					mark_region(sent, w, rw, d->right->next, re);
				if (d->right->multi && re->multi)
					mark_region(sent, w, rw, d->right, re);
			}
		}
		put_match_list(sent, match_list);
	}
}
Exemple #4
0
/**
 * Report whether the region (lw, rw, le, re) can be filled in with
 * links.
 *
 * Returns 0 if it cannot, 1 if it can and has not been marked, and 2
 * if it can and has been marked.  The answer is cached in the count
 * table; this function itself only ever stores 0 or 1, so a result of
 * 2 always comes from a cached entry.
 */
static int region_valid(Sentence sent, int lw, int rw, Connector *le, Connector *re)
{
	count_context_t *ctxt = sent->count_ctxt;
	int cached, first_w, last_w, w;
	int found = 0;

	/* A non-negative table value is a previously stored answer. */
	cached = table_lookup(sent, lw, rw, le, re, 0);
	if (cached >= 0) return cached;

	/* With no connectors on either side, the region is accepted when
	 * the deletable table has an entry for (lw, rw). */
	if ((le == NULL) && (re == NULL) && ctxt->deletable[lw][rw])
	{
		table_store(ctxt, lw, rw, le, re, 0, 1);
		return 1;
	}

	/* Range of candidate words, inclusive on both ends. */
	first_w = lw + 1;
	if (le != NULL) first_w = le->word;
	last_w = rw - 1;
	if (re != NULL) last_w = re->word;

	for (w = first_w; w <= last_w; w++)
	{
		Match_node *match_list = form_match_list(sent, w, le, lw, re, rw);
		Match_node *node;

		for (node = match_list; node != NULL; node = node->next)
		{
			Disjunct *d = node->d;
			int left_ok, right_ok;

			/* The expressions below rely on region_valid() returning
			 * zero for "not valid" and non-zero otherwise. */
			left_ok =
				(le != NULL) && (d->left != NULL) &&
				x_prune_match(ctxt, le, d->left, lw, w) &&
				(region_valid(sent, lw, w, le->next, d->left->next) ||
				 (le->multi && region_valid(sent, lw, w, le, d->left->next)) ||
				 (d->left->multi && region_valid(sent, lw, w, le->next, d->left)) ||
				 (le->multi && d->left->multi && region_valid(sent, lw, w, le, d->left)));

			/* Left link plus a right region that needs no link to re. */
			if (left_ok && region_valid(sent, w, rw, d->right, re))
			{
				found = 1;
				break;
			}

			right_ok =
				(d->right != NULL) && (re != NULL) &&
				x_prune_match(ctxt, d->right, re, w, rw) &&
				(region_valid(sent, w, rw, d->right->next, re->next) ||
				 (d->right->multi && region_valid(sent, w, rw, d->right, re->next)) ||
				 (re->multi && region_valid(sent, w, rw, d->right->next, re)) ||
				 (d->right->multi && re->multi && region_valid(sent, w, rw, d->right, re)));

			/* Right link, combined either with the left link or with a
			 * left region that needs no link to le. */
			if (right_ok && (left_ok || region_valid(sent, lw, w, le, d->left)))
			{
				found = 1;
				break;
			}
		}

		/* The list is handed back whether or not the walk ended early. */
		put_match_list(sent, match_list);
		if (found) break;
	}

	table_store(ctxt, lw, rw, le, re, 0, found);
	return found;
}
Exemple #5
0
/**
 * Count the ways the region (lw, rw, le, re) can be completed at
 * exactly the given cost.
 *
 * The result is memoized in the count table.  A table entry with a
 * count of 0 is stored before any recursion and is overwritten with
 * the real count before returning.
 */
static s64 do_count(Sentence sent, int lw, int rw,
                    Connector *le, Connector *re, int cost)
{
	count_context_t *ctxt = sent->count_ctxt;
	Table_connector *entry;
	Disjunct *d;
	s64 sum;
	int first_w, last_w, w;

	if (cost < 0) return 0;  /* will we ever call it with cost<0 ? */

	entry = find_table_pointer(ctxt, lw, rw, le, re, cost);
	if (entry != NULL) return entry->count;

	/* Create the table entry with a tentative count of 0.
	 * This count must be updated before we return. */
	entry = table_store(ctxt, lw, rw, le, re, cost, 0);

	if (rw == 1+lw)
	{
		/* lw and rw are neighboring words; there is exactly one
		 * (empty) completion, and only when no connectors remain and
		 * the cost is zero. */
		entry->count = ((le == NULL) && (re == NULL) && (cost == 0)) ? 1 : 0;
		return entry->count;
	}

	if ((le == NULL) && (re == NULL))
	{
		if (!ctxt->islands_ok && (lw != -1))
		{
			/* If we don't allow islands (a set of words linked together
			 * but separate from the rest of the sentence) then the cost
			 * of skipping n words is n rounded up to a multiple of
			 * null_block, divided by null_block.  E.g. with null_block=4
			 * the cost of 1,2,3,4 nulls is 1; of 5,6,7,8 it is 2; etc. */
			entry->count =
				(cost == ((rw-lw-1) + ctxt->null_block-1)/ctxt->null_block) ? 1 : 0;
			return entry->count;
		}

		if (cost == 0)
		{
			/* There is no zero-cost solution in this case.  Handling it
			 * separately is an efficiency shortcut, not needed for
			 * correctness. */
			entry->count = 0;
			return entry->count;
		}

		/* Charge one unit of cost to word lw+1 and count what remains
		 * to its right: once per disjunct without left connectors, and
		 * once with no disjunct at all. */
		sum = 0;
		w = lw + 1;
		for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
		{
			if (d->left != NULL) continue;
			sum += do_count(sent, w, rw, d->right, NULL, cost-1);
		}
		sum += do_count(sent, w, rw, NULL, NULL, cost-1);
		entry->count = sum;
		return entry->count;
	}

	/* Range of candidate words, inclusive on both ends. */
	first_w = lw + 1;
	if (le != NULL) first_w = le->word;
	last_w = rw - 1;
	if (re != NULL) last_w = re->word;

	sum = 0;

	for (w = first_w; w <= last_w; w++)
	{
		Match_node *match_list = form_match_list(sent, w, le, lw, re, rw);
		Match_node *node;

		for (node = match_list; node != NULL; node = node->next)
		{
			int lcost;

			d = node->d;

			/* Split the cost between the left and right parts in every
			 * possible way. */
			for (lcost = 0; lcost <= cost; lcost++)
			{
				int rcost = cost - lcost;
				int left_match, right_match;
				s64 lpseudo = 0, rpseudo = 0, pseudo_sum;
				s64 lcount = 0, rcount = 0;

				left_match = (le != NULL) && (d->left != NULL) &&
				             do_match(sent, le, d->left, lw, w);
				right_match = (d->right != NULL) && (re != NULL) &&
				              do_match(sent, d->right, re, w, rw);

				/* First pass: pseudocounts.  A zero pseudo total means
				 * the true total is known to be zero. */
				if (left_match)
				{
					lpseudo = pseudocount(sent, lw, w, le->next, d->left->next, lcost);
					if (le->multi)
						lpseudo += pseudocount(sent, lw, w, le, d->left->next, lcost);
					if (d->left->multi)
						lpseudo += pseudocount(sent, lw, w, le->next, d->left, lcost);
					if (le->multi && d->left->multi)
						lpseudo += pseudocount(sent, lw, w, le, d->left, lcost);
				}

				if (right_match)
				{
					rpseudo = pseudocount(sent, w, rw, d->right->next, re->next, rcost);
					if (d->right->multi)
						rpseudo += pseudocount(sent, w, rw, d->right, re->next, rcost);
					if (re->multi)
						rpseudo += pseudocount(sent, w, rw, d->right->next, re, rcost);
					if (d->right->multi && re->multi)
						rpseudo += pseudocount(sent, w, rw, d->right, re, rcost);
				}

				/* links used on both sides */
				pseudo_sum = lpseudo * rpseudo;

				if (lpseudo > 0)
				{
					/* left match used, right one not */
					pseudo_sum += lpseudo * pseudocount(sent, w, rw, d->right, re, rcost);
				}
				if ((le == NULL) && (rpseudo > 0))
				{
					/* right match used, left one not */
					pseudo_sum += rpseudo * pseudocount(sent, lw, w, le, d->left, lcost);
				}

				if (pseudo_sum == 0) continue;

				/* Second pass: the real counts, same shape as above. */
				if (left_match)
				{
					lcount = do_count(sent, lw, w, le->next, d->left->next, lcost);
					if (le->multi)
						lcount += do_count(sent, lw, w, le, d->left->next, lcost);
					if (d->left->multi)
						lcount += do_count(sent, lw, w, le->next, d->left, lcost);
					if (le->multi && d->left->multi)
						lcount += do_count(sent, lw, w, le, d->left, lcost);
				}

				if (right_match)
				{
					rcount = do_count(sent, w, rw, d->right->next, re->next, rcost);
					if (d->right->multi)
						rcount += do_count(sent, w, rw, d->right, re->next, rcost);
					if (re->multi)
						rcount += do_count(sent, w, rw, d->right->next, re, rcost);
					if (d->right->multi && re->multi)
						rcount += do_count(sent, w, rw, d->right, re, rcost);
				}

				/* links used on both sides */
				sum += lcount * rcount;

				if (lcount > 0)
				{
					/* left match used, right one not */
					sum += lcount * do_count(sent, w, rw, d->right, re, rcost);
				}
				if ((le == NULL) && (rcount > 0))
				{
					/* right match used, left one not */
					sum += rcount * do_count(sent, lw, w, le, d->left, lcost);
				}
			}
		}

		put_match_list(sent, match_list);
	}

	entry->count = sum;
	return sum;
}
Exemple #6
0
/**
 * Count the ways the span between words lw and rw, bounded by the
 * connector lists le (left) and re (right), can be completed with the
 * given null_count.
 *
 * The result is memoized: an existing table entry is returned as-is;
 * otherwise a new entry is stored before any recursion and its count
 * is filled in before returning.
 *
 * mchxt is handed to form_match_list() / put_match_list(); ctxt
 * supplies the table, the islands_ok flag and local_sent.
 *
 * If the running total exceeds INT_MAX it is clamped to INT_MAX,
 * stored, and returned immediately.
 */
static Count_bin do_count(fast_matcher_t *mchxt,
                          count_context_t *ctxt,
                          int lw, int rw,
                          Connector *le, Connector *re,
                          int null_count)
{
	Count_bin zero = hist_zero();
	Count_bin total;
	int start_word, end_word, w;
	Table_connector *t;

	assert (0 <= null_count, "Bad null count");

	t = find_table_pointer(ctxt, lw, rw, le, re, null_count);

	/* Already in the table: return the stored count. */
	if (t) return t->count;

	/* Create the table entry with a tentative null count of 0.
	 * This count must be updated before we return. */
	t = table_store(ctxt, lw, rw, le, re, null_count);

	if (rw == 1+lw)
	{
		/* lw and rw are neighboring words */
		/* You can't have a linkage here with null_count > 0 */
		if ((le == NULL) && (re == NULL) && (null_count == 0))
		{
			t->count = hist_one();
		}
		else
		{
			t->count = zero;
		}
		return t->count;
	}

	/* The left and right connectors are null, but the two words are
	 * NOT next to each-other. */
	if ((le == NULL) && (re == NULL))
	{
		if (!ctxt->islands_ok && (lw != -1))
		{
			/* If we don't allow islands (a set of words linked together
			 * but separate from the rest of the sentence) then the
			 * null_count of skipping n words is just n. */
			if (null_count == (rw-lw-1))
			{
				t->count = hist_one();
			}
			else
			{
				t->count = zero;
			}
			return t->count;
		}
		if (null_count == 0)
		{
			/* There is no solution without nulls in this case. There is
			 * a slight efficiency hack to separate this null_count==0
			 * case out, but not necessary for correctness */
			t->count = zero;
		}
		else
		{
			/* Spend one null on word lw+1 and accumulate the counts for
			 * what remains to its right: once per disjunct that has no
			 * left connectors, and once with no disjunct at all. */
			t->count = zero;
			Disjunct * d;
			/* NOTE: this w shadows the function-scope w declared above. */
			int w = lw + 1;
			for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
			{
				if (d->left == NULL)
				{
					hist_accumv(&t->count, d->cost,
						do_count(mchxt, ctxt, w, rw, d->right, NULL, null_count-1));
				}
			}
			hist_accumv(&t->count, 0.0,
				do_count(mchxt, ctxt, w, rw, NULL, NULL, null_count-1));
		}
		return t->count;
	}

	/* Candidate split words run over [start_word, end_word): from
	 * le->word (or lw+1 with no left connector) up to and including
	 * re->word (or rw-1 with no right connector). */
	if (le == NULL)
	{
		start_word = lw+1;
	}
	else
	{
		start_word = le->word;
	}

	if (re == NULL)
	{
		end_word = rw;
	}
	else
	{
		end_word = re->word +1;
	}

	total = zero;

	for (w = start_word; w < end_word; w++)
	{
		Match_node *m, *m1;
		/* m1 keeps the list head so it can be handed back to
		 * put_match_list() on every exit path. */
		m1 = m = form_match_list(mchxt, w, le, lw, re, rw);
		for (; m != NULL; m = m->next)
		{
			unsigned int lnull_cnt, rnull_cnt;
			Disjunct * d = m->d;
			/* _p1 avoids a gcc warning about unsafe loop opt */
			unsigned int null_count_p1 = null_count + 1;

			/* Split null_count between the left and right parts in
			 * every possible way. */
			for (lnull_cnt = 0; lnull_cnt < null_count_p1; lnull_cnt++)
			{
				bool Lmatch, Rmatch;
				bool leftpcount = false;
				bool rightpcount = false;
				bool pseudototal = false;

				rnull_cnt = null_count - lnull_cnt;
				/* Now lnull_cnt and rnull_cnt are the costs we're assigning
				 * to those parts respectively */

				/* Now, we determine if (based on table only) we can see that
				   the current range is not parsable. */
				Lmatch = (le != NULL) && (d->left != NULL) &&
				         do_match(le, d->left, lw, w);
				Rmatch = (d->right != NULL) && (re != NULL) &&
				         do_match(d->right, re, w, rw);

				/* First, perform pseudocounting as an optimization. If
				 * the pseudocount is zero, then we know that the true
				 * count will be zero, and so skip counting entirely,
				 * in that case.
				 */
				/* The pseudocount results are used as booleans here, so
				 * each chain stops at the first non-zero answer. */
				if (Lmatch)
				{
					leftpcount = pseudocount(ctxt, lw, w, le->next, d->left->next, lnull_cnt);
					if (!leftpcount && le->multi)
						leftpcount =
							pseudocount(ctxt, lw, w, le, d->left->next, lnull_cnt);
					if (!leftpcount && d->left->multi)
						leftpcount =
							pseudocount(ctxt, lw, w, le->next, d->left, lnull_cnt);
					if (!leftpcount && le->multi && d->left->multi)
						leftpcount =
							pseudocount(ctxt, lw, w, le, d->left, lnull_cnt);
				}

				if (Rmatch)
				{
					rightpcount = pseudocount(ctxt, w, rw, d->right->next, re->next, rnull_cnt);
					if (!rightpcount && d->right->multi)
						rightpcount =
							pseudocount(ctxt, w,rw, d->right, re->next, rnull_cnt);
					if (!rightpcount && re->multi)
						rightpcount =
							pseudocount(ctxt, w, rw, d->right->next, re, rnull_cnt);
					if (!rightpcount && d->right->multi && re->multi)
						rightpcount =
							pseudocount(ctxt, w, rw, d->right, re, rnull_cnt);
				}

				/* Total number where links are used on both sides */
				pseudototal = leftpcount && rightpcount;

				if (!pseudototal && leftpcount) {
					/* Evaluate using the left match, but not the right. */
					pseudototal =
						pseudocount(ctxt, w, rw, d->right, re, rnull_cnt);
				}
				if (!pseudototal && (le == NULL) && rightpcount) {
					/* Evaluate using the right match, but not the left. */
					pseudototal =
						pseudocount(ctxt, lw, w, le, d->left, lnull_cnt);
				}

				/* If pseudototal is zero (false), that implies that
				 * we know that the true total is zero. So we don't
				 * bother counting at all, in that case. */
				if (pseudototal)
				{
					Count_bin leftcount = zero;
					Count_bin rightcount = zero;
					/* Real counts: slot 0 advances both connector lists;
					 * the multi variants keep a connector in place. */
					if (Lmatch) {
						leftcount = do_count(mchxt, ctxt, lw, w, le->next, d->left->next, lnull_cnt);
						if (le->multi)
							hist_accumv(&leftcount, d->cost,
								do_count(mchxt, ctxt, lw, w, le, d->left->next, lnull_cnt));
						if (d->left->multi)
							hist_accumv(&leftcount, d->cost,
								 do_count(mchxt, ctxt, lw, w, le->next, d->left, lnull_cnt));
						if (le->multi && d->left->multi)
							hist_accumv(&leftcount, d->cost,
								do_count(mchxt, ctxt, lw, w, le, d->left, lnull_cnt));
					}

					if (Rmatch) {
						rightcount = do_count(mchxt, ctxt, w, rw, d->right->next, re->next, rnull_cnt);
						if (d->right->multi)
							hist_accumv(&rightcount, d->cost,
								do_count(mchxt, ctxt, w, rw, d->right,re->next, rnull_cnt));
						if (re->multi)
							hist_accumv(&rightcount, d->cost,
								do_count(mchxt, ctxt, w, rw, d->right->next, re, rnull_cnt));
						if (d->right->multi && re->multi)
							hist_accumv(&rightcount, d->cost,
								do_count(mchxt, ctxt, w, rw, d->right, re, rnull_cnt));
					}

					/* Total number where links are used on both sides */
					hist_muladd(&total, &leftcount, 0.0, &rightcount);

					if (0 < hist_total(&leftcount))
					{
						/* Evaluate using the left match, but not the right */
						hist_muladdv(&total, &leftcount, d->cost,
							do_count(mchxt, ctxt, w, rw, d->right, re, rnull_cnt));
					}
					if ((le == NULL) && (0 < hist_total(&rightcount)))
					{
						/* Evaluate using the right match, but not the left */
						hist_muladdv(&total, &rightcount, d->cost,
							do_count(mchxt, ctxt, lw, w, le, d->left, lnull_cnt));
					}

					/* Sigh. Overflows can and do occur, esp for the ANY language. */
					if (INT_MAX < hist_total(&total))
					{
						/* Clamp to INT_MAX.  Count_bin is a struct with a
						 * .total member when histogramming is compiled in,
						 * and is assigned like a plain number otherwise. */
#ifdef PERFORM_COUNT_HISTOGRAMMING
						total.total = INT_MAX;
#else
						total = INT_MAX;
#endif /* PERFORM_COUNT_HISTOGRAMMING */
						t->count = total;
						/* Hand the match list back before the early return. */
						put_match_list(mchxt, m1);
						return total;
					}
				}
			}
		}
		put_match_list(mchxt, m1);
	}
	t->count = total;
	return total;
}