Esempio n. 1
0
/**
 * Recursively compute the count for the region between words lw and rw,
 * given the connector list le hanging off the left word, the connector
 * list re hanging off the right word, and a budget `cost`.
 *
 * Going by the comments in the body, the value counted appears to be the
 * number of linkages of that region, and `cost` the number of skipped
 * (null) word blocks that must be used -- confirm against the callers.
 *
 * Results are memoized through sent->count_ctxt: if find_table_pointer()
 * returns an entry for (lw, rw, le, re, cost), its stored count is
 * returned immediately.  Otherwise a new entry is created with
 * table_store() and its `count` field is filled in on every return path.
 *
 * @param sent  Sentence being processed; supplies the counting context.
 * @param lw    Index of the left boundary word (-1 is treated specially
 *              in the "no connectors" case below).
 * @param rw    Index of the right boundary word.
 * @param le    Left connector list, or NULL.
 * @param re    Right connector list, or NULL.
 * @param cost  Budget for this region; a negative value yields 0.
 * @return      The count for this region (also stored in the table entry).
 *
 * NOTE(review): the sums and products below are plain s64 arithmetic with
 * no overflow guard visible in this function.
 */
static s64 do_count(Sentence sent, int lw, int rw,
                    Connector *le, Connector *re, int cost)
{
	Disjunct * d;
	s64 total, pseudototal;
	int start_word, end_word, w;
	s64 leftcount, rightcount;
	int lcost, rcost, Lmatch, Rmatch;

	Match_node * m, *m1;
	Table_connector *t;

	count_context_t *ctxt = sent->count_ctxt;

	if (cost < 0) return 0;  /* Defensive: a negative budget has no solutions. */

	/* Memo lookup: a hit short-circuits the whole computation. */
	t = find_table_pointer(ctxt, lw, rw, le, re, cost);

	if (t == NULL) {
		/* Create the table entry now, before recursing.  The trailing 0
		 * is presumably the tentative count; t->count is overwritten
		 * on every return path below. */
		t = table_store(ctxt, lw, rw, le, re, cost, 0);
	} else {
		return t->count;
	}

	if (rw == 1+lw)
	{
		/* lw and rw are neighboring words: there is nothing in between.
		 * The only solution is the empty one, which requires both
		 * connector lists to be exhausted and a cost of exactly 0. */
		if ((le == NULL) && (re == NULL) && (cost == 0))
		{
			t->count = 1;
		}
		else
		{
			t->count = 0;
		}
		return t->count;
	}

	/* Both connector lists are empty but there are words in between. */
	if ((le == NULL) && (re == NULL))
	{
		if (!ctxt->islands_ok && (lw != -1))
		{
			/* If we don't allow islands (a set of words linked together
			 * but separate from the rest of the sentence) then all
			 * rw-lw-1 intervening words must be skipped.  The cost of
			 * that is the number of skipped words divided by
			 * null_block, rounded up. */
			if (cost == ((rw-lw-1) + ctxt->null_block-1)/ctxt->null_block)
			{
				/* E.g. if null_block=4 then the cost of
				   1,2,3,4 nulls is 1; and 5,6,7,8 is 2 etc. */
				t->count = 1;
			}
			else
			{
				t->count = 0;
			}
			return t->count;
		}
		if (cost == 0)
		{
			/* There is no zero-cost solution in this case. Separating
			 * out cost == 0 is a slight efficiency hack; it is not
			 * necessary for correctness. */
			t->count = 0;
		}
		else
		{
			/* Spend one unit of cost at word lw+1 and recurse on the
			 * rest of the region with cost-1. */
			total = 0;
			w = lw+1;
			for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
			{
				/* Only disjuncts with no left connectors are considered;
				 * their right connectors become the new left list. */
				if (d->left == NULL)
				{
					total += do_count(sent, w, rw, d->right, NULL, cost-1);
				}
			}
			/* Also count the case where word w contributes no connectors. */
			total += do_count(sent, w, rw, NULL, NULL, cost-1);
			t->count = total;
		}
		return t->count;
	}

	/* At least one connector list is non-empty.  Determine the inclusive
	 * range of candidate middle words w: bounded by the `word` field of
	 * the respective connector when present, otherwise by the words
	 * adjacent to lw / rw. */
	if (le == NULL)
	{
		start_word = lw+1;
	}
	else
	{
		start_word = le->word;
	}

	if (re == NULL)
	{
		end_word = rw-1;
	}
	else
	{
		end_word = re->word;
	}

	total = 0;

	for (w = start_word; w < end_word+1; w++)
	{
		/* m1 keeps the list head so it can be handed back to
		 * put_match_list() after the list has been walked. */
		m1 = m = form_match_list(sent, w, le, lw, re, rw);
		for (; m!=NULL; m=m->next)
		{
			d = m->d;
			/* Try every split of the budget between the left part
			 * (lw..w) and the right part (w..rw). */
			for (lcost = 0; lcost <= cost; lcost++)
			{
				rcost = cost-lcost;
				/* Now lcost and rcost are the costs we're assigning
				 * to those parts respectively */

				/* Now, we determine if (based on table only) we can see that
				   the current range is not parsable. */
				Lmatch = (le != NULL) && (d->left != NULL) && 
				         do_match(sent, le, d->left, lw, w);
				Rmatch = (d->right != NULL) && (re != NULL) && 
				         do_match(sent, d->right, re, w, rw);

				/* First pass: pseudocount() is called with exactly the
				 * arguments the do_count() calls further down will use.
				 * For a matched pair, each connector flagged `multi`
				 * may also be kept (not advanced to ->next), giving up
				 * to four combinations per side. */
				rightcount = leftcount = 0;
				if (Lmatch)
				{
					leftcount = pseudocount(sent, lw, w, le->next, d->left->next, lcost);
					if (le->multi) leftcount += pseudocount(sent, lw, w, le, d->left->next, lcost);
					if (d->left->multi) leftcount += pseudocount(sent, lw, w, le->next, d->left, lcost);
					if (le->multi && d->left->multi) leftcount += pseudocount(sent, lw, w, le, d->left, lcost);
				}

				if (Rmatch)
				{
					rightcount = pseudocount(sent, w, rw, d->right->next, re->next, rcost);
					if (d->right->multi) rightcount += pseudocount(sent, w,rw,d->right,re->next, rcost);
					if (re->multi) rightcount += pseudocount(sent, w, rw, d->right->next, re, rcost);
					if (d->right->multi && re->multi) rightcount += pseudocount(sent, w, rw, d->right, re, rcost);
				}

				/* total number where links are used on both sides */
				pseudototal = leftcount*rightcount;

				if (leftcount > 0) {
					/* evaluate using the left match, but not the right */
					pseudototal += leftcount * pseudocount(sent, w, rw, d->right, re, rcost);
				}
				if ((le == NULL) && (rightcount > 0)) {
					/* evaluate using the right match, but not the left;
					 * only considered when there is no left connector */
					pseudototal += rightcount * pseudocount(sent, lw, w, le, d->left, lcost);
				}

				/* A pseudototal of 0 implies that the true total is 0,
				 * so the real (recursive) count below is skipped. */
				if (pseudototal != 0) {
					/* Second pass: same structure as above, but using
					 * the real recursive counts. */
					rightcount = leftcount = 0;
					if (Lmatch) {
						leftcount = do_count(sent, lw, w, le->next, d->left->next, lcost);
						if (le->multi) leftcount += do_count(sent, lw, w, le, d->left->next, lcost);
						if (d->left->multi) leftcount += do_count(sent, lw, w, le->next, d->left, lcost);
						if (le->multi && d->left->multi) leftcount += do_count(sent, lw, w, le, d->left, lcost);
					}

					if (Rmatch) {
						rightcount = do_count(sent, w, rw, d->right->next, re->next, rcost);
						if (d->right->multi) rightcount += do_count(sent, w,rw,d->right,re->next, rcost);
						if (re->multi) rightcount += do_count(sent, w, rw, d->right->next, re, rcost);
						if (d->right->multi && re->multi) rightcount += do_count(sent, w, rw, d->right, re, rcost);
					}

					total += leftcount*rightcount;  /* total number where links are used on both sides */

					if (leftcount > 0) {
						/* evaluate using the left match, but not the right */
						total += leftcount * do_count(sent, w, rw, d->right, re, rcost);
					}
					if ((le == NULL) && (rightcount > 0)) {
						/* evaluate using the right match, but not the left */
						total += rightcount * do_count(sent, lw, w, le, d->left, lcost);
					}
				}
			}
		}

		/* Hand the match list obtained for this w back to its owner. */
		put_match_list(sent, m1);
	}
	/* Replace the tentative table value with the final result. */
	t->count = total;
	return total;
}
Esempio n. 2
0
/**
 * Recursively compute the count for the region between words lw and rw,
 * given the connector list le hanging off the left word, the connector
 * list re hanging off the right word, and the number of null (skipped)
 * words, null_count, that the region must contain.
 *
 * Going by the comments in the body, the value counted appears to be the
 * number of linkages of the region -- confirm against the callers.
 *
 * Results are memoized in ctxt: if find_table_pointer() returns an entry
 * for (lw, rw, le, re, null_count), its stored count is returned
 * immediately.  Otherwise a new entry is created with table_store() and
 * its `count` field is filled in on every return path.
 *
 * @param mchxt       Matcher context passed to form_match_list() and
 *                    put_match_list().
 * @param ctxt        Counting context (memo table, per-word disjuncts,
 *                    islands_ok flag).
 * @param lw          Index of the left boundary word (-1 is treated
 *                    specially in the "no connectors" case below).
 * @param rw          Index of the right boundary word.
 * @param le          Left connector list, or NULL.
 * @param re          Right connector list, or NULL.
 * @param null_count  Required number of null words; must be >= 0
 *                    (asserted).
 * @return            The count for this region (also stored in the table
 *                    entry).  When the running total exceeds INT_MAX it
 *                    is clamped to INT_MAX and returned early.
 *
 * NOTE(review): the clamp runs only after the products have been
 * accumulated into `total`; whether the intermediate arithmetic can
 * itself overflow depends on Count_bin and the hist_* helpers, which are
 * not visible here.
 */
static Count_bin do_count(fast_matcher_t *mchxt,
                          count_context_t *ctxt,
                          int lw, int rw,
                          Connector *le, Connector *re,
                          int null_count)
{
	Count_bin zero = hist_zero();
	Count_bin total;
	int start_word, end_word, w;
	Table_connector *t;

	assert (0 <= null_count, "Bad null count");

	/* Memo lookup: a hit short-circuits the whole computation. */
	t = find_table_pointer(ctxt, lw, rw, le, re, null_count);

	if (t) return t->count;

	/* Create the table entry now, before recursing.  Its count is
	 * tentative; t->count is overwritten on every return path below. */
	t = table_store(ctxt, lw, rw, le, re, null_count);

	if (rw == 1+lw)
	{
		/* lw and rw are neighboring words: there is nothing in between.
		 * The only solution is the empty one, which requires both
		 * connector lists to be exhausted and null_count == 0. */
		if ((le == NULL) && (re == NULL) && (null_count == 0))
		{
			t->count = hist_one();
		}
		else
		{
			t->count = zero;
		}
		return t->count;
	}

	/* The left and right connectors are null, but the two words are
	 * NOT next to each-other. */
	if ((le == NULL) && (re == NULL))
	{
		if (!ctxt->islands_ok && (lw != -1))
		{
			/* If we don't allow islands (a set of words linked together
			 * but separate from the rest of the sentence) then every
			 * one of the rw-lw-1 intervening words must be null, so
			 * null_count has to equal exactly that number. */
			if (null_count == (rw-lw-1))
			{
				t->count = hist_one();
			}
			else
			{
				t->count = zero;
			}
			return t->count;
		}
		if (null_count == 0)
		{
			/* There is no solution without nulls in this case. Separating
			 * out null_count == 0 is a slight efficiency hack; it is not
			 * necessary for correctness. */
			t->count = zero;
		}
		else
		{
			Disjunct *d;

			/* Spend one null at word lw+1 and recurse on the rest of
			 * the region with null_count-1.  The function-level w is
			 * used here; no inner declaration, to avoid shadowing. */
			t->count = zero;
			w = lw + 1;
			for (d = ctxt->local_sent[w].d; d != NULL; d = d->next)
			{
				/* Only disjuncts with no left connectors are considered;
				 * their right connectors become the new left list. */
				if (d->left == NULL)
				{
					hist_accumv(&t->count, d->cost,
						do_count(mchxt, ctxt, w, rw, d->right, NULL, null_count-1));
				}
			}
			/* Also count the case where word w contributes no connectors. */
			hist_accumv(&t->count, 0.0,
				do_count(mchxt, ctxt, w, rw, NULL, NULL, null_count-1));
		}
		return t->count;
	}

	/* At least one connector list is non-empty.  Determine the half-open
	 * range [start_word, end_word) of candidate middle words w: bounded
	 * by the `word` field of the respective connector when present,
	 * otherwise by the words adjacent to lw / rw. */
	if (le == NULL)
	{
		start_word = lw+1;
	}
	else
	{
		start_word = le->word;
	}

	if (re == NULL)
	{
		end_word = rw;
	}
	else
	{
		end_word = re->word +1;
	}

	total = zero;

	for (w = start_word; w < end_word; w++)
	{
		/* m1 keeps the list head so it can be handed back to
		 * put_match_list() after the list has been walked. */
		Match_node *m, *m1;
		m1 = m = form_match_list(mchxt, w, le, lw, re, rw);
		for (; m != NULL; m = m->next)
		{
			unsigned int lnull_cnt, rnull_cnt;
			Disjunct * d = m->d;
			/* _p1 avoids a gcc warning about unsafe loop opt */
			unsigned int null_count_p1 = null_count + 1;

			/* Try every split of the nulls between the left part
			 * (lw..w) and the right part (w..rw). */
			for (lnull_cnt = 0; lnull_cnt < null_count_p1; lnull_cnt++)
			{
				bool Lmatch, Rmatch;
				bool leftpcount = false;
				bool rightpcount = false;
				bool pseudototal = false;

				rnull_cnt = null_count - lnull_cnt;
				/* Now lnull_cnt and rnull_cnt are the null counts we're
				 * assigning to those parts respectively */

				/* Now, we determine if (based on table only) we can see that
				   the current range is not parsable. */
				Lmatch = (le != NULL) && (d->left != NULL) &&
				         do_match(le, d->left, lw, w);
				Rmatch = (d->right != NULL) && (re != NULL) &&
				         do_match(d->right, re, w, rw);

				/* First, perform pseudocounting as an optimization. If
				 * the pseudocount is zero, then we know that the true
				 * count will be zero, and so skip counting entirely,
				 * in that case.  Only a yes/no answer is needed, so each
				 * further pseudocount() call is made only while the
				 * answer is still false.
				 */
				if (Lmatch)
				{
					leftpcount = pseudocount(ctxt, lw, w, le->next, d->left->next, lnull_cnt);
					if (!leftpcount && le->multi)
						leftpcount =
							pseudocount(ctxt, lw, w, le, d->left->next, lnull_cnt);
					if (!leftpcount && d->left->multi)
						leftpcount =
							pseudocount(ctxt, lw, w, le->next, d->left, lnull_cnt);
					if (!leftpcount && le->multi && d->left->multi)
						leftpcount =
							pseudocount(ctxt, lw, w, le, d->left, lnull_cnt);
				}

				if (Rmatch)
				{
					rightpcount = pseudocount(ctxt, w, rw, d->right->next, re->next, rnull_cnt);
					if (!rightpcount && d->right->multi)
						rightpcount =
							pseudocount(ctxt, w,rw, d->right, re->next, rnull_cnt);
					if (!rightpcount && re->multi)
						rightpcount =
							pseudocount(ctxt, w, rw, d->right->next, re, rnull_cnt);
					if (!rightpcount && d->right->multi && re->multi)
						rightpcount =
							pseudocount(ctxt, w, rw, d->right, re, rnull_cnt);
				}

				/* Case where links are used on both sides */
				pseudototal = leftpcount && rightpcount;

				if (!pseudototal && leftpcount) {
					/* Evaluate using the left match, but not the right. */
					pseudototal =
						pseudocount(ctxt, w, rw, d->right, re, rnull_cnt);
				}
				if (!pseudototal && (le == NULL) && rightpcount) {
					/* Evaluate using the right match, but not the left;
					 * only considered when there is no left connector. */
					pseudototal =
						pseudocount(ctxt, lw, w, le, d->left, lnull_cnt);
				}

				/* If pseudototal is zero (false), that implies that
				 * we know that the true total is zero. So we don't
				 * bother counting at all, in that case. */
				if (pseudototal)
				{
					/* Real recursive counts.  For a matched pair, each
					 * connector flagged `multi` may also be kept (not
					 * advanced to ->next), giving up to four
					 * combinations per side.  d->cost is forwarded to
					 * the hist_* helpers; how they use it is not
					 * visible here. */
					Count_bin leftcount = zero;
					Count_bin rightcount = zero;
					if (Lmatch) {
						leftcount = do_count(mchxt, ctxt, lw, w, le->next, d->left->next, lnull_cnt);
						if (le->multi)
							hist_accumv(&leftcount, d->cost,
								do_count(mchxt, ctxt, lw, w, le, d->left->next, lnull_cnt));
						if (d->left->multi)
							hist_accumv(&leftcount, d->cost,
								 do_count(mchxt, ctxt, lw, w, le->next, d->left, lnull_cnt));
						if (le->multi && d->left->multi)
							hist_accumv(&leftcount, d->cost,
								do_count(mchxt, ctxt, lw, w, le, d->left, lnull_cnt));
					}

					if (Rmatch) {
						rightcount = do_count(mchxt, ctxt, w, rw, d->right->next, re->next, rnull_cnt);
						if (d->right->multi)
							hist_accumv(&rightcount, d->cost,
								do_count(mchxt, ctxt, w, rw, d->right,re->next, rnull_cnt));
						if (re->multi)
							hist_accumv(&rightcount, d->cost,
								do_count(mchxt, ctxt, w, rw, d->right->next, re, rnull_cnt));
						if (d->right->multi && re->multi)
							hist_accumv(&rightcount, d->cost,
								do_count(mchxt, ctxt, w, rw, d->right, re, rnull_cnt));
					}

					/* Total number where links are used on both sides */
					hist_muladd(&total, &leftcount, 0.0, &rightcount);

					if (0 < hist_total(&leftcount))
					{
						/* Evaluate using the left match, but not the right */
						hist_muladdv(&total, &leftcount, d->cost,
							do_count(mchxt, ctxt, w, rw, d->right, re, rnull_cnt));
					}
					if ((le == NULL) && (0 < hist_total(&rightcount)))
					{
						/* Evaluate using the right match, but not the left */
						hist_muladdv(&total, &rightcount, d->cost,
							do_count(mchxt, ctxt, lw, w, le, d->left, lnull_cnt));
					}

					/* Sigh. Overflows can and do occur, esp for the ANY language.
					 * Clamp to INT_MAX, record the clamped value in the
					 * table, release the match list and stop early. */
					if (INT_MAX < hist_total(&total))
					{
#ifdef PERFORM_COUNT_HISTOGRAMMING
						total.total = INT_MAX;
#else
						total = INT_MAX;
#endif /* PERFORM_COUNT_HISTOGRAMMING */
						t->count = total;
						put_match_list(mchxt, m1);
						return total;
					}
				}
			}
		}
		/* Hand the match list obtained for this w back to its owner. */
		put_match_list(mchxt, m1);
	}
	/* Replace the tentative table value with the final result. */
	t->count = total;
	return total;
}