示例#1
0
static bool setup_linkages(Sentence sent, extractor_t* pex,
                          fast_matcher_t* mchxt,
                          count_context_t* ctxt,
                          Parse_Options opts)
{
	bool overflowed = build_parse_set(pex, sent, mchxt, ctxt, sent->null_count, opts);
	print_time(opts, "Built parse set");

	if (overflowed && (1 < opts->verbosity))
	{
		err_ctxt ec = { sent };
		err_msgc(&ec, lg_Warn, "Count overflow.\n"
			"Considering a random subset of %zu of an unknown and large number of linkages\n",
			opts->linkage_limit);
	}

	if (sent->num_linkages_found == 0)
	{
		sent->num_linkages_alloced = 0;
		sent->num_linkages_post_processed = 0;
		sent->num_valid_linkages = 0;
		sent->lnkages = NULL;
		return overflowed;
	}

	sent->num_linkages_alloced =
		MIN(sent->num_linkages_found, (int) opts->linkage_limit);

	/* Now actually malloc the array in which we will process linkages. */
	/* We may have been called before, e.g. this might be a panic parse,
	 * and the linkages array may still be there from last time.
	 * XXX free_linkages() zeros sent->num_linkages_found. */
	if (sent->lnkages) free_linkages(sent);
	sent->lnkages = linkage_array_new(sent->num_linkages_alloced);

	return overflowed;
}
示例#2
0
static void select_linkages(Sentence sent, fast_matcher_t* mchxt,
                            count_context_t* ctxt,
                            Parse_Options opts)
{
	size_t in;
	size_t N_linkages_found, N_linkages_alloced;

	bool overflowed = build_parse_set(sent, mchxt, ctxt, sent->null_count, opts);
	print_time(opts, "Built parse set");

	if (overflowed && (1 < opts->verbosity))
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Warn, "Warning: Count overflow.\n"
		  "Considering a random subset of %zu of an unknown and large number of linkages\n",
			opts->linkage_limit);
	}
	N_linkages_found = sent->num_linkages_found;

	if (sent->num_linkages_found == 0)
	{
		sent->num_linkages_alloced = 0;
		sent->num_linkages_post_processed = 0;
		sent->num_valid_linkages = 0;
		sent->lnkages = NULL;
		return;
	}

	if (N_linkages_found > opts->linkage_limit)
	{
		N_linkages_alloced = opts->linkage_limit;
		if (opts->verbosity > 1)
		{
			err_ctxt ec;
			ec.sent = sent;
			err_msg(&ec, Warn,
			    "Warning: Considering a random subset of %zu of %zu linkages\n",
			    N_linkages_alloced, N_linkages_found);
		}
	}
	else
	{
		N_linkages_alloced = N_linkages_found;
	}

	/* Now actually malloc the array in which we will process linkages. */
	/* We may have been called before, e.g this might be a panic parse,
	 * and the linkages array may still be there from last time.
	 * XXX free_linkages() zeros sent->num_linkages_found. */
	if (sent->lnkages) free_linkages(sent);
	sent->num_linkages_found = N_linkages_found;
	sent->lnkages = linkage_array_new(N_linkages_alloced);

	/* Generate an array of linkage indices to examine */
	if (overflowed)
	{
		/* The negative index means that a random subset of links
		 * will be picked later on, in extract_links(). */
		for (in=0; in < N_linkages_alloced; in++)
		{
			sent->lnkages[in].lifo.index = -(in+1);
		}
	}
	else if (N_linkages_found == N_linkages_alloced)
	{
		for (in=0; in<N_linkages_alloced; in++)
			sent->lnkages[in].lifo.index = in;
	}
	else
	{
		/* There are more linkages found than we can handle */
		/* Pick a (quasi-)uniformly distributed random subset. */
		if (opts->repeatable_rand)
			sent->rand_state = N_linkages_found + sent->length;

		for (in=0; in<N_linkages_alloced; in++)
		{
			size_t block_bottom, block_top;
			double frac = (double) N_linkages_found;

			frac /= (double) N_linkages_alloced;
			block_bottom = (int) (((double) in) * frac);
			block_top = (int) (((double) (in+1)) * frac);
			sent->lnkages[in].lifo.index = block_bottom +
				(rand_r(&sent->rand_state) % (block_top-block_bottom));
		}
	}

	sent->num_linkages_alloced = N_linkages_alloced;
	/* Later, we subtract the number of invalid linkages */
	sent->num_valid_linkages = N_linkages_alloced;
}