Esempio n. 1
0
static char * do_print_flat_constituents(con_context_t *ctxt, Linkage linkage)
{
	int numcon_total= 0, numcon_subl;
	char * q;
	Sentence sent = linkage->sent;

	assert(NULL != sent->lnkages, "No linkages"); /* Sentence already free()'d */
	ctxt->phrase_ss = string_set_create();
	generate_misc_word_info(ctxt, linkage);

	if (NULL ==  sent->constituent_pp)         /* First time for this sentence */
		sent->constituent_pp = post_process_new(sent->dict->hpsg_knowledge);
	do_post_process(sent->constituent_pp, linkage, linkage->is_sent_long);

	/** No-op. If we wanted to debug domain names, we could do this...
	 * linkage_free_pp_info(linkage);
	 * linkage_set_domain_names(sent->constituent_pp, linkage);
	 */

	linkage->hpsg_pp_data = sent->constituent_pp->pp_data;
	pp_new_domain_array(&linkage->hpsg_pp_data);

	numcon_subl = read_constituents_from_domains(ctxt, linkage, numcon_total);
	numcon_total += numcon_subl;
	assert (numcon_total < ctxt->conlen, "Too many constituents (c)");
	numcon_total = merge_constituents(ctxt, linkage, numcon_total);
	assert (numcon_total < ctxt->conlen, "Too many constituents (d)");
	numcon_total = new_style_conjunctions(ctxt, linkage, numcon_total);
	assert (numcon_total < ctxt->conlen, "Too many constituents (e)");
	numcon_total = last_minute_fixes(ctxt, linkage, numcon_total);
	assert (numcon_total < ctxt->conlen, "Too many constituents (f)");
	q = exprint_constituent_structure(ctxt, linkage, numcon_total);
	string_set_delete(ctxt->phrase_ss);
	ctxt->phrase_ss = NULL;
	return q;
}
Esempio n. 2
0
/** This does basic post-processing for all linkages.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			if (lifo->discarded || lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		ppn = do_post_process(sent->postprocessor, lkg, twopass);

		/* XXX There is no need to set the domain names if we are not
		 * printing them. However, deferring this until later requires
		 * a huge code re-org, because pp_data is needed to get the
		 * domain type array, and pp_data is deleted immediately below.
		 * Basically, pp_data and pp_node should be a part of the linkage,
		 * and not part of the Postprocessor struct.
		 * This costs about 1% performance penalty. */
		build_type_array(sent->postprocessor);
		linkage_set_domain_names(sent->postprocessor, lkg);

	   post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);
		if ((9 == in%10) && resources_exhausted(opts->resources)) break;
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}
Esempio n. 3
0
/** This does basic post-processing for all linkages.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;
			if (lifo->discarded) continue;

			/* We still need link names, even if there has been a morfo
			 * violation. */
			compute_link_names(lkg, sent->string_set);
			if (lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded) continue; /* Invalid morphism construction */

		/* We need link names, even if morfo check fails */
		if (!twopass) compute_link_names(lkg, sent->string_set);

		ppn = do_post_process(sent->postprocessor, lkg, twopass);
	   post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);
		if ((9 == in%10) && resources_exhausted(opts->resources)) break;
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;
		if (lifo->discarded) continue;
		if (!twopass) compute_link_names(lkg, sent->string_set);
		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}