/**
 * Analyze a linkage that contains no fat links.
 *
 * The links held in the parse info's link_array are copied into a scratch
 * sublinkage, which is then handed to the post-processor.
 *
 * When analyze_pass is PP_FIRST_PASS the sublinkage is only scanned, and
 * the returned Linkage_info is all zeroes.  For any other pass the
 * sublinkage is fully post-processed, and the returned Linkage_info
 * carries the cost fields plus a violation count of 0 or 1.
 */
Linkage_info analyze_thin_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
{
	Parse_info pi = sent->parse_info;
	Postprocessor *postprocessor;
	Sublinkage *sublinkage;
	Linkage_info li;
	int n;

	build_digraph(pi, word_links);
	memset(&li, 0, sizeof(li));

	sublinkage = x_create_sublinkage(pi);
	postprocessor = sent->dict->postprocessor;

	/* Mirror every link of the parse into the scratch sublinkage. */
	compute_link_names(sent);
	n = 0;
	while (n < pi->N_links) {
		copy_full_link(&sublinkage->link[n], &pi->link_array[n]);
		n++;
	}

	if (analyze_pass != PP_FIRST_PASS) {
		PP_node *pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);

		li.N_violations = 0;
		li.and_cost = 0;
		li.unused_word_cost = unused_word_cost(sent->parse_info);
		li.improper_fat_linkage = FALSE;
		li.inconsistent_domains = FALSE;
		li.disjunct_cost = disjunct_cost(pi);
		li.null_cost = null_cost(pi);
		li.link_cost = link_cost(pi);
		li.andlist = NULL;

		if (pp != NULL) {
			if (pp->violation != NULL) li.N_violations++;
		} else if (postprocessor != NULL) {
			/* A post-processor exists, yet it produced no result node. */
			li.N_violations = 1;
		}
	} else {
		/* First pass: only let the post-processor look at the links. */
		post_process_scan_linkage(postprocessor, opts, sent, sublinkage);
	}

	free_sublinkage(sublinkage);
	free_digraph(pi, word_links);
	return li;
}
Exemple #2
0
/**
 * This does basic post-processing for all linkages.
 *
 * Linkages that are discarded or that already carry a violation are
 * skipped.  Every other linkage is post-processed and scored; one that
 * fails post-processing gets its violation count bumped and, unless a
 * message is already present, its violation message set.
 *
 * If the resources run out part-way through, every linkage that was not
 * reached is flagged with a "Timeout during postprocessing" violation.
 *
 * On return, sent->num_linkages_post_processed and
 * sent->num_valid_linkages hold the updated counts.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			if (lifo->discarded || lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			/* Poll the resource limits only once every 50 linkages. */
			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		ppn = do_post_process(sent->postprocessor, lkg, twopass);

		/* XXX There is no need to set the domain names if we are not
		 * printing them. However, deferring this until later requires
		 * a huge code re-org, because pp_data is needed to get the
		 * domain type array, and pp_data is deleted immediately below.
		 * Basically, pp_data and pp_node should be a part of the linkage,
		 * and not part of the Postprocessor struct.
		 * This costs about 1% performance penalty. */
		build_type_array(sent->postprocessor);
		linkage_set_domain_names(sent->postprocessor, lkg);

		post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);

		/* Poll the resource limits only once every 10 linkages. */
		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* 'break' skips the loop's own in++.  This linkage has been
			 * fully handled, so step past it; otherwise the timeout loop
			 * below would flag a finished, valid linkage as timed out. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}
/**
 * This uses link_array.  It enumerates and post-processes
 * all the linkages represented by this one.  We know this contains
 * at least one fat link.
 *
 * A DIS/CON tree is built from the word links; if building it reports a
 * structure violation, the function returns at once with one violation
 * and improper_fat_linkage set.  Otherwise each sublinkage reachable via
 * advance_DIS() is patched into the scratch sublinkage and either
 * scanned (analyze_pass == PP_FIRST_PASS) or fully post-processed.
 *
 * During full post-processing, the domain type list of every link is
 * compared against the first one recorded for that link; a mismatch is
 * counted as a violation and sets inconsistent_domains.
 *
 * The and-list is built only when analyze_pass == PP_SECOND_PASS; it is
 * handed back in the result and is not freed here.
 */
Linkage_info analyze_fat_linkage(Sentence sent, Parse_Options opts, int analyze_pass)
{
	int i;
	Linkage_info li;
	DIS_node *d_root;
	PP_node *pp;
	Postprocessor *postprocessor;
	Sublinkage *sublinkage;
	Parse_info pi = sent->parse_info;
	PP_node accum;			   /* for domain ancestry check */
	D_type_list * dtl0, * dtl1;  /* for domain ancestry check */

	/* li is returned by value: clear it first, as analyze_thin_linkage
	 * does, so that no member is handed back uninitialized. */
	memset(&li, 0, sizeof(li));

	sublinkage = x_create_sublinkage(pi);
	postprocessor = sent->dict->postprocessor;
	build_digraph(pi, word_links);
	structure_violation = FALSE;
	d_root = build_DIS_CON_tree(pi, word_links); /* may set structure_violation to TRUE */

	li.N_violations = 0;
	li.improper_fat_linkage = structure_violation;
	li.inconsistent_domains = FALSE;
	li.unused_word_cost = unused_word_cost(sent->parse_info);
	li.disjunct_cost = disjunct_cost(pi);
	li.null_cost = null_cost(pi);
	li.link_cost = link_cost(pi);
	li.and_cost = 0;
	li.andlist = NULL;

	if (structure_violation) {
		li.N_violations++;
		free_sublinkage(sublinkage);
		free_digraph(pi, word_links);
		free_DIS_tree(d_root);
		return li;
	}

	/* and_cost stays 0 (set above) unless this is the second pass. */
	if (analyze_pass==PP_SECOND_PASS) {
		li.andlist = build_andlist(sent, word_links);
		li.and_cost = li.andlist->cost;
	}

	compute_link_names(sent);

	/* Nothing has been recorded yet for the domain ancestry check. */
	for (i=0; i<pi->N_links; i++) accum.d_type_array[i] = NULL;

	for (;;) {		/* loop through all the sub linkages */
		/* Start each sublinkage from a fresh copy of the original links. */
		for (i=0; i<pi->N_links; i++) {
			patch_array[i].used = patch_array[i].changed = FALSE;
			patch_array[i].newl = pi->link_array[i].l;
			patch_array[i].newr = pi->link_array[i].r;
			copy_full_link(&sublinkage->link[i], &(pi->link_array[i]));
		}
		fill_patch_array_DIS(d_root, NULL, word_links);

		for (i=0; i<pi->N_links; i++) {
			if (patch_array[i].changed || patch_array[i].used) {
				sublinkage->link[i]->l = patch_array[i].newl;
				sublinkage->link[i]->r = patch_array[i].newr;
			}
			else if ((dfs_root_word[pi->link_array[i].l] != -1) &&
					 (dfs_root_word[pi->link_array[i].r] != -1)) {
				/* Tag the link with l == -1; the ancestry check
				 * below skips links tagged this way. */
				sublinkage->link[i]->l = -1;
			}
		}

		compute_pp_link_array_connectors(sent, sublinkage);
		compute_pp_link_names(sent, sublinkage);

		/* 'analyze_pass' logic added ALB 1/97 */
		if (analyze_pass==PP_FIRST_PASS) {
			post_process_scan_linkage(postprocessor,opts,sent,sublinkage);
			if (!advance_DIS(d_root)) break;
			else continue;
		}

		pp = post_process(postprocessor, opts, sent, sublinkage, TRUE);

		if (pp==NULL) {
			if (postprocessor != NULL) li.N_violations = 1;
		}
		else if (pp->violation == NULL)  {
			/* the purpose of this stuff is to make sure the domain
			   ancestry for a link in each of its sentences is consistent. */

			for (i=0; i<pi->N_links; i++) {
				if (sublinkage->link[i]->l == -1) continue;
				if (accum.d_type_array[i] == NULL) {
					accum.d_type_array[i] = copy_d_type(pp->d_type_array[i]);
				} else {
					/* Walk both lists in lock-step; they agree only
					 * if both run out at the same time. */
					dtl0 = pp->d_type_array[i];
					dtl1 = accum.d_type_array[i];
					while((dtl0 != NULL) && (dtl1 != NULL) && (dtl0->type == dtl1->type)) {
						dtl0 = dtl0->next;
						dtl1 = dtl1->next;
					}
					if ((dtl0 != NULL) || (dtl1 != NULL)) break;
				}
			}
			/* Leaving the loop early means some link disagreed. */
			if (i != pi->N_links) {
				li.N_violations++;
				li.inconsistent_domains = TRUE;
			}
		}
		else {
			/* pp->violation != NULL: post-processing rejected this one. */
			li.N_violations++;
		}

		if (!advance_DIS(d_root)) break;
	}

	for (i=0; i<pi->N_links; ++i) {
		free_d_type(accum.d_type_array[i]);
	}

	free_sublinkage(sublinkage);
	free_digraph(pi, word_links);
	free_DIS_tree(d_root);
	return li;
}
Exemple #4
0
/**
 * This does basic post-processing for all linkages.
 *
 * Discarded linkages are skipped throughout.  Every other linkage gets
 * its link names computed exactly once, is post-processed and is scored;
 * one that fails post-processing gets its violation count bumped and,
 * unless a message is already present, its violation message set.
 *
 * If the resources run out part-way through, every linkage that was not
 * reached is flagged with a "Timeout during postprocessing" violation.
 *
 * On return, sent->num_linkages_post_processed and
 * sent->num_valid_linkages hold the updated counts.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* Linkages with index below N_named were visited by the first pass,
	 * which computes their link names.  It stays 0 when there is no
	 * first pass, and is short of N_linkages_alloced when the first
	 * pass was cut short by resource exhaustion. */
	size_t N_named = 0;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;
			if (lifo->discarded) continue;

			/* We still need link names, even if there has been a morfo
			 * violation. */
			compute_link_names(lkg, sent->string_set);
			if (lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			/* Poll the resource limits only once every 50 linkages. */
			if ((49 == in%50) && resources_exhausted(opts->resources))
			{
				in++;   /* this linkage was visited; count it */
				break;
			}
		}
		N_named = in;
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded) continue; /* Invalid morphism construction */

		/* We need link names, even if morfo check fails.  Compute them
		 * here unless the first pass already did so. */
		if (in >= N_named) compute_link_names(lkg, sent->string_set);

		ppn = do_post_process(sent->postprocessor, lkg, twopass);
		post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);

		/* Poll the resource limits only once every 10 linkages. */
		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* 'break' skips the loop's own in++.  This linkage has been
			 * fully handled, so step past it; otherwise the timeout loop
			 * below would count it a second time and flag it as timed
			 * out. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;
		if (lifo->discarded) continue;
		if (in >= N_named) compute_link_names(lkg, sent->string_set);
		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}