Beispiel #1
0
/**
 * Post-process a single linkage.
 *
 * The result is delivered through the PP_node owned by the postprocessor
 * (pp->pp_node); the caller must not free it. Its violation field is set
 * to the message produced by internal_process() when a rule fired, and
 * to NULL when the linkage passed all the rules.
 *
 * NB: linkage->link[i]->l=-1 means that this connector is to be ignored.
 *
 * @param pp          The postprocessor; if NULL, nothing is done.
 * @param sublinkage  The linkage to check.
 * @param is_long     When true, and the rules have not been pruned yet,
 *                    prune the irrelevant rules before processing.
 * @return pp->pp_node, or NULL if pp is NULL.
 */
PP_node *do_post_process(Postprocessor *pp, Linkage sublinkage, bool is_long)
{
	const char *violation_msg;
	int status;

	if (NULL == pp) return NULL;

	/* XXX The old list pointer is simply dropped here, without being
	 * freed. It is unclear why this does not leak memory. */
	pp->pp_data.links_to_ignore = NULL;

	pp->pp_data.num_words = sublinkage->num_words;

	/* The visited[] array is enlarged by num_words whenever its current
	 * length does not strictly exceed the number of words. */
	if (pp->pp_data.num_words >= pp->vlength)
	{
		pp->vlength += pp->pp_data.num_words;
		pp->visited = (bool *) realloc(pp->visited, pp->vlength * sizeof(bool));
	}
	clear_visited(pp);

	/* The pp_node handed back to the caller is kept as a field of pp,
	 * so it can be wiped here, at the start of every call, instead of
	 * trusting the user to release it. */
	clear_pp_node(pp);

	/* Pruning the rules that cannot possibly apply pays off only for
	 * long sentences. The flag is raised in either case, so pruning is
	 * attempted at most on the first call after the flag was cleared. */
	if (is_long && !pp->q_pruned_rules) prune_irrelevant_rules(pp);
	pp->q_pruned_rules = true;

	status = internal_process(pp, sublinkage, &violation_msg);

	if (-1 == status)
	{
		/* A global test failed, before any domains had to be built.
		 * The type array is not built on this path. */
		pp->n_global_rules_firing++;
		pp->pp_node->violation = violation_msg;
		report_pp_stats(pp);
		return pp->pp_node;
	}

	if (1 == status)
	{
		/* One of the "normal" post-processing tests failed. */
		pp->n_local_rules_firing++;
		pp->pp_node->violation = violation_msg;
	}
	else if (0 == status)
	{
		/* The linkage is legal according to the post-processing rules. */
		pp->pp_node->violation = NULL;
	}

	report_pp_stats(pp);
	build_type_array(pp);

	return pp->pp_node;
}
Beispiel #2
0
/**
 * Post-process one sublinkage of a sentence.
 *
 * The result is delivered through the PP_node owned by the postprocessor
 * (pp->pp_node). Its violation field holds the message produced by
 * internal_process() when a rule fired, and NULL when the sublinkage
 * passed all the rules.
 *
 * NB: sublinkage->link[i]->l=-1 means that this connector is to be ignored.
 *
 * @param pp          The postprocessor; if NULL, nothing is done.
 * @param opts        Parse options; twopass_length is read from here.
 * @param sent        The sentence; its length and q_pruned_rules flag
 *                    are used, and the flag is set to TRUE.
 * @param sublinkage  The sublinkage to check.
 * @param cleanup     If non-zero, pp->pp_data is released with
 *                    post_process_free_data() before a normal return.
 * @return pp->pp_node, or NULL if pp is NULL.
 */
PP_node *post_process(Postprocessor *pp, Parse_Options opts,
		      Sentence sent, Sublinkage *sublinkage, int cleanup)
{
  char *violation;
  int status;

  if (NULL == pp) return NULL;

  pp->pp_data.links_to_ignore = NULL;
  pp->pp_data.length = sent->length;

  /* The pp_node handed back to the caller is kept as a field of pp, so
     that it can be reset on every call instead of trusting the user to
     release it. */
  reset_pp_node(pp);

  /* On the first visit of a sentence that is long enough, drop the rules
     that will not be needed for its linkages. The flag is raised either
     way, so this is attempted only once per sentence. */
  if (sent->q_pruned_rules==FALSE && sent->length >= opts->twopass_length)
  {
    prune_irrelevant_rules(pp);
  }
  sent->q_pruned_rules = TRUE;

  status = internal_process(pp, sublinkage, &violation);

  if (-1 == status)
  {
    /* A global test failed, before any domains had to be built.
       NOTE(review): this path returns before build_type_array() and
       before the optional cleanup below -- confirm that callers do not
       rely on pp_data being released here. */
    pp->n_global_rules_firing++;
    pp->pp_node->violation = violation;
    return pp->pp_node;
  }

  if (1 == status)
  {
    /* One of the "normal" post-processing tests failed. */
    pp->n_local_rules_firing++;
    pp->pp_node->violation = violation;
  }
  else if (0 == status)
  {
    /* The sublinkage is legal according to the post-processing rules. */
    pp->pp_node->violation = NULL;
  }

  build_type_array(pp);
  if (cleanup) post_process_free_data(&pp->pp_data);
  return pp->pp_node;
}
Beispiel #3
0
/**
 * Store the domain names in the linkage.
 *
 * A fresh pp_info array, with one entry per link, is allocated and
 * attached to the linkage. Unless the postprocessor's pp_node reports a
 * violation, each entry then receives one name per element of the
 * matching d_type_array list. A name is the one-character string made
 * from the element's type, obtained from string_set_add() on
 * postprocessor->string_set. When a violation is reported, every entry
 * is left with zero domains and a NULL name array.
 *
 * Nothing is done if either argument is NULL.
 *
 * This is an utter waste of CPU time, if one is not interested
 * in printing the domain names.
 *
 * XXX TODO: refactor, so that this does not need to be called except
 * when printing the domain names.
 */
void linkage_set_domain_names(Postprocessor * postprocessor, Linkage linkage)
{
	PP_node * node;
	D_type_list * dtl;
	size_t lnk;

	if (NULL == linkage || NULL == postprocessor) return;

	/* The type array exists only for the benefit of this function. */
	build_type_array(postprocessor);

	/* Start out with every link having no domains at all. */
	linkage->pp_info = (PP_info *) exalloc(sizeof(PP_info) * linkage->num_links);
	for (lnk = 0; lnk < linkage->num_links; lnk++)
	{
		linkage->pp_info[lnk].num_domains = 0;
		linkage->pp_info[lnk].domain_name = NULL;
	}

	/* A linkage with a violation keeps the empty entries. */
	node = postprocessor->pp_node;
	if (NULL != node->violation) return;

	/* Copy the post-processing results over into the linkage. */
	for (lnk = 0; lnk < linkage->num_links; lnk++)
	{
		PP_info * info = &linkage->pp_info[lnk];
		size_t count = 0;
		size_t slot = 0;

		/* First walk: find out how many names are needed. */
		dtl = node->d_type_array[lnk];
		while (NULL != dtl)
		{
			count++;
			dtl = dtl->next;
		}

		info->num_domains = count;
		if (0 != count)
		{
			info->domain_name = (const char **) exalloc(sizeof(const char *)*count);
		}

		/* Second walk: turn each type character into a string. */
		for (dtl = node->d_type_array[lnk]; NULL != dtl; dtl = dtl->next)
		{
			char name[5];
			snprintf(name, sizeof(name), "%c", dtl->type);
			info->domain_name[slot] =
			      string_set_add (name, postprocessor->string_set);
			slot++;
		}
	}
}
Beispiel #4
0
/**
 * This does basic post-processing for all linkages.
 *
 * Linkages that are marked as discarded, or that already carry
 * violations, are skipped. Every other linkage is run through the
 * postprocessor, gets its domain names stored, and is scored. A linkage
 * for which the postprocessor reports a violation has its violation
 * count bumped and, unless a message is already present, its violation
 * message set. If the resources run out before all linkages have been
 * handled, the linkages that were not reached are marked as violating,
 * with a timeout message.
 *
 * If the sentence has no postprocessor (sent->postprocessor is NULL),
 * then no rules are applied and no linkage is marked as violating; the
 * linkages are still counted and scored. (do_post_process() returns
 * NULL for a NULL postprocessor, so there is no PP_node to look at.)
 *
 * On return, sent->num_linkages_post_processed and
 * sent->num_valid_linkages have been updated.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;
	bool have_pp = (NULL != sent->postprocessor);

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass && have_pp)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			if (lifo->discarded || lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		/* Without a postprocessor there are no rules to apply, and
		 * no PP_node or pp_data to touch. */
		if (have_pp)
		{
			PP_node *ppn = do_post_process(sent->postprocessor, lkg, twopass);

			/* XXX There is no need to set the domain names if we are not
			 * printing them. However, deferring this until later requires
			 * a huge code re-org, because pp_data is needed to get the
			 * domain type array, and pp_data is deleted immediately below.
			 * Basically, pp_data and pp_node should be a part of the linkage,
			 * and not part of the Postprocessor struct.
			 * This costs about 1% performance penalty. */
			build_type_array(sent->postprocessor);
			linkage_set_domain_names(sent->postprocessor, lkg);

			post_process_free_data(&sent->postprocessor->pp_data);

			if (NULL != ppn->violation)
			{
				N_valid_linkages--;
				lifo->N_violations++;

				/* Set the message, only if not set (e.g. by sane_morphism) */
				if (NULL == lifo->pp_violation_msg)
					lifo->pp_violation_msg = ppn->violation;
			}
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);
		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* This linkage has been fully handled. A break skips the
			 * loop increment, so step past it by hand; otherwise the
			 * loop below would flag it as timed out. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}