/**
 * Takes a linkage and returns:
 *  . for each link, the domain structure of that link
 *  . a list of the violation strings
 * NB: linkage->link[i]->l=-1 means that this connector is to be ignored.
 *
 * @param pp          Postprocessor holding the rules and scratch state.
 * @param sublinkage  Linkage to check; only num_words is read here, the
 *                    rest is consumed by internal_process().
 * @param is_long     If true, irrelevant rules are pruned before
 *                    processing (only when they have not been pruned yet).
 * @return pp->pp_node, which stays owned by pp and is cleared again on the
 *         next call; its violation field is NULL when no rule fired.
 *         Returns NULL when pp is NULL or when the visited[] scratch
 *         array could not be enlarged.
 */
PP_node *do_post_process(Postprocessor *pp, Linkage sublinkage, bool is_long)
{
	const char *msg = NULL;

	if (pp == NULL) return NULL;

	/* XXX why is this not leaking memory? */
	pp->pp_data.links_to_ignore = NULL;

	pp->pp_data.num_words = sublinkage->num_words;

	/* Grab more memory if needed */
	if (pp->vlength <= pp->pp_data.num_words)
	{
		size_t newsz;
		void *grown;

		pp->vlength += pp->pp_data.num_words;
		newsz = pp->vlength * sizeof(bool);

		/* Never assign realloc()'s result straight to pp->visited: on
		 * failure it returns NULL and the old block is still allocated,
		 * so a direct assignment would leak it and leave visited NULL. */
		grown = realloc(pp->visited, newsz);
		if (grown == NULL && newsz > 0)
		{
			/* Keep the old buffer and its recorded length consistent. */
			pp->vlength -= pp->pp_data.num_words;
			return NULL;
		}
		pp->visited = (bool *) grown;
	}
	clear_visited(pp);

	/* In the name of responsible memory management, we retain a copy of the
	 * returned data structure pp_node as a field in pp, so that we can clear
	 * it out after every call, without relying on the user to do so. */
	clear_pp_node(pp);

	/* For long sentences, we can save some time by pruning the rules
	 * which can't possibly be used during postprocessing the linkages
	 * of this sentence. For short sentences, this is pointless. */
	if (is_long && pp->q_pruned_rules == false)
	{
		prune_irrelevant_rules(pp);
	}
	/* The flag is raised on every call, long or short, so pruning can
	 * only ever happen on the first call after the flag was cleared. */
	pp->q_pruned_rules = true;

	switch (internal_process(pp, sublinkage, &msg))
	{
		case -1:
			/* some global test failed even before we had to build the domains */
			pp->n_global_rules_firing++;
			pp->pp_node->violation = msg;
			report_pp_stats(pp);
			return pp->pp_node;
		case 1:
			/* one of the "normal" post processing tests failed */
			pp->n_local_rules_firing++;
			pp->pp_node->violation = msg;
			break;
		case 0:
			/* This linkage is legal according to the post processing rules */
			pp->pp_node->violation = NULL;
			break;
		default:
			/* Unexpected status: leave the violation as clear_pp_node() left it. */
			break;
	}

	report_pp_stats(pp);
	build_type_array(pp);

	return pp->pp_node;
}
/**
 * Takes a sublinkage and returns:
 *  . for each link, the domain structure of that link
 *  . a list of the violation strings
 * NB: sublinkage->link[i]->l=-1 means that this connector is to be ignored.
 *
 * @param pp          Postprocessor to run; NULL yields a NULL result.
 * @param opts        Parse options; twopass_length is the sentence length
 *                    from which rule pruning is attempted.
 * @param sent        Sentence being processed; its length and
 *                    q_pruned_rules flag are read, and the flag is set.
 * @param sublinkage  Sublinkage handed to internal_process().
 * @param cleanup     If non-zero, pp->pp_data is released with
 *                    post_process_free_data() before the normal return.
 *                    The early return taken when a global test fails
 *                    does not perform this cleanup.
 * @return pp->pp_node (owned by pp), or NULL if pp is NULL.
 */
PP_node *post_process(Postprocessor *pp, Parse_Options opts,
                      Sentence sent, Sublinkage *sublinkage, int cleanup)
{
	char *msg;

	if (pp == NULL)
	{
		return NULL;
	}

	pp->pp_data.links_to_ignore = NULL;
	pp->pp_data.length = sent->length;

	/* The result node lives inside pp so that it can be wiped at the start
	 * of every call, instead of trusting the caller to release it. */
	reset_pp_node(pp);

	/* On the first visit to a sufficiently long sentence, discard the rules
	 * that cannot apply to any of its linkages. The flag is raised in every
	 * case, so this is attempted at most once per sentence. */
	if (sent->q_pruned_rules == FALSE && sent->length >= opts->twopass_length)
	{
		prune_irrelevant_rules(pp);
	}
	sent->q_pruned_rules = TRUE;

	switch (internal_process(pp, sublinkage, &msg))
	{
		case 0:
			/* The linkage satisfies all post-processing rules. */
			pp->pp_node->violation = NULL;
			break;

		case 1:
			/* A "normal" (local) post-processing test failed. */
			pp->n_local_rules_firing++;
			pp->pp_node->violation = msg;
			break;

		case -1:
			/* A global test failed before any domains had to be built:
			 * report it right away, skipping the type array and cleanup. */
			pp->n_global_rules_firing++;
			pp->pp_node->violation = msg;
			return pp->pp_node;
	}

	build_type_array(pp);

	if (cleanup)
	{
		post_process_free_data(&pp->pp_data);
	}

	return pp->pp_node;
}
/**
 * Store the domain names in the linkage.
 * This is an utter waste of CPU time, if one is not interested
 * in printing the domain names.
 *
 * Allocates linkage->pp_info with one entry per link. When the current
 * post-processing result carries a violation, every entry is left with
 * zero domains and a NULL name array. Otherwise each entry receives one
 * name per element of the matching d_type_array list; a name is the
 * single domain-type character, stored via the postprocessor's string set.
 *
 * Does nothing if either argument is NULL.
 *
 * XXX TODO: refactor, so that this does not need to be called except
 * when printing the domain names.
 */
void linkage_set_domain_names(Postprocessor * postprocessor, Linkage linkage)
{
	PP_node *node;
	size_t lnk;

	if (NULL == linkage || NULL == postprocessor) return;

	/* The only reason to build the type array is for this function. */
	build_type_array(postprocessor);

	/* Start every link off with an empty domain list. */
	linkage->pp_info = (PP_info *) exalloc(sizeof(PP_info) * linkage->num_links);
	for (lnk = 0; lnk < linkage->num_links; lnk++)
	{
		linkage->pp_info[lnk].num_domains = 0;
		linkage->pp_info[lnk].domain_name = NULL;
	}

	/* Copy the post-processing results over into the linkage,
	 * unless the linkage was rejected. */
	node = postprocessor->pp_node;
	if (node->violation != NULL) return;

	for (lnk = 0; lnk < linkage->num_links; lnk++)
	{
		D_type_list *d;
		size_t count = 0;
		size_t slot = 0;

		/* First walk: count the domains so the name array can be sized. */
		for (d = node->d_type_array[lnk]; d != NULL; d = d->next)
		{
			count++;
		}

		linkage->pp_info[lnk].num_domains = count;
		if (count > 0)
		{
			linkage->pp_info[lnk].domain_name =
				(const char **) exalloc(sizeof(const char *) * count);
		}

		/* Second walk: turn each domain type into a one-character name. */
		for (d = node->d_type_array[lnk]; d != NULL; d = d->next)
		{
			char buff[5];

			snprintf(buff, sizeof(buff), "%c", d->type);
			linkage->pp_info[lnk].domain_name[slot] =
				string_set_add(buff, postprocessor->string_set);
			slot++;
		}
	}
}
/**
 * This does basic post-processing for all linkages.
 *
 * Every allocated linkage that is neither discarded nor already carrying
 * a violation is run through do_post_process(), has its domain names
 * stored, and is scored. Linkages that post-processing rejects, or that
 * are never reached because the resource limit was hit, are flagged with
 * a violation and removed from the valid count. The resulting counters
 * are written back to sent->num_linkages_post_processed and
 * sent->num_valid_linkages.
 *
 * @param sent  Sentence whose linkages are post-processed in place.
 * @param opts  Parse options (twopass_length, resources, verbosity).
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			if (lifo->discarded || lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		ppn = do_post_process(sent->postprocessor, lkg, twopass);

		/* do_post_process() returns NULL when it has no result to offer
		 * (e.g. there is no postprocessor). There is then nothing to copy,
		 * free or inspect, and the linkage is left unflagged. */
		if (NULL != ppn)
		{
			/* XXX There is no need to set the domain names if we are not
			 * printing them. However, deferring this until later requires
			 * a huge code re-org, because pp_data is needed to get the
			 * domain type array, and pp_data is deleted immediately below.
			 * Basically, pp_data and pp_node should be a part of the linkage,
			 * and not part of the Postprocessor struct.
			 * This costs about 1% performance penalty.
			 *
			 * linkage_set_domain_names() builds the type array itself,
			 * so it is not built separately here. */
			linkage_set_domain_names(sent->postprocessor, lkg);

			post_process_free_data(&sent->postprocessor->pp_data);

			if (NULL != ppn->violation)
			{
				N_valid_linkages--;
				lifo->N_violations++;

				/* Set the message, only if not set (e.g. by sane_morphism) */
				if (NULL == lifo->pp_violation_msg)
					lifo->pp_violation_msg = ppn->violation;
			}
		}

		N_linkages_post_processed++;
		linkage_score(lkg, opts);

		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* This linkage has been fully processed: step past it, so
			 * that the timeout loop below only marks the linkages that
			 * were never reached. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}