/**
 * Release a linkage together with the storage reachable from it.
 *
 * In order: every word string and the word array; then, per sublinkage,
 * each link and the link array, the post-processing info (only when
 * pp_info is non-NULL) along with its pp_data, and the violation string
 * (only when non-NULL); finally the sublinkage array and the linkage
 * structure itself.
 *
 * exfree() is given the size of each block: string blocks are passed
 * as strlen()+1, arrays as element size times element count.
 */
void linkage_delete(Linkage linkage)
{
	int w, n, k;

	/* Word strings first, then the array that held them. */
	for (w = 0; w < linkage->num_words; w++)
	{
		exfree(linkage->word[w], strlen(linkage->word[w]) + 1);
	}
	exfree(linkage->word, sizeof(char *) * linkage->num_words);

	for (n = 0; n < linkage->num_sublinkages; n++)
	{
		Sublinkage *sub = &linkage->sublinkage[n];

		for (k = 0; k < sub->num_links; k++)
		{
			exfree_link(sub->link[k]);
		}
		exfree(sub->link, sizeof(Link) * sub->num_links);

		/* Post-processing results exist only if pp_info was set. */
		if (NULL != sub->pp_info)
		{
			for (k = 0; k < sub->num_links; k++)
			{
				exfree_pp_info(sub->pp_info[k]);
			}
			exfree(sub->pp_info, sizeof(PP_info) * sub->num_links);
			post_process_free_data(&sub->pp_data);
		}

		if (NULL != sub->violation)
		{
			exfree(sub->violation, sizeof(char) * (strlen(sub->violation) + 1));
		}
	}

	exfree(linkage->sublinkage, sizeof(Sublinkage) * linkage->num_sublinkages);
	exfree(linkage, sizeof(struct Linkage_s));
}
/**
 * Post-process a single sublinkage.
 *
 * Takes a sublinkage and returns:
 *   - for each link, the domain structure of that link
 *   - a list of the violation strings
 * NB: sublinkage->link[i]->l=-1 means that this connector is to be ignored.
 *
 * The result is pp->pp_node, which stays owned by pp: it is reset at the
 * start of every call so that callers do not have to release it.
 *
 * Returns NULL when pp is NULL. When a global test fails, the node is
 * returned immediately: build_type_array() is not called and the cleanup
 * flag is not consulted. Otherwise, a non-zero cleanup makes the function
 * call post_process_free_data() on pp->pp_data before returning.
 */
PP_node *post_process(Postprocessor *pp, Parse_Options opts, Sentence sent,
                      Sublinkage *sublinkage, int cleanup)
{
	char *msg;

	if (NULL == pp)
	{
		return NULL;
	}

	pp->pp_data.links_to_ignore = NULL;
	pp->pp_data.length = sent->length;

	/* Clear out whatever the previous call left in pp->pp_node. */
	reset_pp_node(pp);

	/* The first time we see a sentence, prune the rules which we won't
	 * be needing while post-processing the linkages of this sentence. */
	if ((FALSE == sent->q_pruned_rules) &&
	    (sent->length >= opts->twopass_length))
	{
		prune_irrelevant_rules(pp);
	}
	sent->q_pruned_rules = TRUE;

	switch (internal_process(pp, sublinkage, &msg))
	{
		case 0:
			/* Legal according to the post-processing rules. */
			pp->pp_node->violation = NULL;
			break;

		case 1:
			/* One of the "normal" post-processing tests failed. */
			pp->n_local_rules_firing++;
			pp->pp_node->violation = msg;
			break;

		case -1:
			/* Some global test failed even before we had to build
			 * the domains; hand the node back right away. */
			pp->n_global_rules_firing++;
			pp->pp_node->violation = msg;
			return pp->pp_node;
	}

	build_type_array(pp);
	if (cleanup)
	{
		post_process_free_data(&pp->pp_data);
	}
	return pp->pp_node;
}
/**
 * Frees up memory associated with pp, previously allocated by open.
 * Passing NULL is allowed and does nothing.
 */
void post_process_free(Postprocessor *pp)
{
	if (NULL == pp)
	{
		return;
	}

	/* String set and the two link sets. */
	string_set_delete(pp->string_set);
	pp_linkset_close(pp->set_of_links_of_sentence);
	pp_linkset_close(pp->set_of_links_in_an_active_rule);

	/* Rule tables selected for the current sentence. */
	free(pp->relevant_contains_one_rules);
	free(pp->relevant_contains_none_rules);

	/* The knowledge pointer is only cleared here, not released. */
	pp->knowledge = NULL;

	free_pp_node(pp);
	free(pp->visited);

	/* Release the per-linkage data before the arrays that hold it. */
	post_process_free_data(&pp->pp_data);
	free(pp->pp_data.domain_array);
	free(pp->pp_data.word_links);

	free(pp);
}
/**
 * This does basic post-processing for all linkages.
 *
 * Linkages that are discarded or already carry violations are skipped.
 * Every other linkage is post-processed, gets its domain names set and
 * is scored; a linkage whose post-processing reports a violation is
 * removed from the valid count. If resources run out part-way through,
 * the linkages that were never reached are marked as violations with a
 * timeout message. On return, sent->num_linkages_post_processed and
 * sent->num_valid_linkages hold the updated totals.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			if (lifo->discarded || lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			/* Poll the resource limits only once every 50 linkages. */
			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		ppn = do_post_process(sent->postprocessor, lkg, twopass);

		/* XXX There is no need to set the domain names if we are not
		 * printing them. However, deferring this until later requires
		 * a huge code re-org, because pp_data is needed to get the
		 * domain type array, and pp_data is deleted immediately below.
		 * Basically, pp_data and pp_node should be a part of the linkage,
		 * and not part of the Postprocessor struct.
		 * This costs about 1% performance penalty. */
		build_type_array(sent->postprocessor);
		linkage_set_domain_names(sent->postprocessor, lkg);

		post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);

		/* Poll the resource limits only once every 10 linkages. */
		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* Linkage `in` has just been fully post-processed.  Step
			 * past it before leaving, so that the timeout loop below
			 * only touches linkages that were never reached; without
			 * this, a valid linkage would be flagged as timed out. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}
/**
 * Run the post-processor over every sublinkage of a linkage and store
 * the outcome in the sublinkage.
 *
 * For each sublinkage, any earlier pp_info, pp_data and violation string
 * are released first, and a fresh pp_info array (one entry per link) is
 * allocated with no domains. post_process() is then called, unless the
 * linkage is flagged as an improper fat linkage. When a result comes
 * back, each link receives one single-character domain name per entry
 * in the matching d_type_array list, the post-processor's pp_data is
 * copied into the sublinkage, and a violation message, if any, is
 * duplicated. post_process_close_sentence() is called once at the end.
 */
void linkage_post_process(Linkage linkage, Postprocessor * postprocessor)
{
	int N_sublinkages = linkage_get_num_sublinkages(linkage);
	Parse_Options opts = linkage->opts;
	Sentence sent = linkage->sent;
	int s;

	for (s = 0; s < N_sublinkages; s++)
	{
		Sublinkage *subl = &linkage->sublinkage[s];
		PP_node *ppn;
		int lnk;

		/* Throw away the results of any previous post-processing. */
		if (NULL != subl->pp_info)
		{
			for (lnk = 0; lnk < subl->num_links; lnk++)
			{
				exfree_pp_info(subl->pp_info[lnk]);
			}
			post_process_free_data(&subl->pp_data);
			exfree(subl->pp_info, sizeof(PP_info) * subl->num_links);
		}

		subl->pp_info = (PP_info *) exalloc(sizeof(PP_info) * subl->num_links);
		for (lnk = 0; lnk < subl->num_links; lnk++)
		{
			subl->pp_info[lnk].num_domains = 0;
			subl->pp_info[lnk].domain_name = NULL;
		}

		if (NULL != subl->violation)
		{
			exfree(subl->violation, sizeof(char) * (strlen(subl->violation) + 1));
			subl->violation = NULL;
		}

		/* post_process() can return NULL, for example if there is no
		 * post-processor. */
		ppn = linkage->info.improper_fat_linkage
			? NULL
			: post_process(postprocessor, opts, sent, subl, FALSE);

		if (NULL == ppn)
		{
			/* No result: leave every link without domains. */
			for (lnk = 0; lnk < subl->num_links; lnk++)
			{
				subl->pp_info[lnk].num_domains = 0;
				subl->pp_info[lnk].domain_name = NULL;
			}
			continue;
		}

		for (lnk = 0; lnk < subl->num_links; lnk++)
		{
			PP_info *info = &subl->pp_info[lnk];
			D_type_list *d;
			int n_domains = 0;
			int k = 0;

			/* Count the list first so the name array can be sized. */
			for (d = ppn->d_type_array[lnk]; d != NULL; d = d->next)
			{
				n_domains++;
			}
			info->num_domains = n_domains;
			if (n_domains > 0)
			{
				info->domain_name = (char **) exalloc(sizeof(char *) * n_domains);
			}

			/* Each name is a two-byte string: the type character
			 * followed by the terminator. */
			for (d = ppn->d_type_array[lnk]; d != NULL; d = d->next)
			{
				info->domain_name[k] = (char *) exalloc(sizeof(char) * 2);
				sprintf(info->domain_name[k], "%c", d->type);
				k++;
			}
		}

		subl->pp_data = postprocessor->pp_data;

		if (NULL != ppn->violation)
		{
			subl->violation =
				(char *) exalloc(sizeof(char) * (strlen(ppn->violation) + 1));
			strcpy(subl->violation, ppn->violation);
		}
	}

	post_process_close_sentence(postprocessor);
}
/**
 * This does basic post-processing for all linkages.
 *
 * Discarded linkages are skipped entirely. Every other linkage gets its
 * link names computed exactly once, is post-processed and is scored; a
 * linkage whose post-processing reports a violation is removed from the
 * valid count. If resources run out part-way through, the linkages that
 * were never reached are marked as violations with a timeout message
 * (after their link names have been computed). On return,
 * sent->num_linkages_post_processed and sent->num_valid_linkages hold
 * the updated totals.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* Linkages with index below this were visited by the first pass,
	 * which computes their link names. It stays 0 when there is no
	 * first pass, and stops short of N_linkages_alloced when the first
	 * pass is cut off by resource exhaustion; in either case the later
	 * loops must compute the names for everything at or above it. */
	size_t N_names_done = 0;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			if (lifo->discarded) continue;

			/* We still need link names, even if there has been a morfo
			 * violation. */
			compute_link_names(lkg, sent->string_set);

			if (lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			/* Poll the resource limits only once every 50 linkages. */
			if ((49 == in%50) && resources_exhausted(opts->resources))
			{
				/* Linkage `in` has been visited; count it as done. */
				in++;
				break;
			}
		}
		N_names_done = in;
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded) continue; /* Invalid morphism construction */

		/* We need link names, even if morfo check fails */
		if (in >= N_names_done) compute_link_names(lkg, sent->string_set);

		ppn = do_post_process(sent->postprocessor, lkg, twopass);
		post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);

		/* Poll the resource limits only once every 10 linkages. */
		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* Linkage `in` has just been fully post-processed.  Step
			 * past it before leaving, so that the timeout loop below
			 * does not count it a second time or flag it as timed
			 * out. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded) continue;

		if (in >= N_names_done) compute_link_names(lkg, sent->string_set);

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}