/**
 * Build the flat constituent string for a single linkage.
 *
 * Creates a fresh phrase string-set in the context, runs the sentence's
 * constituent post-processor over the linkage, derives the constituents
 * from the resulting domains, then runs the merge / conjunction / fix-up
 * stages in order before printing. The phrase string-set is deleted and
 * the context pointer reset to NULL before returning.
 *
 * @param ctxt     Constituent context; its phrase_ss field is overwritten.
 * @param linkage  Linkage to print; its hpsg_pp_data field is overwritten.
 * @return Whatever exprint_constituent_structure() returns for the final
 *         constituent count.
 */
static char * do_print_flat_constituents(con_context_t *ctxt, Linkage linkage)
{
	Sentence sent = linkage->sent;
	int numcon_total;
	char *flat;

	/* A NULL linkage array here means the sentence was already free()'d.
	 * NOTE(review): the assert conditions below are kept textually as-is
	 * in case the project's assert macro prints the expression. */
	assert(NULL != sent->lnkages, "No linkages");

	ctxt->phrase_ss = string_set_create();
	generate_misc_word_info(ctxt, linkage);

	/* The constituent post-processor is created on first use for this
	 * sentence and kept on the sentence afterwards. */
	if (NULL == sent->constituent_pp)
	{
		sent->constituent_pp = post_process_new(sent->dict->hpsg_knowledge);
	}

	do_post_process(sent->constituent_pp, linkage, linkage->is_sent_long);

	/* Nothing is done about domain names here. To debug them, one could
	 * call linkage_free_pp_info(linkage) followed by
	 * linkage_set_domain_names(sent->constituent_pp, linkage). */
	linkage->hpsg_pp_data = sent->constituent_pp->pp_data;
	pp_new_domain_array(&linkage->hpsg_pp_data);

	/* Each stage takes the running constituent count and yields the new
	 * one; the count must stay below ctxt->conlen after every stage. */
	numcon_total = read_constituents_from_domains(ctxt, linkage, 0);
	assert (numcon_total < ctxt->conlen, "Too many constituents (c)");

	numcon_total = merge_constituents(ctxt, linkage, numcon_total);
	assert (numcon_total < ctxt->conlen, "Too many constituents (d)");

	numcon_total = new_style_conjunctions(ctxt, linkage, numcon_total);
	assert (numcon_total < ctxt->conlen, "Too many constituents (e)");

	numcon_total = last_minute_fixes(ctxt, linkage, numcon_total);
	assert (numcon_total < ctxt->conlen, "Too many constituents (f)");

	flat = exprint_constituent_structure(ctxt, linkage, numcon_total);

	/* The phrase string-set is only needed while building the output. */
	string_set_delete(ctxt->phrase_ss);
	ctxt->phrase_ss = NULL;

	return flat;
}
/**
 * This does basic post-processing for all linkages.
 *
 * Linkages that are discarded or already carry violations are skipped.
 * For each remaining linkage the post-processor is run, the domain names
 * are set, the linkage is scored, and a post-processing violation (if any)
 * is recorded on the linkage. If resources run out part-way through, every
 * linkage that was NOT yet post-processed is marked with a timeout
 * violation.
 *
 * On return, sent->num_linkages_post_processed and
 * sent->num_valid_linkages hold the updated counts.
 *
 * @param sent  Sentence whose lnkages[0 .. num_linkages_alloced) are visited.
 * @param opts  Parse options: supplies twopass_length, resources, verbosity.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			if (lifo->discarded || lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			/* Poll the resource limits only every 50th index. */
			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		ppn = do_post_process(sent->postprocessor, lkg, twopass);

		/* XXX There is no need to set the domain names if we are not
		 * printing them. However, deferring this until later requires
		 * a huge code re-org, because pp_data is needed to get the
		 * domain type array, and pp_data is deleted immediately below.
		 * Basically, pp_data and pp_node should be a part of the linkage,
		 * and not part of the Postprocessor struct.
		 * This costs about 1% performance penalty. */
		build_type_array(sent->postprocessor);
		linkage_set_domain_names(sent->postprocessor, lkg);

		post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);

		/* Poll the resource limits only every 10th index. */
		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* `break` skips the loop's own increment, so step past this
			 * linkage by hand: it has been fully post-processed and must
			 * not be flagged as timed-out by the loop below. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.
	 * NOTE(review): an earlier comment here also said link names must be
	 * computed for these linkages; nothing in this function does that --
	 * confirm it is handled elsewhere. */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded || lifo->N_violations) continue;

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}
/**
 * This does basic post-processing for all linkages.
 *
 * Discarded linkages are skipped everywhere. Every other linkage gets its
 * link names computed exactly once (in the first pass when that pass
 * reached it, otherwise in whichever later loop visits it), is
 * post-processed and scored, and has a post-processing violation (if any)
 * recorded on it. If resources run out part-way through, every linkage
 * that was NOT yet post-processed is marked with a timeout violation.
 *
 * On return, sent->num_linkages_post_processed and
 * sent->num_valid_linkages hold the updated counts.
 *
 * @param sent  Sentence whose lnkages[0 .. num_linkages_alloced) are visited.
 * @param opts  Parse options: supplies twopass_length, resources, verbosity.
 */
static void post_process_linkages(Sentence sent, Parse_Options opts)
{
	size_t in;
	size_t N_linkages_post_processed = 0;
	size_t N_valid_linkages = sent->num_valid_linkages;
	size_t N_linkages_alloced = sent->num_linkages_alloced;
	bool twopass = sent->length >= opts->twopass_length;

	/* Indices below this were visited by the first pass, which computes
	 * link names for every non-discarded linkage it sees. It stays 0 when
	 * there is no first pass, and stops short of N_linkages_alloced when
	 * the first pass bails out early; the later loops then compute the
	 * names for everything at or beyond this index. */
	size_t N_first_pass_visited = 0;

	/* (optional) First pass: just visit the linkages */
	/* The purpose of the first pass is to make the post-processing
	 * more efficient.  Because (hopefully) by the time the real work
	 * is done in the 2nd pass, the relevant rule set has been pruned
	 * in the first pass.
	 */
	if (twopass)
	{
		for (in=0; in < N_linkages_alloced; in++)
		{
			Linkage lkg = &sent->lnkages[in];
			Linkage_info *lifo = &lkg->lifo;

			N_first_pass_visited = in + 1;

			if (lifo->discarded) continue;

			/* We still need link names, even if there has been a morfo
			 * violation. */
			compute_link_names(lkg, sent->string_set);

			if (lifo->N_violations) continue;

			post_process_scan_linkage(sent->postprocessor, lkg);

			/* Poll the resource limits only every 50th index. */
			if ((49 == in%50) && resources_exhausted(opts->resources)) break;
		}
	}

	/* Second pass: actually perform post-processing */
	/* NOTE(review): unlike the first pass, linkages that already have
	 * N_violations set are not skipped here or in the timeout loop below,
	 * so N_valid_linkages may be decremented for a linkage that was
	 * already invalid -- confirm how num_valid_linkages is defined. */
	for (in=0; in < N_linkages_alloced; in++)
	{
		PP_node *ppn;
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded) continue; /* Invalid morphism construction */

		/* We need link names, even if morfo check fails. Compute them
		 * here unless the first pass already did so for this index. */
		if (in >= N_first_pass_visited)
			compute_link_names(lkg, sent->string_set);

		ppn = do_post_process(sent->postprocessor, lkg, twopass);
		post_process_free_data(&sent->postprocessor->pp_data);

		if (NULL != ppn->violation)
		{
			N_valid_linkages--;
			lifo->N_violations++;

			/* Set the message, only if not set (e.g. by sane_morphism) */
			if (NULL == lifo->pp_violation_msg)
				lifo->pp_violation_msg = ppn->violation;
		}
		N_linkages_post_processed++;

		linkage_score(lkg, opts);

		/* Poll the resource limits only every 10th index. */
		if ((9 == in%10) && resources_exhausted(opts->resources))
		{
			/* `break` skips the loop's own increment, so step past this
			 * linkage by hand: it has been fully post-processed and must
			 * not be re-named or flagged as timed-out by the loop below. */
			in++;
			break;
		}
	}

	/* If the timer expired, then we never finished post-processing.
	 * Mark the remaining sentences as bad, as otherwise strange
	 * results get reported.  At any rate, need to compute the link
	 * names, as otherwise linkage_create() will crash and burn
	 * trying to touch them.
	 */
	for (; in < N_linkages_alloced; in++)
	{
		Linkage lkg = &sent->lnkages[in];
		Linkage_info *lifo = &lkg->lifo;

		if (lifo->discarded) continue;

		/* Only linkages the first pass never reached still lack names. */
		if (in >= N_first_pass_visited)
			compute_link_names(lkg, sent->string_set);

		N_valid_linkages--;
		lifo->N_violations++;

		/* Set the message, only if not set (e.g. by sane_morphism) */
		if (NULL == lifo->pp_violation_msg)
			lifo->pp_violation_msg = "Timeout during postprocessing";
	}

	print_time(opts, "Postprocessed all linkages");

	if (opts->verbosity > 1)
	{
		err_ctxt ec;
		ec.sent = sent;
		err_msg(&ec, Info, "Info: %zu of %zu linkages with no P.P. violations\n",
		        N_valid_linkages, N_linkages_post_processed);
	}

	sent->num_linkages_post_processed = N_linkages_post_processed;
	sent->num_valid_linkages = N_valid_linkages;
}