/**
 * Prune the sentence with one power_prune() pass followed by one
 * pp_prune() pass, then return.
 *
 * An alternating "pp, power, pp, power, ..." loop that stops as soon
 * as either pruner returns 0 is retained below for reference only.
 * It sits after the unconditional return, and is additionally compiled
 * out unless ONLY_IF_YOU_THINK_THIS_IS_WORTH_IT is defined, so it never
 * executes.
 */
void pp_and_power_prune(Sentence sent, Parse_Options opts)
{
	/* Each pruner runs exactly once. */
	power_prune(sent, opts);
	pp_prune(sent, opts);
	return;

	/*
	 * Not reached. Skipping the loop below gains a few percent of
	 * performance: it mostly does a lot of work and finds pretty
	 * much nothing, so it is skipped.
	 */
#ifdef ONLY_IF_YOU_THINK_THIS_IS_WORTH_IT
	while (pp_prune(sent, opts) != 0 && power_prune(sent, opts) != 0)
	{
		/* Keep alternating until one of the pruners returns 0. */
	}
#endif
}
/**
 * Prune useless disjuncts.
 *
 * Sequence: power_prune(), then pp_prune(); power_prune() is run a
 * second time only when pp_prune() returns a value greater than zero
 * (presumably the number of disjuncts it removed -- confirm against
 * pp_prune()). A single power table is allocated and initialized up
 * front, handed to both power_prune() calls, and deleted before
 * returning.
 */
void pp_and_power_prune(Sentence sent, Parse_Options opts)
{
	power_table table;

	power_table_alloc(sent, &table);
	power_table_init(sent, &table);

	power_prune(sent, opts, &table);
	if (pp_prune(sent, opts) > 0)
	{
		power_prune(sent, opts, &table);
	}

	/*
	 * No benefit for now in further pp_prune() / power_prune() rounds:
	 * additional deletions are very rare, and even then most of the
	 * time only one disjunct is deleted.
	 */
	power_table_delete(&table);
}
/**
 * Replace every word's disjunct list with the result of
 * eliminate_duplicate_disjuncts() on that list.
 */
static void dedupe_word_disjuncts(Sentence sent)
{
	int w = 0;

	while (w < sent->length)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);
		w++;
	}
}

/**
 * Pruning path used when the sentence contains a conjunction:
 * gentle pp/power pruning, conjunction pruning, construction of the
 * conjunction tables and fat connectors, a second round of pruning
 * and duplicate elimination, and finally a RUTHLESS power_prune().
 */
static void prune_with_conjunctions(Sentence sent, Parse_Options opts)
{
	pp_and_power_prune(sent, GENTLE, opts);
	/*
	 * Disabled reporting that used to follow the gentle pass:
	 *   if (verbosity > 2) {
	 *       printf("\nAfter Gentle power pruning:\n");
	 *       print_disjunct_counts(sent);
	 *   }
	 *   print_time(opts, "Finished gentle power pruning");
	 */

	conjunction_prune(sent, opts);
	if (verbosity > 2)
	{
		printf("\nAfter conjunction pruning:\n");
		print_disjunct_counts(sent);
		print_statistics();
	}
	print_time(opts, "Done conjunction pruning");

	build_conjunction_tables(sent);
	install_fat_connectors(sent);
	install_special_conjunctive_connectors(sent);
	if (verbosity > 2)
	{
		printf("After conjunctions, disjuncts counts:\n");
		print_disjunct_counts(sent);
	}

	/* Has to be done again because of the new fat connectors and
	 * disjuncts. */
	set_connector_length_limits(sent, opts);
	print_time(opts, "Constructed fat disjuncts");

	prune(sent);
	print_time(opts, "Pruned fat disjuncts");

	dedupe_word_disjuncts(sent);
	if (verbosity > 2)
	{
		printf("After pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Eliminated duplicate disjuncts (again)");

	if (verbosity > 2)
	{
		print_AND_statistics(sent);
	}

	power_prune(sent, RUTHLESS, opts);
}

/**
 * Do the pruning and the building of AND (conjunction) structures
 * needed before parsing.
 *
 * Assumes the sentence's expression lists have already been generated.
 * The build_sentence_disjuncts() call is disabled in this variant, so
 * the per-word disjunct lists are presumably expected to exist already
 * as well -- verify against the caller.
 *
 * Sets the global null_links from opts->min_null_count and reads the
 * global verbosity to decide whether to print disjunct counts.
 */
void my_prepare_to_parse(Sentence sent, Parse_Options opts)
{
	int conj_present;

	/*
	 * Disabled in this variant:
	 *   build_sentence_disjuncts(sent, opts->disjunct_cost);
	 *   if (verbosity > 2) {
	 *       printf("After expanding expressions into disjuncts:");
	 *       print_disjunct_counts(sent);
	 *   }
	 * The timing checkpoint that followed it is still emitted.
	 */
	print_time(opts, "Built disjuncts");

	dedupe_word_disjuncts(sent);
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity > 2)
	{
		printf("\nAfter expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	null_links = (opts->min_null_count > 0);

	/*
	 * Why is this done here rather than only in
	 * first_prepare_to_parse()? Because the deletable region depends
	 * on whether null links are in use: with null_links, everything
	 * is deletable.
	 */
	conj_present = sentence_contains_conjunction(sent);
	set_connector_length_limits(sent, opts);
	build_deletable(sent, conj_present);
	build_effective_dist(sent, conj_present);

	if (conj_present)
	{
		prune_with_conjunctions(sent, opts);
	}
	else
	{
		pp_and_power_prune(sent, RUTHLESS, opts);
	}

	/*
	 * Disabled reporting that used to follow the final power prune:
	 *   if (verbosity > 2) {
	 *       printf("\nAfter RUTHLESS power-pruning:\n");
	 *       print_disjunct_counts(sent);
	 *   }
	 * The timing for power pruning used to be printed here; it is now
	 * printed by power_prune() itself.
	 */
	print_time(opts, "Initialized fast matcher and hash table");
}