/**
 * Turn the sentence's expressions into disjuncts and prune them.
 *
 * Precondition: the sentence expression lists have already been generated.
 *
 * The steps run in this order:
 *   1. build_sentence_disjuncts() with the cost cutoff opts->disjunct_cost;
 *   2. eliminate_duplicate_disjuncts() on every word's disjunct list;
 *   3. set_connector_length_limits();
 *   4. pp_and_power_prune().
 *
 * If resources_exhausted() reports true after a word has been
 * de-duplicated, the function returns immediately and steps 3 and 4
 * (and the remaining words of step 2) are skipped.
 *
 * @param sent  Sentence whose per-word disjunct lists are built and pruned.
 * @param opts  Parse options; supplies disjunct_cost and resources.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	size_t w = 0;

	build_sentence_disjuncts(sent, opts->disjunct_cost);
	if (verbosity > 2)
	{
		printf("After expanding expressions into disjuncts:");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	while (w < sent->length)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);

		/* Long sentences (Russian ones have been seen to do this) can
		 * blow up at this point, so check the resource budget after
		 * every single word. */
		if (resources_exhausted(opts->resources))
		{
			return;
		}
		w++;
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity > 2)
	{
		printf("\nAfter expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	set_connector_length_limits(sent, opts);
	pp_and_power_prune(sent, opts);
}
/**
 * Grow each word's disjunct list by working with word-clusters.
 *
 * For every X_node attached to a word, build_expansion_disjuncts() is
 * asked for additional disjuncts. Any that come back are placed in front
 * of the word's current list, and the combined list is then passed through
 * eliminate_duplicate_disjuncts().
 *
 * @param sent  Sentence whose words' disjunct lists are updated in place.
 * @return true if, for at least one X_node, the disjunct count after
 *         merging and de-duplication is larger than it was before;
 *         false otherwise.
 */
bool lg_expand_disjunct_list(Sentence sent)
{
	Cluster *cluster = lg_cluster_new();
	bool grew = false;
	size_t idx;

	for (idx = 0; idx < sent->length; idx++)
	{
		Disjunct *merged = sent->word[idx].d;
		X_node *xn;

		for (xn = sent->word[idx].x; xn != NULL; xn = xn->next)
		{
			unsigned int before;
			Disjunct *extra = build_expansion_disjuncts(cluster, xn);

			/* Nothing new for this X_node: leave the list untouched. */
			if (!extra) continue;

			before = count_disjuncts(merged);
			merged = eliminate_duplicate_disjuncts(
			             catenate_disjuncts(extra, merged));

			if (count_disjuncts(merged) > before)
			{
				grew = true;
			}
		}

		sent->word[idx].d = merged;
	}

	lg_cluster_delete(cluster);
	return grew;
}
/**
 * Turn the sentence's expressions into disjuncts and set up connectors.
 *
 * Precondition: the sentence expression lists have already been generated.
 *
 * The steps run in this order:
 *   1. build_sentence_disjuncts() with the cost cutoff opts->disjunct_cost;
 *   2. eliminate_duplicate_disjuncts() on every word's disjunct list;
 *   3. gword_record_in_connector();
 *   4. setup_connectors().
 *
 * If resources_exhausted() reports true after a word has been
 * de-duplicated, the function returns immediately and steps 3 and 4
 * (and the remaining words of step 2) are skipped.
 *
 * Disjunct counts are reported through prt_error() when
 * verbosity_level(5) holds.
 *
 * @param sent  Sentence whose per-word disjunct lists are built.
 * @param opts  Parse options; supplies disjunct_cost and resources.
 */
void prepare_to_parse(Sentence sent, Parse_Options opts)
{
	size_t w = 0;

	build_sentence_disjuncts(sent, opts->disjunct_cost, opts);
	if (verbosity_level(5))
	{
		prt_error("Debug: After expanding expressions into disjuncts:\n");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Built disjuncts");

	while (w < sent->length)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);

		/* Long sentences (Russian ones have been seen to do this) can
		 * blow up at this point, so check the resource budget after
		 * every single word. */
		if (resources_exhausted(opts->resources))
		{
			return;
		}
		w++;
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity_level(5))
	{
		prt_error("Debug: After expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	gword_record_in_connector(sent);
	setup_connectors(sent);
}
/**
 * Prune the sentence's disjuncts and build the conjunction ("and")
 * structures needed before parsing.
 *
 * Precondition: the sentence expression lists have already been generated.
 * Disjunct building is NOT performed here: the build_sentence_disjuncts()
 * call is disabled in this variant, although the "Built disjuncts" timing
 * line is still emitted.
 *
 * Side effect: sets the file-level flag null_links from
 * opts->min_null_count.
 *
 * @param sent  Sentence whose per-word disjunct lists are pruned in place.
 * @param opts  Parse options; supplies min_null_count and is forwarded to
 *              the pruning and timing helpers.
 */
void my_prepare_to_parse(Sentence sent, Parse_Options opts)
{
	int w;
	int with_conj;

	print_time(opts, "Built disjuncts");

	for (w = 0; w < sent->length; w++)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);
	}
	print_time(opts, "Eliminated duplicate disjuncts");

	if (verbosity > 2)
	{
		printf("\nAfter expression pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}

	null_links = (opts->min_null_count > 0);
	with_conj = sentence_contains_conjunction(sent);

	/* Why are these done here and not only in first_prepare_to_parse()?
	 * Because the deletable region depends on whether null links are in
	 * use: with null_links, everything is deletable. */
	set_connector_length_limits(sent, opts);
	build_deletable(sent, with_conj);
	build_effective_dist(sent, with_conj);

	/* No conjunction in the sentence: one ruthless pruning pass is all
	 * that is needed. */
	if (!with_conj)
	{
		pp_and_power_prune(sent, RUTHLESS, opts);
		print_time(opts, "Initialized fast matcher and hash table");
		return;
	}

	/* Conjunction present: prune gently first, then run conjunction
	 * pruning. */
	pp_and_power_prune(sent, GENTLE, opts);
	conjunction_prune(sent, opts);
	if (verbosity > 2)
	{
		printf("\nAfter conjunction pruning:\n");
		print_disjunct_counts(sent);
		print_statistics();
	}
	print_time(opts, "Done conjunction pruning");

	build_conjunction_tables(sent);
	install_fat_connectors(sent);
	install_special_conjunctive_connectors(sent);
	if (verbosity > 2)
	{
		printf("After conjunctions, disjuncts counts:\n");
		print_disjunct_counts(sent);
	}

	/* The length limits have to be set again because of the new fat
	 * connectors and disjuncts. */
	set_connector_length_limits(sent, opts);
	print_time(opts, "Constructed fat disjuncts");

	prune(sent);
	print_time(opts, "Pruned fat disjuncts");

	for (w = 0; w < sent->length; w++)
	{
		sent->word[w].d = eliminate_duplicate_disjuncts(sent->word[w].d);
	}
	if (verbosity > 2)
	{
		printf("After pruning and duplicate elimination:\n");
		print_disjunct_counts(sent);
	}
	print_time(opts, "Eliminated duplicate disjuncts (again)");

	if (verbosity > 2)
	{
		print_AND_statistics(sent);
	}

	/* The power-pruning time used to be printed here; per the original
	 * note it is now reported from power_prune() itself. */
	power_prune(sent, RUTHLESS, opts);

	print_time(opts, "Initialized fast matcher and hash table");
}