/**
 * Prune useless disjuncts.
 *
 * Runs power_prune() once, then pp_prune(); when pp_prune() returns a
 * positive value, power_prune() is run one more time.
 *
 * power_prune() takes only (sent, opts) and allocates, initializes and
 * frees its own power table on every call, so no table is set up here.
 *
 * @param sent  The sentence whose disjuncts are pruned in place.
 * @param opts  Parse options, forwarded unchanged to both pruning steps.
 */
void pp_and_power_prune(Sentence sent, Parse_Options opts)
{
	power_prune(sent, opts);

	if (pp_prune(sent, opts) > 0)
	{
		power_prune(sent, opts);
	}

	/* No benefit for now to make additional pp_prune() & power_prune() -
	 * additional deletions are very rare and even then most of the
	 * times only one disjunct is deleted. */
}
/**
 * Sweep the sentence alternately left-to-right and right-to-left,
 * unlinking disjuncts from each word's list, until a sweep neither
 * changes anything (pc.N_changed stays 0) nor deletes a disjunct.
 *
 * @param sent  The sentence whose per-word disjunct lists are edited in place.
 * @param opts  Parse options; min_null_count > 0 enables null-link mode,
 *              and opts is also passed to print_time().
 * @return The total number of disjuncts deleted over all sweeps.
 *
 * Implementation notes:
 * Identical disjunct-jets are normally memory shared. The suffix_id of
 * each connector serves as its reference count in the power table.
 * Whenever a connector that cannot match is discovered, its reference
 * count is decreased and its nearest_word field is set to BAD_WORD.
 * Because of the memory sharing, every such assignment is immediately
 * visible in all the identical disjunct-jets.
 */
static int power_prune(Sentence sent, Parse_Options opts)
{
	power_table pt;
	prune_context pc;
	/* [0] counts first deletions, [1] counts dups (already BAD_WORD). */
	int N_deleted[2] = {0};
	int total_deleted = 0;

	power_table_alloc(sent, &pt);
	power_table_init(sent, &pt);

	pc.pt = &pt;
	pc.power_cost = 0;
	pc.null_links = (opts->min_null_count > 0);
	pc.N_changed = 1; /* forces it always to make at least two passes */
	pc.sent = sent;

	while (1)
	{
		int pass_deleted;

		/* ---- left-to-right pass ---- */
		for (WordIdx w = 0; w < sent->length; w++)
		{
			/* "link" always addresses the pointer that leads to the
			 * disjunct under inspection, so unlinking is a single store. */
			Disjunct **link = &sent->word[w].d;

			while (*link != NULL)
			{
				Disjunct *cur = *link;

				if (cur->left != NULL)
				{
					bool already_bad = (cur->left->nearest_word == BAD_WORD);

					/* The update call is skipped when the connector is
					 * already known to be bad (short-circuit). */
					if (already_bad ||
					    left_connector_list_update(&pc, cur->left, w, true) < 0)
					{
						mark_connector_sequence_for_dequeue(cur->left, true);
						mark_connector_sequence_for_dequeue(cur->right, false);

						/* Discard the current disjunct; *link now refers
						 * to its successor, so do not advance "link". */
						*link = cur->next;
						N_deleted[(int)already_bad]++;
						continue;
					}
				}

				link = &cur->next;
			}

			clean_table(pt.r_table_size[w], pt.r_table[w]);
		}

		pass_deleted = N_deleted[0] + N_deleted[1];
		total_deleted += pass_deleted;
		lgdebug(D_PRUNE, "Debug: l->r pass changed %d and deleted %d (%d+%d)\n",
		        pc.N_changed, pass_deleted, N_deleted[0], N_deleted[1]);

		if (pc.N_changed == 0 && pass_deleted == 0) break;

		pc.N_changed = 0;
		N_deleted[0] = 0;
		N_deleted[1] = 0;

		/* ---- right-to-left pass ---- */
		for (WordIdx w = sent->length-1; w != (WordIdx) -1; w--)
		{
			Disjunct **link = &sent->word[w].d;

			while (*link != NULL)
			{
				Disjunct *cur = *link;

				if (cur->right != NULL)
				{
					bool already_bad = (cur->right->nearest_word == BAD_WORD);

					if (already_bad ||
					    right_connector_list_update(&pc, cur->right, w, true) >= sent->length)
					{
						mark_connector_sequence_for_dequeue(cur->right, true);
						mark_connector_sequence_for_dequeue(cur->left, false);

						/* Discard the current disjunct; "link" stays put. */
						*link = cur->next;
						N_deleted[(int)already_bad]++;
						continue;
					}
				}

				link = &cur->next;
			}

			clean_table(pt.l_table_size[w], pt.l_table[w]);
		}

		pass_deleted = N_deleted[0] + N_deleted[1];
		total_deleted += pass_deleted;
		lgdebug(D_PRUNE, "Debug: r->l pass changed %d and deleted %d (%d+%d)\n",
		        pc.N_changed, pass_deleted, N_deleted[0], N_deleted[1]);

		if (pc.N_changed == 0 && pass_deleted == 0) break;

		pc.N_changed = 0;
		N_deleted[0] = 0;
		N_deleted[1] = 0;
	}

	power_table_delete(&pt);

	lgdebug(D_PRUNE, "Debug: power prune cost: %d\n", pc.power_cost);

	print_time(opts, "power pruned");
	if (verbosity_level(D_PRUNE))
	{
		prt_error("\n\\");
		prt_error("Debug: After power_pruning:\n\\");
		print_disjunct_counts(sent);
	}

#ifdef DEBUG
	/* Sanity check: no surviving disjunct may still carry a connector
	 * that was marked BAD_WORD. */
	for (WordIdx w = 0; w < sent->length; w++)
	{
		for (Disjunct *cur = sent->word[w].d; NULL != cur; cur = cur->next)
		{
			for (Connector *c = cur->left; NULL != c; c = c->next)
				assert(c->nearest_word != BAD_WORD);

			for (Connector *c = cur->right; NULL != c; c = c->next)
				assert(c->nearest_word != BAD_WORD);
		}
	}
#endif

	return total_deleted;
}