/**
 * Prune disjuncts by using the post-processor "contains one" rules.
 *
 * A multiset of the connector strings of every disjunct in the sentence
 * is built first, and every disjunct is marked. Then, pass after pass,
 * each still-marked disjunct is tested against every rule whose selector
 * has no wildcard: if one of its connectors matches the rule selector
 * and rule_satisfiable() fails for the rule's link set against the
 * multiset, the disjunct is unmarked and its connector strings are
 * removed from the multiset. Passes repeat for as long as at least one
 * delete_from_cms_table() call in the pass returned nonzero.
 *
 * @param sent The sentence whose disjuncts are examined. Nothing is done
 *             if it has no post-processor.
 * @param opts Parse options; nothing is done unless perform_pp_prune is
 *             set.
 * @return The total number of disjuncts that got unmarked (0 if pruning
 *         was skipped).
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	/* No post-processor, or pruning not requested: nothing to do. */
	if ((sent->postprocessor == NULL) || !opts->perform_pp_prune) return 0;

	pp_knowledge *rules = sent->postprocessor->knowledge;
	multiset_table *names = cms_table_new();

	/* Mark every disjunct and record all of its connector strings,
	 * right-going connectors first and then the left-going ones. */
	for (size_t wi = 0; wi < sent->length; wi++)
	{
		Disjunct *dj = sent->word[wi].d;
		while (dj != NULL)
		{
			Connector *side[2] = { dj->right, dj->left };

			dj->marked = true;
			for (int s = 0; s < 2; s++)
			{
				for (Connector *cn = side[s]; cn != NULL; cn = cn->next)
				{
					insert_in_cms_table(names, connector_string(cn));
				}
			}
			dj = dj->next;
		}
	}

	int removed_total = 0;
	bool table_changed;

	/* At least one pass is always made. */
	do
	{
		int removed_this_pass = 0;
		table_changed = false;

		for (size_t wi = 0; wi < sent->length; wi++)
		{
			for (Disjunct *dj = sent->word[wi].d; dj != NULL; dj = dj->next)
			{
				if (!dj->marked) continue; /* Unmarked in a previous pass. */

				Connector *side[2] = { dj->right, dj->left };
				bool doomed = false;

				/* Stop at the first rule that condemns this disjunct. */
				for (size_t r = 0;
				     !doomed && (r < rules->n_contains_one_rules); r++)
				{
					pp_rule *rule = &rules->contains_one_rules[r];

					/* Rules with a wildcard selector are not used here. */
					if (rule->selector_has_wildcard) continue;

					for (int s = 0; !doomed && (s < 2); s++)
					{
						for (Connector *cn = side[s];
						     !doomed && (cn != NULL); cn = cn->next)
						{
							/* Only a connector that matches the selector
							 * triggers the rule. */
							if (!post_process_match(rule->selector,
							                        connector_string(cn)))
								continue;

							/* Triggered: the criterion links must be
							 * satisfiable from the multiset. */
							if (rule_satisfiable(names, rule->link_set))
								continue;

							doomed = true;
							rule->use_count++;
						}
					}
				}

				if (!doomed) continue;

				/* Unmark the disjunct (the actual deletion is done after
				 * the last pass) and withdraw its connector strings. */
				removed_this_pass++;
				removed_total++;
				dj->marked = false;
				for (int s = 0; s < 2; s++)
				{
					for (Connector *cn = side[s]; cn != NULL; cn = cn->next)
					{
						if (delete_from_cms_table(names, connector_string(cn)))
							table_changed = true;
					}
				}
			}
		}

		lgdebug(D_PRUNE, "Debug: pp_prune pass deleted %d\n", removed_this_pass);
	}
	while (table_changed);

	cms_table_delete(names);

	if (removed_total > 0)
	{
		delete_unmarked_disjuncts(sent);
		if (verbosity_level(D_PRUNE))
		{
			prt_error("\n\\");
			prt_error("Debug: After pp_prune:\n\\");
			print_disjunct_counts(sent);
		}
	}

	print_time(opts, "pp pruning");
	return removed_total;
}
/**
 * Prune connectors by using the post-processor "contains one" rules.
 *
 * All the connectors of the sentence are inserted into a connector
 * multiset table: from the jet-sharing table if it exists (skipping
 * connectors whose refcount is 0), else directly from the disjuncts.
 * Then each rule whose selector has no wildcard is applied to the table
 * bucket selected by the hash of its selector: every connector there
 * that matches the selector gets nearest_word = BAD_WORD if the rule's
 * link set is not satisfiable from the table. Finally all the
 * connectors are traversed again and mark_bad_connectors() is called on
 * them.
 *
 * @param sent The sentence to prune. Nothing is done if it has no
 *             post-processor.
 * @param opts Parse options; nothing is done unless perform_pp_prune is
 *             set.
 * @return The number of connector chains (a jet-sharing table entry, or
 *         one direction of a disjunct) in which mark_bad_connectors()
 *         returned true; 0 if pruning was skipped.
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
	pp_knowledge *knowledge;
	multiset_table *cmt;

	if (sent->postprocessor == NULL) return 0;
	if (!opts->perform_pp_prune) return 0;

	knowledge = sent->postprocessor->knowledge;
	cmt = cms_table_new();

	/* Fill the cms table with all the connectors. */
	jet_sharing_t *js = &sent->jet_sharing;
	if (js->table[0] != NULL)
	{
		/* Table entries are visited from index 1 up to entries[dir]. */
		for (int dir = 0; dir < 2; dir++)
		{
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					insert_in_cms_table(cmt, c);
				}
			}
		}
	}
	else
	{
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						insert_in_cms_table(cmt, c);
					}
				}
			}
		}
	}

	int D_deleted = 0;       /* Number of deleted disjuncts */
	int Cname_deleted = 0;   /* Number of deleted connector names */

	for (size_t i = 0; i < knowledge->n_contains_one_rules; i++)
	{
		pp_rule *rule = &knowledge->contains_one_rules[i]; /* The ith rule */
		const char *selector = rule->selector;  /* Selector string for this rule */
		pp_linkset *link_set = rule->link_set;  /* The set of criterion links */
		unsigned int hash = cms_hash(selector);

		if (rule->selector_has_wildcard) continue; /* If it has a * forget it */

		/* The cms table is not changed in this loop, so once the rule is
		 * found to be unsatisfiable it stays so for every other connector
		 * that triggers it, and rule_satisfiable() need not be called
		 * again. This verdict is used only while handling this very rule,
		 * so a local flag is enough to remember it. */
		bool rule_unsatisfiable = false;

		for (Cms *cms = cmt->cms_table[hash]; cms != NULL; cms = cms->next)
		{
			Connector *c = cms->c;

			if (!post_process_match(selector, connector_string(c))) continue;

			ppdebug("Rule %zu: Selector %s, Connector %s\n",
			        i, selector, connector_string(c));

			/* We know c matches the trigger link of the rule. */
			/* Now check the criterion links */
			if (rule_unsatisfiable || !rule_satisfiable(cmt, link_set))
			{
				rule_unsatisfiable = true;
				ppdebug("DELETE %s refcount %d\n",
				        connector_string(c), c->refcount);
				c->nearest_word = BAD_WORD;
				Cname_deleted++;
				rule->use_count++;
			}
			else
			{
				/* The rule is satisfiable, so none of the connectors
				 * that trigger it is to be marked. */
				break;
			}
		}
	}

	/* Iterate over all connectors and mark the bad trigger connectors.
	 * If the marked connector is not the shallow one, note that the
	 * shallow one on the same disjunct cannot be marked too (this could
	 * facilitate faster detection by power_prune()) because this would be
	 * wrongly reflected through the cms table. */
	if (js->table[0] != NULL)
	{
		for (int dir = 0; dir < 2; dir++)
		{
			for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
			{
				for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
				{
					if (0 == c->refcount) continue;
					if (mark_bad_connectors(cmt, c))
					{
						D_deleted++;
						break; /* Count each connector chain only once. */
					}
				}
			}
		}
	}
	else
	{
		for (WordIdx w = 0; w < sent->length; w++)
		{
			for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
			{
				for (int dir = 0; dir < 2; dir++)
				{
					Connector *first_c = (dir) ? (d->left) : (d->right);
					for (Connector *c = first_c; c != NULL; c = c->next)
					{
						if (mark_bad_connectors(cmt, c))
						{
							D_deleted++;
							break; /* Count each connector chain only once. */
						}
					}
				}
			}
		}
	}

	lgdebug(+D_PRUNE, "Deleted %d (%d connector names)\n",
	        D_deleted, Cname_deleted);

	cms_table_delete(cmt);

	print_time(opts, "pp pruning");
	return D_deleted;
}