/**
 * Record connector c in the multiset table.
 *
 * Entries in a bucket are identified by the connector descriptor
 * (c->desc). If no entry with the same descriptor exists, a new one is
 * allocated and pushed at the head of the bucket. If one exists, it is
 * moved to the head of the bucket so that buckets are kept in
 * most-recently-used order.
 */
static void insert_in_cms_table(multiset_table *cmt, Connector *c)
{
    const unsigned int bucket = cms_hash(connector_string(c));
    Cms *before = NULL;
    Cms *hit = cmt->cms_table[bucket];

    /* Walk the chain, remembering the predecessor of the current entry. */
    while ((hit != NULL) && (hit->c->desc != c->desc))
    {
        before = hit;
        hit = hit->next;
    }

    if (hit == NULL)
    {
        /* Not seen yet: create a new entry. */
        hit = (Cms *) xalloc(sizeof(Cms));
        hit->c = c;
    }
    else
    {
        /* Already the first entry of the bucket: nothing to reorder. */
        if (before == NULL) return;

        /* Unlink it so that it can be re-inserted at the head. */
        before->next = hit->next;
    }

    /* Push (new or unlinked) entry at the head of the bucket. */
    hit->next = cmt->cms_table[bucket];
    cmt->cms_table[bucket] = hit;
}
/**
 * Search the bucket selected by cms_hash(str) for an entry whose name
 * satisfies string_set_cmp(str, name).
 *
 * Returns the first such entry, or NULL if the bucket holds none.
 */
static Cms *lookup_in_cms_table(multiset_table *cmt, const char *str)
{
    Cms *entry = cmt->cms_table[cms_hash(str)];

    while (entry != NULL)
    {
        if (string_set_cmp(str, entry->name)) break;
        entry = entry->next;
    }

    return entry;
}
/**
 * Check whether the table holds a connector name C for which
 * post_process_match(pp_match_name, C) is TRUE.
 *
 * Only the bucket selected by cms_hash(pp_match_name) is examined.
 */
static bool match_in_cms_table(multiset_table *cmt, const char *pp_match_name)
{
    Cms *entry = cmt->cms_table[cms_hash(pp_match_name)];

    while (entry != NULL)
    {
        if (post_process_match(pp_match_name, entry->name))
        {
            return true;
        }
        entry = entry->next;
    }

    return false;
}
/**
 * Find the table entry that has the same connector descriptor as c.
 *
 * Returns the entry, or NULL if the bucket selected by the hash of the
 * connector string of c contains no entry with that descriptor.
 *
 * FIXME? The bucket search here duplicates the one done in
 * insert_in_cms_table(), but sharing it seems cumbersome.
 */
static Cms *lookup_in_cms_table(multiset_table *cmt, Connector *c)
{
    Cms *entry = cmt->cms_table[cms_hash(connector_string(c))];

    while ((entry != NULL) && (entry->c->desc != c->desc))
    {
        entry = entry->next;
    }

    return entry;
}
/**
 * Add one occurrence of the name str to the multiset table.
 *
 * If lookup_in_cms_table() already finds an entry for str, its count is
 * incremented. Otherwise a new entry with a count of 1 is allocated and
 * pushed at the head of the bucket selected by cms_hash(str).
 *
 * The string is not copied: the entry only keeps the pointer, and the
 * string is not freed through the table later.
 */
static void insert_in_cms_table(multiset_table *cmt, const char *str)
{
    Cms *entry = lookup_in_cms_table(cmt, str);
    unsigned int bucket;

    if (entry != NULL)
    {
        /* Known name: just bump its multiplicity. */
        entry->count++;
        return;
    }

    entry = (Cms *) xalloc(sizeof(Cms));
    entry->name = str; /* Pointer only - see the note above. */
    entry->count = 1;

    bucket = cms_hash(str);
    entry->next = cmt->cms_table[bucket];
    cmt->cms_table[bucket] = entry;
}
/**
 * Check whether the table holds a connector that, together with the
 * connector name c, satisfies can_form_link() for pp_link, i.e. can
 * create a link x such that post_process_match(pp_link, x) is TRUE.
 *
 * Only the bucket selected by cms_hash(pp_link) is examined. Each
 * examined entry is reported through ppdebug().
 */
static bool match_in_cms_table(multiset_table *cmt, const char *pp_link,
                               const char *c)
{
    Cms *entry = cmt->cms_table[cms_hash(pp_link)];

    for (; entry != NULL; entry = entry->next)
    {
        if (!can_form_link(pp_link, connector_string(entry->c), c))
        {
            ppdebug("NOT-MATCHED %s \n", connector_string(entry->c));
            continue;
        }

        ppdebug("MATCHED %s\n", connector_string(entry->c));
        return true;
    }

    return false;
}
/**
 * Increment the counters selected for `item`.
 *
 * For every hash index from 0 up to cms->k_num - 1 (at least one index
 * is always processed), the counter at
 * cms_hash(...) % cms->vector_entries is incremented, unless it has
 * already reached CMSCOUNT_MAX, in which case it is left untouched.
 *
 * @param cms       The counter structure to update.
 * @param item      The item bytes.
 * @param item_len  The length of `item`.
 * @return 1 if at least one selected counter was already at
 *         CMSCOUNT_MAX (and thus was not incremented), else 0.
 */
_Bool cms_incr(const CMS * const cms, const char * const item,
               const size_t item_len)
{
    uint64_t hashes[2];
    _Bool    saturated = 0;
    size_t   row = (size_t) 0U;

    for (;;) {
        const size_t slot = (size_t)
            (cms_hash(cms, hashes, item, item_len, row) % cms->vector_entries);

        if (cms->vector[slot] < CMSCOUNT_MAX) {
            cms->vector[slot]++;
        } else {
            saturated = 1;
        }
        if (++row >= cms->k_num) {
            break;
        }
    }

    return saturated;
}
/**
 * Return the smallest of the counters selected for `item`.
 *
 * For every hash index from 0 up to cms->k_num - 1 (at least one index
 * is always processed), the counter at
 * cms_hash(...) % cms->vector_entries is read; the minimum of the
 * values read is returned.
 *
 * @param cms       The counter structure to query.
 * @param item      The item bytes.
 * @param item_len  The length of `item`.
 * @return The minimum counter value found for `item`.
 */
CMSCount cms_count(const CMS * const cms, const char * const item,
                   const size_t item_len)
{
    uint64_t hashes[2];
    CMSCount lowest = 0;
    size_t   row = (size_t) 0U;

    for (;;) {
        const size_t slot = (size_t)
            (cms_hash(cms, hashes, item, item_len, row) % cms->vector_entries);
        const CMSCount seen = cms->vector[slot];

        /* The first value read always becomes the initial minimum. */
        if (row == (size_t) 0U || seen < lowest) {
            lowest = seen;
        }
        if (++row >= cms->k_num) {
            break;
        }
    }

    return lowest;
}
/**
 * Prune connectors using the post-processing "contains one" rules.
 *
 * The function works in three steps:
 *  1. Insert the connectors of the sentence into a fresh cms table.
 *     If the jet-sharing table exists (js->table[0] != NULL), the shared
 *     connector chains are used (connectors with a refcount of 0 are
 *     skipped); otherwise the connectors of every disjunct of every
 *     word are used.
 *  2. For each "contains one" rule whose selector has no wildcard, look
 *     at the table connectors that match the selector. If the rule is
 *     not satisfiable (according to rule_satisfiable()), each such
 *     connector is marked by setting its nearest_word to BAD_WORD.
 *  3. Go over the same connectors as in step 1 and call
 *     mark_bad_connectors() on them, counting the hits.
 *
 * @param sent  The sentence to prune.
 * @param opts  Parse options; perform_pp_prune must be set for any work
 *              to be done.
 * @return 0 if the sentence has no postprocessor or pp pruning is
 *         disabled; else the number of connector chains for which
 *         mark_bad_connectors() returned true (reported in the debug
 *         output as the number of deleted disjuncts).
 */
static int pp_prune(Sentence sent, Parse_Options opts)
{
    pp_knowledge *knowledge;
    multiset_table *cmt;

    if (sent->postprocessor == NULL) return 0;
    if (!opts->perform_pp_prune) return 0;

    knowledge = sent->postprocessor->knowledge;
    cmt = cms_table_new();

    /* Step 1: Populate the cms table. */
    jet_sharing_t *js = &sent->jet_sharing;
    if (js->table[0] != NULL)
    {
        for (int dir = 0; dir < 2; dir++)
        {
            /* The ids in use are 1 .. entries[dir] (inclusive). */
            for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
            {
                for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
                {
                    if (0 == c->refcount) continue;
                    insert_in_cms_table(cmt, c);
                }
            }
        }
    }
    else
    {
        for (WordIdx w = 0; w < sent->length; w++)
        {
            for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
            {
                for (int dir = 0; dir < 2; dir++)
                {
                    Connector *first_c = (dir) ? (d->left) : (d->right);
                    for (Connector *c = first_c; c != NULL; c = c->next)
                    {
                        insert_in_cms_table(cmt, c);
                    }
                }
            }
        }
    }

    int D_deleted = 0;       /* Number of deleted disjuncts */
    int Cname_deleted = 0;   /* Number of deleted connector names */

    /* Step 2: Apply the rules.
     * Since the cms table is unchanged while the rules are applied,
     * once rule_satisfiable() has been evaluated for a rule its result
     * is valid for all the connectors that match the rule selector.
     * Values: 0xFF: Undecided yet; 0: Rule unsatisfiable;
     * 1: Rule satisfiable.
     * The array size is derived from the element type of rule_ok so
     * that the allocation and the memset() always agree with it. */
    const size_t n_rules = knowledge->n_contains_one_rules;
    uint8_t *rule_ok = alloca(n_rules * sizeof(*rule_ok));
    memset(rule_ok, 0xFF, n_rules * sizeof(*rule_ok));

    for (size_t i = 0; i < n_rules; i++)
    {
        pp_rule *rule = &knowledge->contains_one_rules[i]; /* The ith rule */
        const char *selector = rule->selector;  /* Selector string for this rule */
        pp_linkset *link_set = rule->link_set;  /* The set of criterion links */
        unsigned int hash = cms_hash(selector);

        if (rule->selector_has_wildcard)
        {
            rule_ok[i] = 1;
            continue;  /* If it has a * forget it */
        }

        for (Cms *cms = cmt->cms_table[hash]; cms != NULL; cms = cms->next)
        {
            Connector *c = cms->c;
            if (!post_process_match(selector, connector_string(c))) continue;

            ppdebug("Rule %zu: Selector %s, Connector %s\n",
                    i, selector, connector_string(c));
            /* We know c matches the trigger link of the rule. */
            /* Now check the criterion links. The short-circuit ensures
             * that rule_satisfiable() is evaluated at most once per
             * unsatisfiable rule. */
            if ((rule_ok[i] == 0) || !rule_satisfiable(cmt, link_set))
            {
                rule_ok[i] = 0;
                ppdebug("DELETE %s refcount %d\n",
                        connector_string(c), c->refcount);
                c->nearest_word = BAD_WORD;
                Cname_deleted++;
                rule->use_count++;
            }
            else
            {
                /* A satisfiable rule deletes nothing; go to the next rule. */
                rule_ok[i] = 1;
                break;
            }
        }
    }

    /* Step 3: Iterate over all connectors and mark the bad trigger
     * connectors. If the marked connector is not the shallow one, note
     * that the shallow one on the same disjunct cannot be marked too
     * (this could facilitate faster detection by power_prune()) because
     * this would be wrongly reflected through the cms table. */
    if (js->table[0] != NULL)
    {
        for (int dir = 0; dir < 2; dir++)
        {
            for (unsigned int id = 1; id < js->entries[dir] + 1; id++)
            {
                for (Connector *c = js->table[dir][id].c; NULL != c; c = c->next)
                {
                    if (0 == c->refcount) continue;
                    if (mark_bad_connectors(cmt, c))
                    {
                        D_deleted++;
                        break;
                    }
                }
            }
        }
    }
    else
    {
        for (WordIdx w = 0; w < sent->length; w++)
        {
            for (Disjunct *d = sent->word[w].d; d != NULL; d = d->next)
            {
                for (int dir = 0; dir < 2; dir++)
                {
                    Connector *first_c = (dir) ? (d->left) : (d->right);
                    for (Connector *c = first_c; c != NULL; c = c->next)
                    {
                        if (mark_bad_connectors(cmt, c))
                        {
                            D_deleted++;
                            break;
                        }
                    }
                }
            }
        }
    }

    lgdebug(+D_PRUNE, "Deleted %d (%d connector names)\n",
            D_deleted, Cname_deleted);

    cms_table_delete(cmt);

    print_time(opts, "pp pruning");

    return D_deleted;
}