struct k_node *k_lookup(struct x_node *x0, struct x_node *x1, int flags) { struct hash_table *t = &k_hash_table; size_t hash = pair_hash(x0->x_hash, x1->x_hash, t->t_shift); struct hlist_head *head = t->t_table + (hash & t->t_mask); struct hlist_node *node; struct k_node *k; hlist_for_each_entry(k, node, head, k_hash_node) { if (k->k_x[0] == x0 && k->k_x[1] == x1) return k; } if (!(flags & L_CREATE)) return NULL; if (x_which(x0) != 0 || x_which(x1) != 1) { errno = EINVAL; return NULL; } k = malloc(sizeof(*k)); if (k == NULL) return NULL; /* k_init() */ memset(k, 0, sizeof(*k)); hlist_add_head(&k->k_hash_node, head); k->k_x[0] = x0; k->k_x[1] = x1; INIT_LIST_HEAD(&k->k_sub_list); nr_k++; return k; }
/**
 * Look up the table entry keyed by (lw, rw, le, re, null_count).
 *
 * Returns the matching entry when one is chained in its hash bucket.
 * Otherwise returns NULL, unless resources are (or have just been found
 * to be) exhausted; in that case a new entry with a zero count is stored
 * and returned instead.
 */
static Table_connector *
find_table_pointer(count_context_t *ctxt,
                   int lw, int rw,
                   Connector *le, Connector *re,
                   unsigned int null_count)
{
	unsigned int bucket = pair_hash(ctxt->table_size, lw, rw, le, re, null_count);
	Table_connector *entry = ctxt->table[bucket];

	while (entry != NULL)
	{
		if ((entry->lw == lw) && (entry->rw == rw) &&
		    (entry->le == le) && (entry->re == re) &&
		    (entry->null_count == null_count))
		{
			return entry;
		}
		entry = entry->next;
	}

	/* Create a new connector only if resources are exhausted.
	 * (???) Huh? I guess we're in panic parse mode in that case.
	 * checktimer is a device to avoid a gazillion system calls
	 * to get the timer value. On circa-2009 machines, it results
	 * in maybe 5-10 timer calls per second. */
	ctxt->checktimer++;
	if (!ctxt->exhausted)
	{
		/* Only consult the resource limits once every 450100 misses. */
		if ((0 != ctxt->checktimer % 450100) ||
		    (ctxt->current_resources == NULL) ||
		    !resources_exhausted(ctxt->current_resources))
		{
			return NULL;
		}
	}

	ctxt->exhausted = true;
	entry = table_store(ctxt, lw, rw, le, re, null_count);
	entry->count = hist_zero();
	return entry;
}
/**
 * Search the x_table bucket selected by pair_hash() for the entry whose
 * (lw, rw, le, re, cost) fields all equal the arguments.
 * Returns that entry, or NULL if the bucket's chain holds no match.
 */
static X_table_connector *
x_table_pointer(int lw, int rw, Connector *le, Connector *re,
                int cost, Parse_info pi)
{
	X_table_connector *entry;

	for (entry = pi->x_table[pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost)];
	     entry != NULL;
	     entry = entry->next)
	{
		if (entry->lw != lw) continue;
		if (entry->rw != rw) continue;
		if (entry->le != le) continue;
		if (entry->re != re) continue;
		if (entry->cost != cost) continue;
		return entry;
	}

	return NULL;
}
/**
 * Walk the x_table chain selected by pair_hash() and return the entry
 * whose embedded set has the given (lw, rw, le, re, null_count) key.
 * Returns NULL if no entry in the chain matches.
 */
static X_table_connector *
x_table_pointer(int lw, int rw, Connector *le, Connector *re,
                unsigned int null_count, Parse_info pi)
{
	X_table_connector *cur =
		pi->x_table[pair_hash(pi->x_table_size, lw, rw, le, re, null_count)];

	while (cur != NULL)
	{
		if ((cur->set.lw == lw) && (cur->set.rw == rw) &&
		    (cur->set.le == le) && (cur->set.re == re) &&
		    (cur->set.null_count == null_count))
		{
			break;
		}
		cur = cur->next;
	}

	/* cur is either the matching entry or NULL after running off the chain. */
	return cur;
}
/**
 * Allocate a new x_table entry for the key (lw, rw, le, re, cost),
 * attach the given parse set to it, and push it onto the front of its
 * hash bucket. No check for an existing entry is made; the caller is
 * expected to have ensured the key is not already present.
 * Returns the newly stored entry.
 */
static X_table_connector *
x_table_store(int lw, int rw, Connector *le, Connector *re,
              int cost, Parse_set * set, Parse_info pi)
{
	X_table_connector *entry;
	int bucket;

	entry = (X_table_connector *) xalloc(sizeof(X_table_connector));
	entry->set = set;
	entry->lw = lw;
	entry->rw = rw;
	entry->le = le;
	entry->re = re;
	entry->cost = cost;

	/* Link at the head of the bucket's chain. */
	bucket = pair_hash(pi->log2_x_table_size, lw, rw, le, re, cost);
	entry->next = pi->x_table[bucket];
	pi->x_table[bucket] = entry;

	return entry;
}
/**
 * Create a table entry for the key (lw, rw, le, re, null_count) and
 * prepend it to the chain of its hash bucket. The key is assumed not to
 * be in the table already; no lookup is performed here.
 * Returns the new entry; its count field is left for the caller to set.
 */
static Table_connector *
table_store(count_context_t *ctxt,
            int lw, int rw,
            Connector *le, Connector *re,
            unsigned int null_count)
{
	Table_connector *fresh = (Table_connector *) xalloc(sizeof(Table_connector));
	unsigned int slot;

	fresh->lw = lw;
	fresh->rw = rw;
	fresh->le = le;
	fresh->re = re;
	fresh->null_count = null_count;

	slot = pair_hash(ctxt->table_size, lw, rw, le, re, null_count);

	/* The previous chain head becomes the successor of the new entry. */
	fresh->next = ctxt->table[slot];
	ctxt->table[slot] = fresh;

	return fresh;
}
/**
 * Record `count` under the key (lw, rw, le, re, cost): a new entry is
 * allocated, filled in, and inserted at the head of its hash bucket.
 * The caller must already know the key is absent from the table.
 * Returns the entry that was inserted.
 */
static Table_connector *
table_store(count_context_t *ctxt,
            int lw, int rw,
            Connector *le, Connector *re,
            int cost, s64 count)
{
	Table_connector *node;
	int idx;

	node = (Table_connector *) xalloc(sizeof(Table_connector));
	node->count = count;
	node->lw = lw;
	node->rw = rw;
	node->le = le;
	node->re = re;
	node->cost = cost;

	idx = pair_hash(ctxt->log2_table_size, lw, rw, le, re, cost);

	/* Push onto the front of the bucket's singly linked chain. */
	node->next = ctxt->table[idx];
	ctxt->table[idx] = node;

	return node;
}
/**
 * Insert a new x_table entry for (lw, rw, le, re, null_count) whose set
 * is initialised from empty_set(). The entry goes to the front of its
 * hash bucket; the key is assumed not to be stored yet.
 * Returns the inserted entry.
 */
static X_table_connector *
x_table_store(int lw, int rw, Connector *le, Connector *re,
              unsigned int null_count, Parse_info pi)
{
	X_table_connector *created;
	unsigned int bucket;

	created = (X_table_connector *) xalloc(sizeof(X_table_connector));
	created->set = empty_set();
	created->lw = lw;
	created->rw = rw;
	created->le = le;
	created->re = re;
	created->null_count = null_count;

	bucket = pair_hash(pi->log2_x_table_size, lw, rw, le, re, null_count);

	/* Chain the old head behind the new entry. */
	created->next = pi->x_table[bucket];
	pi->x_table[bucket] = created;

	return created;
}
/**
 * Add an x_table entry whose embedded set carries the key
 * (lw, rw, le, re, null_count) and starts out empty (count 0, no first
 * or tail element). The entry is prepended to its hash bucket; the
 * caller is responsible for ensuring the key is not already stored.
 * Returns the new entry.
 */
static X_table_connector *
x_table_store(int lw, int rw, Connector *le, Connector *re,
              unsigned int null_count, Parse_info pi)
{
	X_table_connector *item =
		(X_table_connector *) xalloc(sizeof(X_table_connector));
	unsigned int where;

	/* Key fields of the embedded set. */
	item->set.lw = lw;
	item->set.rw = rw;
	item->set.null_count = null_count;
	item->set.le = le;
	item->set.re = re;

	/* The set starts with no members. */
	item->set.count = 0;
	item->set.first = NULL;
	item->set.tail = NULL;

	where = pair_hash(pi->x_table_size, lw, rw, le, re, null_count);
	item->next = pi->x_table[where];
	pi->x_table[where] = item;

	return item;
}
/**
 * Look up the table entry keyed by (lw, rw, le, re, cost).
 *
 * Returns the matching entry when the hash bucket contains one.
 * On a miss, returns NULL unless a resource tracker is present and
 * reports exhaustion; then a new entry with count 0 is stored and
 * returned.
 */
static Table_connector *
find_table_pointer(count_context_t *ctxt,
                   int lw, int rw,
                   Connector *le, Connector *re,
                   int cost)
{
	int bucket = pair_hash(ctxt->log2_table_size, lw, rw, le, re, cost);
	Table_connector *entry;

	for (entry = ctxt->table[bucket]; entry != NULL; entry = entry->next)
	{
		if (entry->lw != lw) continue;
		if (entry->rw != rw) continue;
		if (entry->le != le) continue;
		if (entry->re != re) continue;
		if (entry->cost == cost) return entry;
	}

	/* Create a new connector only if resources are exhausted.
	 * (???) Huh? I guess we're in panic parse mode in that case. */
	if ((ctxt->current_resources == NULL) ||
	    !resources_exhausted(ctxt->current_resources))
	{
		return NULL;
	}

	return table_store(ctxt, lw, rw, le, re, cost, 0);
}
void snap::CoOccurrenceMatrix::add_program(const std::string &text, int distance) { hasher.load_text(text); // get match positions std::map<std::string, std::vector<int>> raw_match_positions = snap::find(patterns, text); std::map<std::string, std::vector<int>> match_positions = snap::evaluate_expressions(expressions, raw_match_positions); // put all the positions in one vector std::vector<std::pair<int, std::string>> positions; for (std::pair<std::string, std::vector<int>> position : match_positions) { for (int idx : position.second) positions.emplace_back(idx, position.first); } std::sort(positions.begin(), positions.end()); std::unordered_set<std::string> program_added_pairs; std::unordered_set<std::string> context_added_pairs; for (auto it0 = positions.begin(); it0 != positions.end(); ++it0) { for (auto it1(it0); it1 >= positions.begin() && (it0 -> first) - (it1 -> first) <= distance; --it1) { if ((it0 -> first) == (it1 -> first)) { // if points to the same match ++std::get<2>(cooccurrences[it0 -> second][it1 -> second]); // program match cnt if (program_added_pairs.count(it0 -> second) == 0) { ++std::get<1>(cooccurrences[it0 -> second][it1 -> second]); program_added_pairs.insert(it0 -> second); } // context cnt int left_hash = hasher.hash(it0 -> first - left_hash_width, it0 -> first); int right_hash = hasher.hash(it0 -> first, it0 -> first + right_hash_width); int left_hash_cnt = left_hash_cnts[it0 -> second][left_hash]++; int right_hash_cnt = right_hash_cnts[it0 -> second][right_hash]++; if (left_hash_cnt == 0 && right_hash_cnt == 0 && context_added_pairs.count(it0 -> second) == 0) { ++std::get<0>(cooccurrences[it0 -> second][it1 -> second]); context_added_pairs.insert(it0 -> second); } } else if ((it0 -> second) != (it1 -> second)) { std::string expressionA, expressionB; if ((it0 -> second) < (it1 -> second)) { expressionA = it0 -> second; expressionB = it1 -> second; } else { // (it0 -> second) > (it1 -> second) expressionA = it1 -> second; expressionB = it0 
-> second; } std::string paired_string_hash(expressionA + "|" + expressionB); // total count ++std::get<2>(cooccurrences[expressionA][expressionB]); // program match count if (program_added_pairs.count(paired_string_hash) == 0) { ++std::get<1>(cooccurrences[expressionA][expressionB]); program_added_pairs.insert(paired_string_hash); } // context match // left 1, left 0 long long ll_hash = pair_hash(hasher.hash(it1 -> first - left_hash_width, it1 -> first), hasher.hash(it0 -> first - left_hash_width, it0 -> first)); int ll_hash_cnt = std::get<0>(pair_hash_cnts[it1 -> second][it0 -> second])[ll_hash]++; // left 1, right 0 long long lr_hash = pair_hash(hasher.hash(it1 -> first - left_hash_width, it1 -> first), hasher.hash(it0 -> first, it0 -> first + right_hash_width)); int lr_hash_cnt = std::get<1>(pair_hash_cnts[it1 -> second][it0 -> second])[lr_hash]++; // right 1, left 0 long long rl_hash = pair_hash(hasher.hash(it1 -> first, it1 -> first + right_hash_width), hasher.hash(it0 -> first - left_hash_width, it0 -> first)); int rl_hash_cnt = std::get<2>(pair_hash_cnts[it1 -> second][it0 -> second])[rl_hash]++; // right 1, right 0 long long rr_hash = pair_hash(hasher.hash(it1 -> first, it1 -> first + right_hash_width), hasher.hash(it0 -> first, it0 -> first + right_hash_width)); int rr_hash_cnt = std::get<3>(pair_hash_cnts[it1 -> second][it0 -> second])[rr_hash]++; if (ll_hash_cnt == 0 && lr_hash_cnt == 0 && rl_hash_cnt == 0 && rr_hash_cnt == 0 && context_added_pairs.count(paired_string_hash) == 0) { ++std::get<0>(cooccurrences[expressionA][expressionB]); context_added_pairs.insert(paired_string_hash); } } } } }