/*
 * Split a bucket that hangs off the trie node `parent`.
 *
 * T      - trie handle, forwarded to alloc_trie_node().
 * parent - trie node that refers to `node` (asserted to be a trie node).
 * node   - bucket to split (asserted to be a pure or a hybrid bucket).
 *
 * A pure bucket is not really split: a fresh trie node is placed between
 * `parent` and the bucket, and the bucket is relabelled as a hybrid bucket
 * covering the character range [0x00, TRIE_MAXCHAR].
 * A hybrid bucket is handed to hattrie_split_h(), which is defined elsewhere.
 */
static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node)
{
    /* only buckets may be split */
    assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET);
    assert(*parent.flag & NODE_TYPE_TRIE);

    /* Anything that is not a pure bucket is a hybrid one: do a proper split. */
    if (!(*node.flag & NODE_TYPE_PURE_BUCKET)) {
        hattrie_split_h(parent, node);
        return;
    }

    /* Pure bucket: hang a new trie node in the parent's slot for c0. */
    node_ptr* slot = &parent.t->xs[node.b->c0];
    slot->t = alloc_trie_node(T, node);

    /* A value stored under the empty key moves up into the new trie node. */
    value_t* empty_val = ahtable_tryget(node.b, NULL, 0);
    if (empty_val != NULL) {
        slot->t->val   = *empty_val;
        slot->t->flag |= NODE_HAS_VAL;
        *empty_val = 0;
        ahtable_del(node.b, NULL, 0);
    }

    /* The bucket becomes a hybrid bucket spanning the full character range. */
    node.b->c0   = 0x00;
    node.b->c1   = TRIE_MAXCHAR;
    node.b->flag = NODE_TYPE_HYBRID_BUCKET;
}
/*
 * Look up a value stored under `key` or, failing that, under a shorter
 * prefix of it ("lpr" presumably stands for longest prefix - confirm).
 *
 * T   - trie to search.
 * key - key bytes.
 * len - number of bytes in key.
 * dst - out: set to the address of the matching value, or NULL if none.
 *
 * Returns 0 when *dst was set to a value, -1 otherwise.
 */
int hattrie_find_lpr (hattrie_t* T, const char* key, size_t len, value_t** dst)
{
    /* Node stack used for the traceback; starts on the C stack and may be
     * replaced with heap memory by hattrie_find_ns(). */
    node_ptr  inline_stack[NODESTACK_INIT];
    node_ptr* stack = inline_stack;
    size_t    top   = 0;

    stack[top] = T->root;
    *dst = NULL;

    /* Descend through trie nodes; key/len are advanced past what was consumed. */
    node_ptr cur = hattrie_find_ns(&stack, &top, NODESTACK_INIT, &key, &len);
    if (cur.flag == NULL) {
        if (top == 0) {
            /* empty trie, no prefix match */
            if (stack != inline_stack) {
                free(stack);
            }
            return -1;
        }
        /* dead end: step back to the previous node on the stack */
        top -= 1;
        cur = stack[top];
    }

    if (*cur.flag & NODE_TYPE_TRIE) {
        /* NOTE(review): the value is taken without testing NODE_HAS_VAL here;
         * whether that is always safe depends on hattrie_find_ns - confirm. */
        *dst = &cur.t->val;
    } else {
        /* Bucket: probe the remaining key at lengths len, len-1, ..., 0. */
        for (size_t n = len; ; --n) {
            *dst = ahtable_tryget(cur.b, key, n);
            if (*dst != NULL || n == 0) {
                break;
            }
        }
    }

    /* Nothing in the current node: walk the stack towards the root and take
     * the first node that carries a value. */
    for (size_t i = top; *dst == NULL; --i) {
        node_ptr anc = stack[i];
        if (*anc.flag & NODE_HAS_VAL) {
            *dst = &anc.t->val;
        }
        if (i == 0) {
            break;
        }
    }

    int status = (*dst != NULL) ? 0 : -1;

    if (stack != inline_stack) {
        free(stack);
    }
    return status;
}
/*
 * Look up `key` without inserting it.
 *
 * T   - trie to search.
 * key - key bytes.
 * len - number of bytes in key.
 *
 * Returns the address of the stored value, or NULL when hattrie_find()
 * yields no node or the bucket lookup finds nothing.
 */
value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
{
    /* hattrie_find() advances key/len past the part consumed by trie nodes. */
    node_ptr hit = hattrie_find(T, &key, &len);

    if (hit.flag == NULL) {
        return NULL;
    }

    /* A trie node holds the value itself; a bucket is searched with the
     * remaining key. */
    return (*hit.flag & NODE_TYPE_TRIE)
         ? &hit.t->val
         : ahtable_tryget(hit.b, key, len);
}
/*
 * Find the value stored under `key`, or else under the closest preceding key.
 *
 * T   - trie to search.
 * key - key bytes.
 * len - number of bytes in key.
 * dst - out: address of the value found, or NULL.
 *
 * Returns  0 on an exact match,
 *         -1 when a previous key was found by walking the trie,
 *          1 when no previous key exists.
 * When the bucket-level ahtable_find_leq() sets *dst, its return value is
 * passed through unchanged.
 */
int hattrie_find_leq (hattrie_t* T, const char* key, size_t len, value_t** dst)
{
    /* Node stack for the traceback; hattrie_find_ns() may move it to the heap. */
    node_ptr  inline_stack[NODESTACK_INIT];
    node_ptr* stack = inline_stack;
    size_t    top   = 0;
    stack[top] = T->root;

    int status = 1; /* no node on the left matches */
    int need_walk;

    node_ptr hit = hattrie_find_ns(&stack, &top, NODESTACK_INIT, &key, &len);

    if (hit.flag == NULL) {
        /* no node for this key: go straight to the walk */
        need_walk = 1;
    } else {
        if (*hit.flag & NODE_TYPE_TRIE) {
            /* the trie node itself carries the value: exact match */
            *dst = &hit.t->val;
            status = 0;
        } else {
            *dst = ahtable_tryget(hit.b, key, len);
            if (*dst != NULL) {
                status = 0; /* exact match inside the bucket */
            } else {
                /* look for a previous key inside the same bucket */
                status = ahtable_find_leq(hit.b, key, len, dst);
            }
        }
        need_walk = (*dst == NULL);
    }

    /* Nothing equal or smaller in the node: search leftwards through the trie. */
    if (need_walk) {
        *dst = hattrie_walk(stack, top, key, hattrie_find_rightmost);
        status = (*dst != NULL) ? -1 : 1;
    }

    if (stack != inline_stack) {
        free(stack);
    }
    return status;
}
void test_ahtable_insert() { fprintf(stderr, "inserting %zu keys ... \n", k); size_t i, j; value_t* u; value_t v; for (j = 0; j < k; ++j) { i = rand() % n; v = 1 + str_map_get(M, xs[i], strlen(xs[i])); str_map_set(M, xs[i], strlen(xs[i]), v); u = ahtable_get(T, xs[i], strlen(xs[i])); *u += 1; if (*u != v) { fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n", *u, v); } } /* delete some keys */ for (j = 0; i < k/100; ++j) { i = rand() % n; ahtable_del(T, xs[i], strlen(xs[i])); str_map_del(M, xs[i], strlen(xs[i])); u = ahtable_tryget(T, xs[i], strlen(xs[i])); if (u) { fprintf(stderr, "[error] deleted node found in ahtable\n"); } } fprintf(stderr, "done.\n"); }
/*
 * Perform one split operation on the given node with the given parent.
 *
 * T      - trie handle, forwarded to alloc_trie_node().
 * parent - trie node that refers to `node` (asserted to be a trie node).
 * node   - bucket to split (asserted to be a pure or a hybrid bucket).
 *
 * A pure bucket is turned into a hybrid bucket below a freshly allocated
 * trie node. A hybrid bucket is replaced by two new buckets covering the
 * character ranges [c0, j] and [j + 1, c1]; its keys are copied into them,
 * the parent's child slots are redirected, and the old table is freed.
 */
static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node)
{
    /* only buckets may be split */
    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
           *node.flag & NODE_TYPE_HYBRID_BUCKET);

    assert(*parent.flag & NODE_TYPE_TRIE);

    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        /* turn the pure bucket into a hybrid bucket */
        parent.t->xs[node.b->c0].t = alloc_trie_node(T, node);

        /* if the bucket had an empty key, move it to the new trie node */
        value_t* val = ahtable_tryget(node.b, NULL, 0);
        if (val) {
            parent.t->xs[node.b->c0].t->val     = *val;
            parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL;
            *val = 0;
            ahtable_del(node.b, NULL, 0);
        }

        node.b->c0   = 0x00;
        node.b->c1   = NODE_MAXCHAR;
        node.b->flag = NODE_TYPE_HYBRID_BUCKET;

        return;
    }

    /* This is a hybrid bucket. Perform a proper split. */

    /* count the number of occurrences of every leading character */
    unsigned int cs[NODE_CHILDS]; /* occurrence count for leading chars */
    memset(cs, 0, sizeof cs);
    size_t len;
    const char* key;

    ahtable_iter_t* i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        assert(len > 0);
        cs[(unsigned char) key[0]] += 1;
        ahtable_iter_next(i);
    }
    ahtable_iter_free(i);

    /* choose a split point: grow the left range one character at a time
     * while that does not worsen the left/right imbalance and the left side
     * would not end up holding every key */
    unsigned int left_m, right_m, all_m;
    unsigned char j = node.b->c0;
    all_m   = ahtable_size(node.b);
    left_m  = cs[j];
    right_m = all_m - left_m;
    int d;

    while (j + 1 < node.b->c1) {
        d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1]));
        /* Convert to int before subtracting: the unsigned difference wraps
         * whenever left_m < right_m, which abs() is not meant to receive. */
        if (d <= abs((int) left_m - (int) right_m) && left_m + cs[j + 1] < all_m) {
            j += 1;
            left_m  += cs[j];
            right_m -= cs[j];
        }
        else break;
    }

    /* now split into two nodes corresponding to ranges [0, j] and
     * [j + 1, NODE_MAXCHAR], respectively. */

    /* create new left and right nodes */

    /* TODO: Add a special case if either node is a hybrid bucket containing all
     * the keys. In such a case, do not build a new table, just use the old one.
     * */
    size_t num_slots;

    /* size each new table so its key count stays within the load factor */
    for (num_slots = ahtable_initial_size;
            (double) left_m > ahtable_max_load_factor * (double) num_slots;
            num_slots *= 2) {
        /* empty: the loop header does the work */
    }

    node_ptr left, right;
    left.b = ahtable_create_n(num_slots);
    left.b->c0   = node.b->c0;
    left.b->c1   = j;
    /* a bucket covering a single character is pure, otherwise hybrid */
    left.b->flag = left.b->c0 == left.b->c1 ?
                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;

    for (num_slots = ahtable_initial_size;
            (double) right_m > ahtable_max_load_factor * (double) num_slots;
            num_slots *= 2) {
        /* empty: the loop header does the work */
    }

    right.b = ahtable_create_n(num_slots);
    right.b->c0   = j + 1;
    right.b->c1   = node.b->c1;
    right.b->flag = right.b->c0 == right.b->c1 ?
                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;

    /* update the parent's pointer */
    unsigned int c;
    for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left;
    for (; c <= node.b->c1; ++c)      parent.t->xs[c] = right;

    /* distribute keys to the new left or right node */
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        u   = ahtable_iter_val(i);
        assert(len > 0);

        /* keys whose leading character is <= j go left, the rest go right */
        node_ptr target = ((unsigned char) key[0] <= j) ? left : right;

        /* a pure bucket stores keys without their leading character */
        if (*target.flag & NODE_TYPE_PURE_BUCKET) {
            v = ahtable_get(target.b, key + 1, len - 1);
        }
        else {
            v = ahtable_get(target.b, key, len);
        }
        *v = *u;

        ahtable_iter_next(i);
    }

    ahtable_iter_free(i);
    ahtable_free(node.b);
}