/**
 * Advance the trie iterator by one position.
 *
 * Does nothing if the iterator is already finished. Otherwise it either
 * advances the current hash-table iterator or, when positioned on a node's
 * nil key, clears that state and moves on via hattrie_iter_nextnode(). It then
 * keeps calling hattrie_iter_nextnode() until a table iterator with entries
 * or a nil key is available, or the node stack is empty. An exhausted table
 * iterator is released before returning.
 */
void hattrie_iter_next(hattrie_iter_t* i)
{
    if (hattrie_iter_finished(i)) {
        return;
    }

    const int table_has_more = (i->i != NULL) && !ahtable_iter_finished(i->i);
    if (table_has_more) {
        ahtable_iter_next(i->i);
    } else if (i->has_nil_key) {
        /* the nil key has been consumed; forget it and move on */
        i->has_nil_key = false;
        i->nil_val = 0;
        hattrie_iter_nextnode(i);
    }

    /* look for the next node that has something to yield */
    for (;;) {
        const int table_exhausted =
            (i->i == NULL) || ahtable_iter_finished(i->i);

        if (!table_exhausted || i->has_nil_key || i->stack == NULL) {
            break;
        }

        /* NOTE(review): i->i may be NULL here; this relies on
         * ahtable_iter_free() tolerating NULL -- confirm. */
        ahtable_iter_free(i->i);
        i->i = NULL;
        hattrie_iter_nextnode(i);
    }

    /* never leave an exhausted table iterator attached */
    if (i->i != NULL && ahtable_iter_finished(i->i)) {
        ahtable_iter_free(i->i);
        i->i = NULL;
    }
}
hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T, bool sorted) { hattrie_iter_t* i = malloc_or_die(sizeof(hattrie_iter_t)); i->T = T; i->sorted = sorted; i->i = NULL; i->keysize = 16; i->key = malloc_or_die(i->keysize * sizeof(char)); i->level = 0; i->has_nil_key = false; i->nil_val = 0; i->stack = malloc_or_die(sizeof(hattrie_node_stack_t)); i->stack->next = NULL; i->stack->node = T->root; i->stack->c = '\0'; i->stack->level = 0; while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) && i->stack != NULL ) { ahtable_iter_free(i->i); i->i = NULL; hattrie_iter_nextnode(i); } if (i->i != NULL && ahtable_iter_finished(i->i)) { ahtable_iter_free(i->i); i->i = NULL; } return i; }
/**
 * Move to the next node that can yield something.
 *
 * While there is no usable table iterator (none attached, or the attached one
 * is finished), no pending nil key, and the node stack is not empty, drop the
 * current table iterator and call hattrie_iter_nextnode(). Afterwards an
 * exhausted table iterator is released so that i->i is either NULL or has
 * entries left.
 *
 * TODO pick a better name
 */
static void hattrie_iter_step(hattrie_iter_t* i)
{
    for (;;) {
        const int table_exhausted =
            (i->i == NULL) || ahtable_iter_finished(i->i);

        if (!table_exhausted || i->has_nil_key || i->stack == NULL) {
            break;
        }

        /* NOTE(review): i->i may be NULL here; this relies on
         * ahtable_iter_free() tolerating NULL -- confirm. */
        ahtable_iter_free(i->i);
        i->i = NULL;
        hattrie_iter_nextnode(i);
    }

    if (i->i != NULL && ahtable_iter_finished(i->i)) {
        ahtable_iter_free(i->i);
        i->i = NULL;
    }
}
void test_ahtable_find_prev() { fprintf(stderr, "finding prev for %zu keys ... \n", k); ahtable_build_index(T); ahtable_iter_t i; ahtable_iter_begin(T, &i, true); value_t* u; const char *key = NULL; char *dkey = NULL; size_t len = 0; while (!ahtable_iter_finished(&i)) { u = ahtable_iter_val(&i); key = ahtable_iter_key(&i, &len); /* increase key last byte by 1 and check result */ dkey = realloc(dkey, len); memcpy(dkey, key, len); ++dkey[len-1]; value_t *fp = NULL; int r = ahtable_find_leq(T, dkey, len, &fp); if (*fp != *u || r != -1) { fprintf(stderr, "[error] ahtable_find_leq should find %lu, " "but found prev=%lu and return -1, returned %d\n", *u, *fp, r); } ahtable_iter_next(&i); } ahtable_iter_free(&i); free(dkey); fprintf(stderr, "done.\n"); }
/**
 * Distribute the keys of bucket `src` between `left` and `right`.
 *
 * A key whose first byte is greater than `split` belongs to `right`, every
 * other key to `left`. `src` may be the same bucket as `left` or `right`
 * (a reused bucket): keys that already live in their destination stay put,
 * and keys copied out of a reused bucket are deleted from it.
 *
 * When the destination is a pure bucket the leading byte is stripped from the
 * key before insertion.
 */
static void hattrie_split_fill(node_ptr src, node_ptr left, node_ptr right, uint8_t split)
{
    ahtable_iter_t it;
    ahtable_iter_begin(src.b, &it, false);

    while (!ahtable_iter_finished(&it)) {
        size_t len;
        const char* key = ahtable_iter_key(&it, &len);
        value_t* val = ahtable_iter_val(&it);
        assert(len > 0);

        /* first char > split point goes right, everything else goes left */
        const int goes_right = (unsigned char) key[0] > split;
        node_ptr dst   = goes_right ? right : left;
        node_ptr other = goes_right ? left : right;

        if (src.b != dst.b) {
            /* destination is a different bucket: copy the entry over */
            if (*dst.flag & NODE_TYPE_PURE_BUCKET) {
                ahtable_insert(dst.b, key + 1, len - 1, *val);
            } else {
                ahtable_insert(dst.b, key, len, *val);
            }

            /* src is the reused opposite bucket: drop the moved entry.
             * No explicit next() after the delete -- presumably
             * ahtable_iter_del() leaves the iterator on the following
             * entry; confirm against its implementation. */
            if (src.b == other.b) {
                ahtable_iter_del(&it);
                continue;
            }
        }
        /* else: the entry already lives in its destination bucket */

        ahtable_iter_next(&it);
    }

    ahtable_iter_free(&it);
}
/**
 * Walk the trie along `key` and invoke `cb` for stored entries on the way.
 *
 * First descends through trie nodes one key byte at a time, calling `cb` for
 * every visited child flagged NODE_HAS_VAL. If a bucket is reached before the
 * key is exhausted, the bucket is iterated in sorted order and `cb` is called
 * for each stored key that matches the remaining bytes of `key` as a prefix.
 * The walk ends early as soon as `cb` returns hattrie_walk_stop.
 *
 * @param T          trie to walk
 * @param key        key to follow
 * @param len        length of `key` in bytes
 * @param user_data  passed through to `cb` unchanged
 * @param cb         callback receiving (key, length, value pointer, user_data)
 */
void hattrie_walk (hattrie_t* T, const char* key, size_t len, void* user_data, hattrie_walk_cb cb)
{
    const unsigned char* cursor = (const unsigned char*)key;
    node_ptr node = T->root;
    size_t depth = 0;
    ahtable_iter_t* it;

    /* go down until a bucket is reached */
    while (depth < len && (*node.flag & NODE_TYPE_TRIE)) {
        node = node.t->xs[*cursor];
        if (*node.flag & NODE_HAS_VAL) {
            /* NOTE(review): `depth` bytes were consumed before this one, so
             * the matched prefix is depth + 1 bytes long, yet `depth` is
             * reported; the bucket path below reports a full length.
             * Possible off-by-one -- confirm against the callback contract. */
            if (hattrie_walk_stop == cb(key, depth, &node.t->val, user_data)) {
                return;
            }
        }
        ++depth;
        ++cursor;
    }

    /* the whole key was consumed inside trie nodes */
    if (depth == len) {
        return;
    }

    assert(depth);
    if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
        /* hybrid buckets store the leading byte too: step back one */
        depth--;
        cursor--;
    } else {
        assert(*node.flag & NODE_TYPE_PURE_BUCKET);
    }

    /* dict order ensured short => long */
    it = ahtable_iter_begin(node.b, true);
    while (!ahtable_iter_finished(it)) {
        size_t stored_len;
        const unsigned char* stored =
            (const unsigned char*)ahtable_iter_key(it, &stored_len);

        /* only stored keys that fit into the rest of `key` can match */
        if (stored_len + depth <= len) {
            size_t j = 0;
            while (j < stored_len && stored[j] == cursor[j]) {
                ++j;
            }

            if (j == stored_len) {
                value_t* val = ahtable_iter_val(it);
                if (hattrie_walk_stop == cb(key, depth + stored_len, val, user_data)) {
                    ahtable_iter_free(it);
                    return;
                }
            }
        }

        ahtable_iter_next(it);
    }

    ahtable_iter_free(it);
}
void test_ahtable_sorted_iteration() { fprintf(stderr, "iterating in order through %zu keys ... \n", k); ahtable_iter_t i; ahtable_iter_begin(T, &i, true); size_t count = 0; value_t* u; value_t v; char* prev_key = malloc(m_high + 1); size_t prev_len = 0; const char *key = NULL; size_t len = 0; while (!ahtable_iter_finished(&i)) { memcpy(prev_key, key, len); prev_len = len; ++count; key = ahtable_iter_key(&i, &len); if (prev_key != NULL && cmpkey(prev_key, prev_len, key, len) > 0) { fprintf(stderr, "[error] iteration is not correctly ordered.\n"); } u = ahtable_iter_val(&i); v = str_map_get(M, key, len); if (*u != v) { if (v == 0) { fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); } else { fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); } } // this way we will see an error if the same key is iterated through // twice str_map_set(M, key, len, 0); ahtable_iter_next(&i); } ahtable_iter_free(&i); free(prev_key); fprintf(stderr, "done.\n"); }
void test_ahtable_iteration() { fprintf(stderr, "iterating through %zu keys ... \n", k); ahtable_iter_t i; ahtable_iter_begin(T, &i, false); size_t count = 0; value_t* u; value_t v; size_t len; const char* key; while (!ahtable_iter_finished(&i)) { ++count; key = ahtable_iter_key(&i, &len); u = ahtable_iter_val(&i); v = str_map_get(M, key, len); if (*u != v) { if (v == 0) { fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); } else { fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); } } // this way we will see an error if the same key is iterated through // twice str_map_set(M, key, len, 0); ahtable_iter_next(&i); } if (count != M->m) { fprintf(stderr, "[error] iterated through %zu element, expected %zu\n", count, M->m); } ahtable_iter_free(&i); fprintf(stderr, "done.\n"); }
/**
 * Advance the trie iterator by one position, honouring a prefix filter.
 *
 * Each round advances the current hash-table iterator or, when positioned on
 * a node's nil key, clears that state and moves on via
 * hattrie_iter_nextnode(); hattrie_iter_step() then runs. When the iterator
 * has a non-zero prefix_len, rounds repeat for as long as
 * hattrie_iter_prefix_not_match() reports a mismatch. Returns immediately
 * once the iterator is finished.
 */
void hattrie_iter_next(hattrie_iter_t* i)
{
    for (;;) {
        if (hattrie_iter_finished(i)) {
            return;
        }

        const int table_has_more =
            (i->i != NULL) && !ahtable_iter_finished(i->i);

        if (table_has_more) {
            ahtable_iter_next(i->i);
        } else if (i->has_nil_key) {
            /* the nil key has been consumed; forget it and move on */
            i->has_nil_key = false;
            i->nil_val = 0;
            hattrie_iter_nextnode(i);
        }

        hattrie_iter_step(i);

        /* stop unless a prefix filter is active and rejects this position */
        if (!i->prefix_len || !hattrie_iter_prefix_not_match(i)) {
            break;
        }
    }
}
/**
 * Choose the byte at which a bucket should be split in two.
 *
 * Counts how many keys start with each possible leading byte, then, starting
 * from the bucket's lower bound c0, extends the left half one byte at a time
 * for as long as that does not increase the size difference between the two
 * halves and the left half does not absorb every key.
 *
 * @param node     bucket node to examine (node.b is read)
 * @param left_m   out: number of keys with a leading byte <= the split byte
 * @param right_m  out: number of remaining keys
 * @return the split byte, i.e. the last leading byte assigned to the left half
 */
int hattrie_split_mid(node_ptr node, unsigned *left_m, unsigned *right_m)
{
    /* count the number of occurrences of every leading character */
    unsigned int cs[NODE_CHILDS]; // occurrence count for leading chars
    memset(cs, 0, sizeof cs);
    size_t len;
    const char* key;

    /*! \todo expensive, maybe some heuristics or precalc would be better */
    ahtable_iter_t i;
    ahtable_iter_begin(node.b, &i, false);
    while (!ahtable_iter_finished(&i)) {
        key = ahtable_iter_key(&i, &len);
        assert(len > 0);
        cs[(unsigned char) key[0]] += 1;
        ahtable_iter_next(&i);
    }
    ahtable_iter_free(&i);

    /* choose a split point */
    unsigned int all_m;
    unsigned char j = node.b->c0;
    all_m = ahtable_size(node.b);
    *left_m = cs[j];
    *right_m = all_m - *left_m;
    int d;

    while (j + 1 < node.b->c1) {
        /* imbalance if the next byte were moved to the left half */
        d = abs((int) (*left_m + cs[j + 1]) - (int) (*right_m - cs[j + 1]));

        /* convert to int before subtracting: an unsigned difference would
         * wrap around before it ever reached abs() */
        if (d <= abs((int) *left_m - (int) *right_m) &&
            *left_m + cs[j + 1] < all_m) {
            j += 1;
            *left_m += cs[j];
            *right_m -= cs[j];
        } else {
            break;
        }
    }

    return j;
}
/**
 * Call `f` on every value stored in the subtree rooted at `node`.
 *
 * For a trie node, each distinct child is visited recursively, then the
 * node's own value (if it has NODE_HAS_VAL set) is passed to `f` exactly once.
 * For a bucket, `f` is called on every value in its hash table.
 *
 * @param node  subtree root
 * @param f     callback receiving a pointer to the value and `d`
 * @param d     opaque pointer passed through to `f`
 */
static void node_apply(node_ptr node, void (*f)(value_t*,void*), void* d)
{
    if (*node.flag & NODE_TYPE_TRIE) {
        size_t i;
        for (i = 0; i < NODE_CHILDS; ++i) {
            /* adjacent slots may share one child; visit it only once */
            if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t) {
                continue;
            }

            if (node.t->xs[i].t) {
                node_apply(node.t->xs[i], f, d);
            }
        }

        /* the node's own value is handled outside the child loop so that
         * it is visited once, not once per distinct child slot */
        if (*node.flag & NODE_HAS_VAL) {
            f(&node.t->val, d);
        }
    } else {
        ahtable_iter_t i;
        ahtable_iter_begin(node.b, &i, false);
        while (!ahtable_iter_finished(&i)) {
            f(ahtable_iter_val(&i), d);
            ahtable_iter_next(&i);
        }
        ahtable_iter_free(&i);
    }
}
/* Perform one split operation on the given node with the given parent.
 *
 * A pure bucket is not split: a new trie node (from alloc_trie_node) is
 * placed in the parent's slot, a value stored under the empty key moves to
 * that trie node, and the bucket is re-flagged as a hybrid bucket covering
 * [0x00, NODE_MAXCHAR].
 *
 * A hybrid bucket is replaced by two freshly created tables covering
 * [c0, j] and [j + 1, c1]; the parent's slots are repointed, all entries are
 * copied over and the old table is freed.
 */
static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node)
{
    /* only buckets may be split */
    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
           *node.flag & NODE_TYPE_HYBRID_BUCKET);

    assert(*parent.flag & NODE_TYPE_TRIE);

    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        /* turn the pure bucket into a hybrid bucket */
        parent.t->xs[node.b->c0].t = alloc_trie_node(T, node);

        /* if the bucket had an empty key, move it to the new trie node */
        value_t* val = ahtable_tryget(node.b, NULL, 0);
        if (val) {
            parent.t->xs[node.b->c0].t->val = *val;
            parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL;
            *val = 0;
            ahtable_del(node.b, NULL, 0);
        }

        node.b->c0   = 0x00;
        node.b->c1   = NODE_MAXCHAR;
        node.b->flag = NODE_TYPE_HYBRID_BUCKET;

        return;
    }

    /* This is a hybrid bucket. Perform a proper split. */

    /* count the number of occurrences of every leading character */
    unsigned int cs[NODE_CHILDS]; // occurrence count for leading chars
    memset(cs, 0, NODE_CHILDS * sizeof(unsigned int));
    size_t len;
    const char* key;

    ahtable_iter_t* i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        assert(len > 0);
        cs[(unsigned char) key[0]] += 1;
        ahtable_iter_next(i);
    }
    ahtable_iter_free(i);

    /* choose a split point */
    unsigned int left_m, right_m, all_m;
    unsigned char j = node.b->c0;
    all_m   = ahtable_size(node.b);
    left_m  = cs[j];
    right_m = all_m - left_m;
    int d;

    while (j + 1 < node.b->c1) {
        /* imbalance if the next byte were moved to the left half */
        d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1]));

        /* convert to int before subtracting: an unsigned difference would
         * wrap around before it ever reached abs() */
        if (d <= abs((int) left_m - (int) right_m) &&
            left_m + cs[j + 1] < all_m) {
            j += 1;
            left_m  += cs[j];
            right_m -= cs[j];
        } else {
            break;
        }
    }

    /* now split into two nodes corresponding to ranges [0, j] and
     * [j + 1, NODE_MAXCHAR], respectively. */

    /* create new left and right nodes */

    /* TODO: Add a special case if either node is a hybrid bucket containing all
     * the keys. In such a case, do not build a new table, just use the old one.
     * */
    size_t num_slots;

    /* smallest power-of-two multiple of the initial size that keeps the
     * left table at or below the maximum load factor */
    for (num_slots = ahtable_initial_size;
         (double) left_m > ahtable_max_load_factor * (double) num_slots;
         num_slots *= 2);

    node_ptr left, right;
    left.b = ahtable_create_n(num_slots);
    left.b->c0   = node.b->c0;
    left.b->c1   = j;
    left.b->flag = left.b->c0 == left.b->c1 ?
                       NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;

    /* same sizing rule for the right table */
    for (num_slots = ahtable_initial_size;
         (double) right_m > ahtable_max_load_factor * (double) num_slots;
         num_slots *= 2);

    right.b = ahtable_create_n(num_slots);
    right.b->c0   = j + 1;
    right.b->c1   = node.b->c1;
    right.b->flag = right.b->c0 == right.b->c1 ?
                        NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;

    /* update the parent's pointer */
    unsigned int c;
    for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left;
    for (; c <= node.b->c1; ++c)      parent.t->xs[c] = right;

    /* distribute keys to the new left or right node */
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        u   = ahtable_iter_val(i);
        assert(len > 0);

        /* left */
        if ((unsigned char) key[0] <= j) {
            /* a pure bucket stores keys without their leading byte */
            if (*left.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(left.b, key + 1, len - 1);
            } else {
                v = ahtable_get(left.b, key, len);
            }
            *v = *u;
        }

        /* right */
        else {
            if (*right.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(right.b, key + 1, len - 1);
            } else {
                v = ahtable_get(right.b, key, len);
            }
            *v = *u;
        }

        ahtable_iter_next(i);
    }

    ahtable_iter_free(i);
    ahtable_free(node.b);
}