/* Perform one split operation on the given node with the given parent. */ static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node) { /* only buckets may be split */ assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET); assert(*parent.flag & NODE_TYPE_TRIE); if (*node.flag & NODE_TYPE_PURE_BUCKET) { /* turn the pure bucket into a hybrid bucket */ parent.t->xs[node.b->c0].t = alloc_trie_node(T, node); /* if the bucket had an empty key, move it to the new trie node */ value_t* val = ahtable_tryget(node.b, NULL, 0); if (val) { parent.t->xs[node.b->c0].t->val = *val; parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL; *val = 0; ahtable_del(node.b, NULL, 0); } node.b->c0 = 0x00; node.b->c1 = NODE_MAXCHAR; node.b->flag = NODE_TYPE_HYBRID_BUCKET; return; } /* This is a hybrid bucket. Perform a proper split. */ /* count the number of occourances of every leading character */ unsigned int cs[NODE_CHILDS]; // occurance count for leading chars memset(cs, 0, NODE_CHILDS * sizeof(unsigned int)); size_t len; const char* key; ahtable_iter_t* i = ahtable_iter_begin(node.b, false); while (!ahtable_iter_finished(i)) { key = ahtable_iter_key(i, &len); assert(len > 0); cs[(unsigned char) key[0]] += 1; ahtable_iter_next(i); } ahtable_iter_free(i); /* choose a split point */ unsigned int left_m, right_m, all_m; unsigned char j = node.b->c0; all_m = ahtable_size(node.b); left_m = cs[j]; right_m = all_m - left_m; int d; while (j + 1 < node.b->c1) { d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1])); if (d <= abs(left_m - right_m) && left_m + cs[j + 1] < all_m) { j += 1; left_m += cs[j]; right_m -= cs[j]; } else break; } /* now split into two node cooresponding to ranges [0, j] and * [j + 1, NODE_MAXCHAR], respectively. */ /* create new left and right nodes */ /* TODO: Add a special case if either node is a hybrid bucket containing all * the keys. In such a case, do not build a new table, just use the old one. 
* */ size_t num_slots; for (num_slots = ahtable_initial_size; (double) left_m > ahtable_max_load_factor * (double) num_slots; num_slots *= 2); node_ptr left, right; left.b = ahtable_create_n(num_slots); left.b->c0 = node.b->c0; left.b->c1 = j; left.b->flag = left.b->c0 == left.b->c1 ? NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET; for (num_slots = ahtable_initial_size; (double) right_m > ahtable_max_load_factor * (double) num_slots; num_slots *= 2); right.b = ahtable_create_n(num_slots); right.b->c0 = j + 1; right.b->c1 = node.b->c1; right.b->flag = right.b->c0 == right.b->c1 ? NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET; /* update the parent's pointer */ unsigned int c; for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left; for (; c <= node.b->c1; ++c) parent.t->xs[c] = right; /* distribute keys to the new left or right node */ value_t* u; value_t* v; i = ahtable_iter_begin(node.b, false); while (!ahtable_iter_finished(i)) { key = ahtable_iter_key(i, &len); u = ahtable_iter_val(i); assert(len > 0); /* left */ if ((unsigned char) key[0] <= j) { if (*left.flag & NODE_TYPE_PURE_BUCKET) { v = ahtable_get(left.b, key + 1, len - 1); } else { v = ahtable_get(left.b, key, len); } *v = *u; } /* right */ else { if (*right.flag & NODE_TYPE_PURE_BUCKET) { v = ahtable_get(right.b, key + 1, len - 1); } else { v = ahtable_get(right.b, key, len); } *v = *u; } ahtable_iter_next(i); } ahtable_iter_free(i); ahtable_free(node.b); }
/* Return a pointer to the value slot associated with `key` (`len` bytes) in
 * trie `T`, inserting the key when the lookup ends in a bucket that does not
 * yet hold it.
 *
 * When T->bsize is 0 no buckets are used: one trie node is allocated per
 * remaining key byte and the value lives on the last of them. Otherwise the
 * bucket reached by the key is split for as long as its size is at least
 * T->bsize, and the key is then looked up in (or added to) that bucket.
 */
value_t* hattrie_get(hattrie_t* T, const char* key, size_t len)
{
    node_ptr parent = T->root;
    assert(*parent.flag & NODE_TYPE_TRIE);

    /* NOTE(review): the empty key hands back the root's value slot directly
     * rather than going through hattrie_useval() like the other trie-node
     * paths below - confirm that this is intended. */
    if (len == 0) {
        return &parent.t->val;
    }

    /* walk down through trie nodes; afterwards parent is a trie node and
     * node is whatever hangs below it */
    node_ptr node = hattrie_consume(&parent, &key, &len, 0);
    assert(*parent.flag & NODE_TYPE_TRIE);

    /* pure-trie mode: extend the trie by one node per unconsumed byte */
    if (T->bsize == 0) {
        node.t = parent.t;
        for (; len > 0; ++key, --len) {
            const unsigned char ch = (unsigned char) *key;
            node.t->xs[ch].t = alloc_empty_node(T);
            node = node.t->xs[ch];
        }
        return hattrie_useval(T, node);
    }

    for (;;) {
        /* if the key has been consumed on a trie node, use its value */
        if (len == 0) {
            if (*node.flag & NODE_TYPE_TRIE) {
                return hattrie_useval(T, node);
            }
            if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
                return hattrie_useval(T, parent);
            }
        }

        /* stop as soon as the bucket has room */
        if (ahtable_size(node.b) < T->bsize) {
            break;
        }

        /* preemptively split the full bucket; the split invalidates the node
         * pointer, so search from the parent again */
        hattrie_split(T, parent, node);
        node = hattrie_consume(&parent, &key, &len, 0);
    }

    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
           *node.flag & NODE_TYPE_HYBRID_BUCKET);
    assert(len > 0);

    const size_t before = node.b->m;

    /* a pure bucket is addressed without the key's leading byte */
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        ++key;
        --len;
    }
    value_t* slot = ahtable_get(node.b, key, len);

    /* account for a key the lookup may have added to the bucket */
    T->m += (node.b->m - before);

    return slot;
}