Пример #1
0
/* Perform one split operation on the given node with the given parent.
 */
static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node)
{
    /* only buckets may be split */
    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
           *node.flag & NODE_TYPE_HYBRID_BUCKET);

    assert(*parent.flag & NODE_TYPE_TRIE);

    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        /* turn the pure bucket into a hybrid bucket */
        parent.t->xs[node.b->c0].t = alloc_trie_node(T, node);

        /* if the bucket had an empty key, move it to the new trie node */
        value_t* val = ahtable_tryget(node.b, NULL, 0);
        if (val) {
            parent.t->xs[node.b->c0].t->val     = *val;
            parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL;
            *val = 0;
            ahtable_del(node.b, NULL, 0);
        }

        node.b->c0   = 0x00;
        node.b->c1   = NODE_MAXCHAR;
        node.b->flag = NODE_TYPE_HYBRID_BUCKET;

        return;
    }

    /* This is a hybrid bucket. Perform a proper split. */

    /* count the number of occourances of every leading character */
    unsigned int cs[NODE_CHILDS]; // occurance count for leading chars
    memset(cs, 0, NODE_CHILDS * sizeof(unsigned int));
    size_t len;
    const char* key;

    ahtable_iter_t* i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        assert(len > 0);
        cs[(unsigned char) key[0]] += 1;
        ahtable_iter_next(i);
    }
    ahtable_iter_free(i);

    /* choose a split point */
    unsigned int left_m, right_m, all_m;
    unsigned char j = node.b->c0;
    all_m   = ahtable_size(node.b);
    left_m  = cs[j];
    right_m = all_m - left_m;
    int d;

    while (j + 1 < node.b->c1) {
        d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1]));
        if (d <= abs(left_m - right_m) && left_m + cs[j + 1] < all_m) {
            j += 1;
            left_m  += cs[j];
            right_m -= cs[j];
        }
        else break;
    }

    /* now split into two node cooresponding to ranges [0, j] and
     * [j + 1, NODE_MAXCHAR], respectively. */


    /* create new left and right nodes */

    /* TODO: Add a special case if either node is a hybrid bucket containing all
     * the keys. In such a case, do not build a new table, just use the old one.
     * */
    size_t num_slots;


    for (num_slots = ahtable_initial_size;
            (double) left_m > ahtable_max_load_factor * (double) num_slots;
            num_slots *= 2);

    node_ptr left, right;
    left.b  = ahtable_create_n(num_slots);
    left.b->c0   = node.b->c0;
    left.b->c1   = j;
    left.b->flag = left.b->c0 == left.b->c1 ?
                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;


    for (num_slots = ahtable_initial_size;
            (double) right_m > ahtable_max_load_factor * (double) num_slots;
            num_slots *= 2);

    right.b = ahtable_create_n(num_slots);
    right.b->c0   = j + 1;
    right.b->c1   = node.b->c1;
    right.b->flag = right.b->c0 == right.b->c1 ?
                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;


    /* update the parent's pointer */

    unsigned int c;
    for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left;
    for (; c <= node.b->c1; ++c)      parent.t->xs[c] = right;



    /* distribute keys to the new left or right node */
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        u   = ahtable_iter_val(i);
        assert(len > 0);

        /* left */
        if ((unsigned char) key[0] <= j) {
            if (*left.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(left.b, key + 1, len - 1);
            }
            else {
                v = ahtable_get(left.b, key, len);
            }
            *v = *u;
        }

        /* right */
        else {
            if (*right.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(right.b, key + 1, len - 1);
            }
            else {
                v = ahtable_get(right.b, key, len);
            }
            *v = *u;
        }

        ahtable_iter_next(i);
    }

    ahtable_iter_free(i);
    ahtable_free(node.b);
}
Пример #2
0
value_t* hattrie_get(hattrie_t* T, const char* key, size_t len)
{
    node_ptr parent = T->root;
    assert(*parent.flag & NODE_TYPE_TRIE);

    if (len == 0) return &parent.t->val;

    /* consume all trie nodes, now parent must be trie and child anything */
    node_ptr node = hattrie_consume(&parent, &key, &len, 0);
    assert(*parent.flag & NODE_TYPE_TRIE);

    /* key wasn't consumed and using pure tries */
    if (T->bsize == 0) {
        node.t = parent.t;
        while (len > 0) {
            node.t->xs[(unsigned char) *key].t = alloc_empty_node(T);
            node = node.t->xs[(unsigned char) *key];
            ++key;
            --len;
        }

        return hattrie_useval(T, node);
    }

    /* if the key has been consumed on a trie node, use its value */
    if (len == 0) {
        if (*node.flag & NODE_TYPE_TRIE) {
            return hattrie_useval(T, node);
        }
        else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
            return hattrie_useval(T, parent);
        }
    }

    /* preemptively split the bucket if it is full */
    while (ahtable_size(node.b) >= T->bsize) {
        hattrie_split(T, parent, node);

        /* after the split, the node pointer is invalidated, so we search from
         * the parent again. */
        node = hattrie_consume(&parent, &key, &len, 0);

        /* if the key has been consumed on a trie node, use its value */
        if (len == 0) {
            if (*node.flag & NODE_TYPE_TRIE) {
                return hattrie_useval(T, node);
            }
            else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
                return hattrie_useval(T, parent);
            }
        }
    }

    assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET);

    assert(len > 0);
    size_t m_old = node.b->m;
    value_t* val;
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        val = ahtable_get(node.b, key + 1, len - 1);
    }
    else {
        val = ahtable_get(node.b, key, len);
    }
    T->m += (node.b->m - m_old);

    return val;
}