ham_status_t
btree_traverse_tree(ham_page_t **page_ref, ham_s32_t *idxptr, 
                    ham_db_t *db, ham_page_t *page, ham_key_t *key)
{
    ham_status_t st;
    ham_s32_t slot;
    int_key_t *bte;
    btree_node_t *node=ham_page_get_btree_node(page);

    /*
     * make sure that we're not in a leaf page, and that the 
     * page is not empty
     */
    ham_assert(btree_node_get_count(node)>0, (0));
    ham_assert(btree_node_get_ptr_left(node)!=0, (0));

    st=btree_get_slot(db, page, key, &slot, 0);
    if (st)
    {
        *page_ref = 0;
        return st;
    }

    if (idxptr)
        *idxptr=slot;

    if (slot==-1)
    {
        st = db_fetch_page(page_ref, db, btree_node_get_ptr_left(node), 0);
        ham_assert(st ? !*page_ref : 1, (0));
        return st;
    }
    else 
    {
        bte=btree_node_get_key(db, node, slot);
        ham_assert(key_get_flags(bte)==0 || 
                key_get_flags(bte)==KEY_IS_EXTENDED,
                ("invalid key flags 0x%x", key_get_flags(bte)));
        st = db_fetch_page(page_ref, db, key_get_ptr(bte), 0);
        ham_assert(st ? !*page_ref : 1, (0));
        return st;
    }
}
Example #2
0
static ham_status_t
__insert_nosplit(ham_page_t *page, ham_key_t *key, 
        ham_offset_t rid, ham_record_t *record, 
        ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_u16_t count;
    ham_size_t keysize;
    ham_size_t new_dupe_id = 0;
    int_key_t *bte = 0;
    btree_node_t *node;
    ham_db_t *db=page_get_owner(page);
    ham_bool_t exists = HAM_FALSE;
    ham_s32_t slot;

    ham_assert(page_get_owner(page), (0));
    ham_assert(device_get_env(page_get_device(page)) == db_get_env(page_get_owner(page)), (0));

    node=ham_page_get_btree_node(page);
    count=btree_node_get_count(node);
    keysize=db_get_keysize(db);

    if (btree_node_get_count(node)==0)
    {
        slot = 0;
    }
    else if (hints->force_append) 
    {
        slot = count;
    } 
    else if (hints->force_prepend) 
    {
        /* insert at beginning; shift all up by one */
        slot = 0;
    } 
    else 
    {
        int cmp;

        hints->cost++;
        st=btree_get_slot(db, page, key, &slot, &cmp);
        if (st)
            return (st);

        /* insert the new key at the beginning? */
        if (slot == -1) 
        {
            slot = 0;
        }
        else
        {
            /*
             * key exists already
             */
            if (cmp == 0) 
            {
                if (hints->flags & HAM_OVERWRITE) 
                {
                    /* 
                     * no need to overwrite the key - it already exists! 
                     * however, we have to overwrite the data!
                     */
                    if (!btree_node_is_leaf(node)) 
                        return (HAM_SUCCESS);
                }
                else if (!(hints->flags & HAM_DUPLICATE))
                    return (HAM_DUPLICATE_KEY);

                /* do NOT shift keys up to make room; just overwrite the current [slot] */
                exists = HAM_TRUE;
            }
            else 
            {
                /*
                 * otherwise, if the new key is > then the slot key, move to 
                 * the next slot
                 */
                if (cmp > 0) 
                {
                    slot++;
                }
            }
        }
    }

    /*
     * in any case, uncouple the cursors and see if we must shift any elements to the
     * right
     */
    bte=btree_node_get_key(db, node, slot);
    ham_assert(bte, (0));

    if (!exists)
    {
        if (count > slot)
        {
            /* uncouple all cursors & shift any elements following [slot] */
            st=bt_uncouple_all_cursors(page, slot);
            if (st)
                return (st);

            hints->cost += stats_memmove_cost((db_get_int_key_header_size()+keysize)*(count-slot));
            memmove(((char *)bte)+db_get_int_key_header_size()+keysize, bte,
                    (db_get_int_key_header_size()+keysize)*(count-slot));
        }

        /*
         * if a new key is created or inserted: initialize it with zeroes
         */
        memset(bte, 0, db_get_int_key_header_size()+keysize);
    }

    /*
     * if we're in the leaf: insert, overwrite or append the blob
     * (depends on the flags)
     */
    if (btree_node_is_leaf(node)) {
        ham_status_t st;

        hints->cost++;
        st=key_set_record(db, bte, record, 
                        cursor
                            ? bt_cursor_get_dupe_id(cursor)
                            : 0, 
                        hints->flags, &new_dupe_id);
        if (st)
            return (st);
        
        hints->processed_leaf_page = page;
        hints->processed_slot = slot;
    }
    else {
        key_set_ptr(bte, rid);
    }

    page_set_dirty(page, db_get_env(db));
    key_set_size(bte, key->size);

    /*
     * set a flag if the key is extended, and does not fit into the 
     * btree
     */
    if (key->size > db_get_keysize(db))
        key_set_flags(bte, key_get_flags(bte)|KEY_IS_EXTENDED);

    /*
     * if we have a cursor: couple it to the new key
     *
     * the cursor always points to NIL.
     */
    if (cursor) 
    {
        if ((st=bt_cursor_set_to_nil(cursor)))
            return (st);

        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_UNCOUPLED), 
                ("coupling an uncoupled cursor, but need a nil-cursor"));
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_COUPLED), 
                ("coupling a coupled cursor, but need a nil-cursor"));
        bt_cursor_set_flags(cursor, 
                bt_cursor_get_flags(cursor)|BT_CURSOR_FLAG_COUPLED);
        bt_cursor_set_coupled_page(cursor, page);
        bt_cursor_set_coupled_index(cursor, slot);
        bt_cursor_set_dupe_id(cursor, new_dupe_id);
        memset(bt_cursor_get_dupe_cache(cursor), 0, sizeof(dupe_entry_t));
        page_add_cursor(page, (ham_cursor_t *)cursor);
    }

    /*
     * if we've overwritten a key: no need to continue, we're done
     */
    if (exists)
        return (0);

    /*
     * we insert the extended key, if necessary
     */
    key_set_key(bte, key->data, 
            db_get_keysize(db) < key->size ? db_get_keysize(db) : key->size);

    /*
     * if we need an extended key, allocate a blob and store
     * the blob-id in the key
     */
    if (key->size > db_get_keysize(db)) 
    {
        ham_offset_t blobid;

        key_set_key(bte, key->data, db_get_keysize(db));

        st=key_insert_extended(&blobid, db, page, key);
        ham_assert(st ? blobid == 0 : 1, (0));
        if (!blobid)
            return st ? st : HAM_INTERNAL_ERROR;

        key_set_extended_rid(db, bte, blobid);
    }

    /*
     * update the btree node-header
     */
    btree_node_set_count(node, count+1);

    return (0);
}
ham_s32_t 
btree_node_search_by_key(ham_db_t *db, ham_page_t *page, ham_key_t *key, 
                    ham_u32_t flags)
{
    int cmp; /* [i_a] */
    ham_s32_t slot;
    ham_status_t st;
    btree_node_t *node=ham_page_get_btree_node(page);

    /* ensure the approx flag is NOT set by anyone yet */
    ham_key_set_intflags(key, ham_key_get_intflags(key) & ~KEY_IS_APPROXIMATE);

    if (btree_node_get_count(node)==0)
        return (-1);

    st=btree_get_slot(db, page, key, &slot, &cmp);
    if (st) {
        ham_assert(st < -1, (0));
        return st;
    }

    /*
       'approximate matching'

        When we get here and cmp != 0 and we're looking for LT/GT/LEQ/GEQ 
        key matches, this is where we need to do our prep work.

        Yes, due to the flag tweak in a caller when we have (the usual) 
        multi-page DB table B+tree, both LT and GT flags are 'ON' here, 
        but let's not get carried way and assume that is always
        like that. To elaborate a bit here: it may seem like doing something 
        simple the hard way, but in here, we do NOT know if there are 
        adjacent pages, so 'edge cases' like the scenarios 1, 2, and 5 below 
        should NOT return an error KEY_NOT_FOUND but instead produce a 
        valid slot AND, most important, the accompanying 'sign' (LT/GT) flags 
        for that slot, so that the outer call can analyze our response and 
        shift the key index into the left or right adjacent page, when such 
        is available. We CANNOT see that here, so we always should work with 
        both LT+GT enabled here.
        And to make matters a wee bit more complex still: the one exception 
        to the above is when we have a single-page table: then we get 
        the actual GT/LT flags in here, as we're SURE there won't be any 
        left or right neighbour pages for us to shift into when the need 
        arrises.

        Anyway, the purpose of the next section is to see if we have a 
        matching 'approximate' key AND feed the 'sign' (i.e. LT(-1) or 
        GT(+1)) back to the caller, who knows _exactly_ what the
        user asked for and can thus take the proper action there.

        Here, we are only concerned about determining which key index we 
        should produce, IFF we should produce a matching key.

        Assume the following page layout, with two keys (values 2 and 4):

      * index:
      *    [0]   [1]  
      * +-+---+-+---+-+
      * | | 2 | | 4 | |
      * +-+---+-+---+-+

        Various scenarios apply. For the key search (key ~ 1) i.e. (key=1, 
        flags=NEAR), we get this:

        cmp = -1;
        slot = -1;

        hence we point here:

      *  |
      *  V
      * +-+---+-+---+-+
      * | | 2 | | 4 | |
      * +-+---+-+---+-+

        which is not a valid spot. Should we return a key? YES, since no key 
        is less than '1', but there exists a key '2' which fits as NEAR allows 
        for both LT and GT. Hence, this should be modified to become

        slot=0
        sign=GT

      *     | ( slot++ )
      *     V
      * +-+---+-+---+-+
      * | | 2 | | 4 | |
      * +-+---+-+---+-+


        Second scenario: key <= 1, i.e. (key=1, flags=LEQ)
        which gives us the same as above:

       cmp = -1;
       slot = -1;

        hence we point here:

      *  |
      *  V
      * +-+---+-+---+-+
      * | | 2 | | 4 | |
      * +-+---+-+---+-+

        Should we return a valid slot by adjusting? Your common sense says 
        NO, but the correct answer is YES, since (a) we do not know if the 
        user asked this, as _we_ see it in here as 'key ~ 1' anyway and 
        we must allow the caller to adjust the slot by moving it into the 
        left neighbour page -- an action we cannot do as we do not know, 
        in here, whether there's more pages adjacent to this one we're 
        currently looking at.

        EXCEPT... the common sense answer 'NO' is CORRECT when we have a 
        single-page db table in our hands; see the remark at the top of this 
        comment section; in that case, we can safely say 'NO' after all.

        Third scenario: key ~ 3
        which gives us either:

       cmp = -1;
       slot = 1;

         or

       cmp = 1;
       slot = 0;

         As we check for NEAR instead of just LT or GT, both are okay like 
         that, no adjustment needed.
         All we need to do is make sure sure we pass along the proper LT/GT 
         'sign' flags for outer level result processing.

      
        Fourth scenario: key < 3

        again, we get either:

       cmp = -1;
       slot = 1;

         or

       cmp = 1;
       slot = 0;

        but this time around, since we are looking for LT, we'll need to 
        adjust the second result, when that happens by slot++ and sending 
        the appropriate 'sign' flags.
    
      Fifth scenario: key ~ 5

        which given us:

       cmp = -1;
       slot = 1;

         hence we point here:

      *           |
      *           V
      * +-+---+-+---+-+
      * | | 2 | | 4 | |
      * +-+---+-+---+-+

        Should we return this valid slot? Yup, as long as we mention that 
        it's an LT(less than) key; the caller can see that we returned the 
        slot as the upper bound of this page and adjust accordingly when
        the actual query was 'key > 5' instead of 'key ~ 5' which is how we 
        get to see it.
    */
    /*
      Note that we have a 'preference' for LT answers in here; IFF the user'd 
        asked NEAR questions, most of the time that would give him LT answers, 
        i.e. the answers to NEAR ~ LT questions -- mark the word 'most' in 
        there: this is not happening when we're ending up at a page's lower 
        bound.
     */
    if (cmp)
    {
        /*
         When slot == -1, you're in a special situation: you do NOT know what 
         the comparison with slot[-1] delivers, because there _is_ _no_ slot 
         -1, but you _do_ know what slot[0] delivered: 'cmp' is the
         value for that one then.
         */
        if (slot < 0) 
            slot = 0;

        ham_assert(slot <= btree_node_get_count(node) - 1, (0));

        if (flags & HAM_FIND_LT_MATCH)
        {
            if (cmp < 0)
            {
                /* key @ slot is LARGER than the key we search for ... */
                if (slot > 0)
                {
                    slot--;
                    ham_key_set_intflags(key, ham_key_get_intflags(key) | KEY_IS_LT);
                    cmp = 0;
                }
                else if (flags & HAM_FIND_GT_MATCH)
                {
                    ham_assert(slot == 0, (0));
                    ham_key_set_intflags(key, ham_key_get_intflags(key) | KEY_IS_GT);
                    cmp = 0;
                }
            }
            else
            {
                /* key @ slot is SMALLER than the key we search for */
                ham_assert(cmp > 0, (0));
                ham_key_set_intflags(key, ham_key_get_intflags(key) | KEY_IS_LT);
                cmp = 0;
            }
        } 
        else if (flags&HAM_FIND_GT_MATCH)   
        {
            /*
             When we get here, we're sure HAM_FIND_LT_MATCH is NOT set...
             */
            ham_assert(!(flags&HAM_FIND_LT_MATCH), (0));

            if (cmp < 0)
            {
                /* key @ slot is LARGER than the key we search for ... */
                ham_key_set_intflags(key, ham_key_get_intflags(key) | KEY_IS_GT);
                cmp = 0;
            }
            else
            {
                /* key @ slot is SMALLER than the key we search for */
                ham_assert(cmp > 0, (0));
                if (slot < btree_node_get_count(node) - 1)
                {
                    slot++;
                    ham_key_set_intflags(key, ham_key_get_intflags(key) | KEY_IS_GT);
                    cmp = 0;
                }
            }
        }
    }

    if (cmp)
        return (-1);

    ham_assert(slot >= -1, (0));
    return (slot);
}