ham_status_t
btree_traverse_tree(ham_page_t **page_ref, ham_s32_t *idxptr, 
                    ham_db_t *db, ham_page_t *page, ham_key_t *key)
{
    ham_status_t st;
    ham_s32_t slot;
    int_key_t *bte;
    btree_node_t *node=ham_page_get_btree_node(page);

    /*
     * make sure that we're not in a leaf page, and that the 
     * page is not empty
     */
    ham_assert(btree_node_get_count(node)>0, (0));
    ham_assert(btree_node_get_ptr_left(node)!=0, (0));

    st=btree_get_slot(db, page, key, &slot, 0);
    if (st)
    {
        *page_ref = 0;
        return st;
    }

    if (idxptr)
        *idxptr=slot;

    if (slot==-1)
    {
        st = db_fetch_page(page_ref, db, btree_node_get_ptr_left(node), 0);
        ham_assert(st ? !*page_ref : 1, (0));
        return st;
    }
    else 
    {
        bte=btree_node_get_key(db, node, slot);
        ham_assert(key_get_flags(bte)==0 || 
                key_get_flags(bte)==KEY_IS_EXTENDED,
                ("invalid key flags 0x%x", key_get_flags(bte)));
        st = db_fetch_page(page_ref, db, key_get_ptr(bte), 0);
        ham_assert(st ? !*page_ref : 1, (0));
        return st;
    }
}
ham_status_t 
btree_find_cursor(ham_btree_t *be, ham_bt_cursor_t *cursor, 
           ham_key_t *key, ham_record_t *record, ham_u32_t flags)
{
	ham_status_t st;
    ham_page_t *page = NULL;
    btree_node_t *node = NULL;
    int_key_t *entry;
    ham_s32_t idx = -1;
    ham_db_t *db=be_get_db(be);
    find_hints_t hints = {flags, flags, 0, HAM_FALSE, HAM_FALSE, 1};

    btree_find_get_hints(&hints, db, key);

    if (hints.key_is_out_of_bounds) {
        stats_update_find_fail_oob(db, &hints);
        return HAM_KEY_NOT_FOUND;
    }

    if (hints.try_fast_track) {
        /* 
         * see if we get a sure hit within this btree leaf; if not, revert to 
         * regular scan 
         *
         * As this is a speed-improvement hint re-using recent material, the 
         * page should still sit in the cache, or we're using old info, which 
         * should be discarded.
         */
        st = db_fetch_page(&page, db, hints.leaf_page_addr, DB_ONLY_FROM_CACHE);
		ham_assert(st ? !page : 1, (0));
		if (st)
			return st;
        if (page) {
            node=ham_page_get_btree_node(page);
            ham_assert(btree_node_is_leaf(node), (0));
			ham_assert(btree_node_get_count(node) >= 3, (0)); /* edges + middle match */

            idx = btree_node_search_by_key(db, page, key, hints.flags);
            /* 
             * if we didn't hit a match OR a match at either edge, FAIL.
             * A match at one of the edges is very risky, as this can also 
             * signal a match far away from the current node, so we need 
             * the full tree traversal then.
             */
            if (idx <= 0 || idx >= btree_node_get_count(node) - 1) {
                idx = -1;
            }
            /*
             * else: we landed in the middle of the node, so we don't need to
             * traverse the entire tree now.
             */
        }

        /* Reset any errors which may have been collected during the hinting 
         * phase -- this is done by setting 'idx = -1' above as that effectively 
		 * clears the possible error code stored in there when (idx < -1) 
		 */
    }

    if (idx == -1) {
        /* get the address of the root page */
        if (!btree_get_rootpage(be)) {
            stats_update_find_fail(db, &hints);
            return HAM_KEY_NOT_FOUND;
        }

        /* load the root page */
        st=db_fetch_page(&page, db, btree_get_rootpage(be), 0);
		ham_assert(st ? !page : 1, (0));
        if (!page) {
            ham_assert(st, (0));
            stats_update_find_fail(db, &hints);
			return st ? st : HAM_INTERNAL_ERROR;
        }

        /* now traverse the root to the leaf nodes, till we find a leaf */
        node=ham_page_get_btree_node(page);
        if (!btree_node_is_leaf(node)) {
            /* signal 'don't care' when we have multiple pages; we resolve 
               this once we've got a hit further down */
            if (hints.flags & (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH)) 
                hints.flags |= (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH);

            for (;;) {
                hints.cost++;
                st=btree_traverse_tree(&page, 0, db, page, key);
                if (!page) {
                    stats_update_find_fail(db, &hints);
					return st ? st : HAM_KEY_NOT_FOUND;
                }

                node=ham_page_get_btree_node(page);
                if (btree_node_is_leaf(node))
                    break;
            }
        }

        /* check the leaf page for the key */
        idx=btree_node_search_by_key(db, page, key, hints.flags);
        if (idx < -1) {
            stats_update_find_fail(db, &hints);
            return (ham_status_t)idx;
        }
    }  /* end of regular search */

    /*
     * When we are performing an approximate match, the worst case
     * scenario is where we've picked the wrong side of the fence
     * while sitting at a page/node boundary: that's what this
     * next piece of code resolves:
     *
     * essentially it moves one record forwards or backward when
     * the flags tell us this is mandatory and we're not yet in the proper
     * position yet.
     *
     * The whole trick works, because the code above detects when
     * we need to traverse a multi-page btree -- where this worst-case
     * scenario can happen -- and adjusted the flags to accept
     * both LT and GT approximate matches so that btree_node_search_by_key()
     * will be hard pressed to return a 'key not found' signal (idx==-1),
     * instead delivering the nearest LT or GT match; all we need to
     * do now is ensure we've got the right one and if not, 
     * shift by one.
     */
    if (idx >= 0) {
        if ((ham_key_get_intflags(key) & KEY_IS_APPROXIMATE) 
            && (hints.original_flags 
                    & (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH)) 
                != (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH)) {
            if ((ham_key_get_intflags(key) & KEY_IS_GT) 
                && (hints.original_flags & HAM_FIND_LT_MATCH)) {
                /*
                 * if the index-1 is still in the page, just decrement the
                 * index
                 */
                if (idx > 0) {
                    idx--;
                }
                else {
                    /*
                     * otherwise load the left sibling page
                     */
                    if (!btree_node_get_left(node)) {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, btree_node_get_left(node), 0);
					ham_assert(st ? !page : 1, (0));
                    if (!page) {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
						return st ? st : HAM_INTERNAL_ERROR;
                    }
                    node = ham_page_get_btree_node(page);
                    idx = btree_node_get_count(node) - 1;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                        & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
            }
            else if ((ham_key_get_intflags(key) & KEY_IS_LT) 
                    && (hints.original_flags & HAM_FIND_GT_MATCH)) {
                /*
                 * if the index+1 is still in the page, just increment the
                 * index
                 */
                if (idx + 1 < btree_node_get_count(node)) {
                    idx++;
                }
                else {
                    /*
                     * otherwise load the right sibling page
                     */
                    if (!btree_node_get_right(node))
                    {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, 
                                    btree_node_get_right(node), 0);
                    if (!page) {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
						return st ? st : HAM_INTERNAL_ERROR;
                    }
                    node = ham_page_get_btree_node(page);
                    idx = 0;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                        & ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
            }
        }
        else if (!(ham_key_get_intflags(key) & KEY_IS_APPROXIMATE) 
                && !(hints.original_flags & HAM_FIND_EXACT_MATCH) 
                && (hints.original_flags != 0)) {
            /* 
             * 'true GT/LT' has been added @ 2009/07/18 to complete 
             * the EQ/LEQ/GEQ/LT/GT functionality;
             *
             * 'true LT/GT' is simply an extension upon the already existing 
             * LEQ/GEQ logic just above; all we do here is move one record 
             * up/down as it just happens that we get an exact ('equal') 
             * match here.
             *
             * The fact that the LT/GT constants share their bits with the 
             * LEQ/GEQ flags so that LEQ==(LT|EXACT) and GEQ==(GT|EXACT) 
             * ensures that we can restrict our work to a simple adjustment 
             * right here; everything else has already been taken of by the 
             * LEQ/GEQ logic in the section above when the key has been 
             * flagged with the KEY_IS_APPROXIMATE flag.
             */
            if (hints.original_flags & HAM_FIND_LT_MATCH)
            {
                /*
                 * if the index-1 is still in the page, just decrement the
                 * index
                 */
                if (idx > 0)
                {
                    idx--;

                    ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                            & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
                }
                else
                {
                    /*
                     * otherwise load the left sibling page
                     */
                    if (!btree_node_get_left(node))
                    {
                        /* when an error is otherwise unavoidable, see if 
                           we have an escape route through GT? */

                        if (hints.original_flags & HAM_FIND_GT_MATCH)
                        {
                            /*
                             * if the index+1 is still in the page, just 
                             * increment the index
                             */
                            if (idx + 1 < btree_node_get_count(node))
                            {
                                idx++;
                            }
                            else
                            {
                                /*
                                 * otherwise load the right sibling page
                                 */
                                if (!btree_node_get_right(node))
                                {
                                    stats_update_find_fail(db, &hints);
                                    ham_assert(node == ham_page_get_btree_node(page), (0));
                                    stats_update_any_bound(db, page, key, hints.original_flags, -1);
                                    return HAM_KEY_NOT_FOUND;
                                }

                                hints.cost++;
                                st = db_fetch_page(&page, db,
                                                btree_node_get_right(node), 0);
                                if (!page)
                                {
                                    ham_assert(st, (0));
                                    stats_update_find_fail(db, &hints);
									return st ? st : HAM_INTERNAL_ERROR;
                                }
                                node = ham_page_get_btree_node(page);
                                idx = 0;
                            }
                            ham_key_set_intflags(key, (ham_key_get_intflags(key) & 
                                            ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
                        }
                        else
                        {
                            stats_update_find_fail(db, &hints);
                            ham_assert(node == ham_page_get_btree_node(page), (0));
                            stats_update_any_bound(db, page, key, hints.original_flags, -1);
                            return HAM_KEY_NOT_FOUND;
                        }
                    }
                    else
                    {
                        hints.cost++;
                        st = db_fetch_page(&page, db,
                                        btree_node_get_left(node), 0);
                        if (!page)
                        {
                            ham_assert(st, (0));
                            stats_update_find_fail(db, &hints);
							return st ? st : HAM_INTERNAL_ERROR;
                        }
                        node = ham_page_get_btree_node(page);
                        idx = btree_node_get_count(node) - 1;

                        ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                                        & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
                    }
                }
            }
            else if (hints.original_flags & HAM_FIND_GT_MATCH)
            {
                /*
                 * if the index+1 is still in the page, just increment the
                 * index
                 */
                if (idx + 1 < btree_node_get_count(node))
                {
                    idx++;
                }
                else
                {
                    /*
                     * otherwise load the right sibling page
                     */
                    if (!btree_node_get_right(node))
                    {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, 
                                btree_node_get_right(node), 0);
                    if (!page)
                    {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
						return st ? st : HAM_INTERNAL_ERROR;
                    }
                    node = ham_page_get_btree_node(page);
                    idx = 0;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                                        & ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
            }
        }
    }

    if (idx<0) {
        stats_update_find_fail(db, &hints);
        ham_assert(node, (0));
        ham_assert(page, (0));
        ham_assert(node == ham_page_get_btree_node(page), (0));
        stats_update_any_bound(db, page, key, hints.original_flags, -1);
        return HAM_KEY_NOT_FOUND;
    }

    /* load the entry, and store record ID and key flags */
    entry=btree_node_get_key(db, node, idx);

    /* set the cursor-position to this key */
    if (cursor) {
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_UNCOUPLED), 
                ("coupling an uncoupled cursor, but need a nil-cursor"));
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_COUPLED), 
                ("coupling a coupled cursor, but need a nil-cursor"));
        page_add_cursor(page, (ham_cursor_t *)cursor);
        bt_cursor_set_flags(cursor, 
                bt_cursor_get_flags(cursor)|BT_CURSOR_FLAG_COUPLED);
        bt_cursor_set_coupled_page(cursor, page);
        bt_cursor_set_coupled_index(cursor, idx);
    }

    /*
     * during util_read_key and util_read_record, new pages might be needed,
     * and the page at which we're pointing could be moved out of memory; 
     * that would mean that the cursor would be uncoupled, and we're losing
     * the 'entry'-pointer. therefore we 'lock' the page by incrementing 
     * the reference counter
     */
    page_add_ref(page);
    ham_assert(btree_node_is_leaf(node), ("iterator points to internal node"));

    /* no need to load the key if we have an exact match: */
    if (key && (ham_key_get_intflags(key) & KEY_IS_APPROXIMATE)) 
    {
        ham_status_t st=util_read_key(db, entry, key);
        if (st) 
        {
            page_release_ref(page);
            stats_update_find_fail(db, &hints);
            return (st);
        }
    }

    if (record) 
    {
        ham_status_t st;
        record->_intflags=key_get_flags(entry);
        record->_rid=key_get_ptr(entry);
        st=util_read_record(db, record, flags);
        if (st) 
        {
            page_release_ref(page);
            stats_update_find_fail(db, &hints);
            return (st);
        }
    }

    page_release_ref(page);
    
    stats_update_find(db, page, &hints);
    ham_assert(node == ham_page_get_btree_node(page), (0));
    stats_update_any_bound(db, page, key, hints.original_flags, idx);

    return (0);
}
Beispiel #3
0
static ham_status_t
__insert_split(ham_page_t *page, ham_key_t *key, 
        ham_offset_t rid, insert_scratchpad_t *scratchpad, 
        insert_hints_t *hints)
{
    int cmp;
    ham_status_t st;
    ham_page_t *newpage, *oldsib;
    int_key_t *nbte, *obte;
    btree_node_t *nbtp, *obtp, *sbtp;
    ham_size_t count, keysize;
    ham_db_t *db=page_get_owner(page);
    ham_env_t *env = db_get_env(db);
    ham_key_t pivotkey, oldkey;
    ham_offset_t pivotrid;
    ham_u16_t pivot;
    ham_bool_t pivot_at_end=HAM_FALSE;

    ham_assert(page_get_owner(page), (0));
    ham_assert(device_get_env(page_get_device(page)) 
            == db_get_env(page_get_owner(page)), (0));

    ham_assert(hints->force_append == HAM_FALSE, (0));

    keysize=db_get_keysize(db);

    /*
     * allocate a new page
     */
    hints->cost++;
    st=db_alloc_page(&newpage, db, PAGE_TYPE_B_INDEX, 0); 
    ham_assert(st ? page == NULL : 1, (0));
    ham_assert(!st ? page  != NULL : 1, (0));
    if (st)
        return st; 
    ham_assert(page_get_owner(newpage), (""));
    /* clear the node header */
    memset(page_get_payload(newpage), 0, sizeof(btree_node_t));

    stats_page_is_nuked(db, page, HAM_TRUE);

    /*
     * move half of the key/rid-tuples to the new page
     *
     * !! recno: keys are sorted; we do a "lazy split"
     */
    nbtp=ham_page_get_btree_node(newpage);
    nbte=btree_node_get_key(db, nbtp, 0);
    obtp=ham_page_get_btree_node(page);
    obte=btree_node_get_key(db, obtp, 0);
    count=btree_node_get_count(obtp);

    /*
     * for databases with sequential access (this includes recno databases):
     * do not split in the middle, but at the very end of the page
     *
     * if this page is the right-most page in the index, and this key is 
     * inserted at the very end, then we select the same pivot as for
     * sequential access
     */
    if (db_get_data_access_mode(db)&HAM_DAM_SEQUENTIAL_INSERT)
        pivot_at_end=HAM_TRUE;
    else if (btree_node_get_right(obtp)==0) {
        cmp=key_compare_pub_to_int(db, page, key, btree_node_get_count(obtp)-1);
        if (cmp>0)
            pivot_at_end=HAM_TRUE;
    }

    /*
     * internal pages set the count of the new page to count-pivot-1 (because
     * the pivot element will become ptr_left of the new page).
     * by using pivot=count-2 we make sure that at least 1 element will remain
     * in the new node.
     */
    if (pivot_at_end) {
        pivot=count-2;
    }
    else {
        pivot=count/2;
    }

    /*
     * uncouple all cursors
     */
    st=bt_uncouple_all_cursors(page, pivot);
    if (st)
        return (st);

    /*
     * if we split a leaf, we'll insert the pivot element in the leaf
     * page, too. in internal nodes, we don't insert it, but propagate
     * it to the parent node only.
     */
    if (btree_node_is_leaf(obtp)) {
        hints->cost += stats_memmove_cost((db_get_int_key_header_size()+keysize)*(count-pivot));
        memcpy((char *)nbte,
               ((char *)obte)+(db_get_int_key_header_size()+keysize)*pivot, 
               (db_get_int_key_header_size()+keysize)*(count-pivot));
    }
    else {
        hints->cost += stats_memmove_cost((db_get_int_key_header_size()+keysize)*(count-pivot-1));
        memcpy((char *)nbte,
               ((char *)obte)+(db_get_int_key_header_size()+keysize)*(pivot+1), 
               (db_get_int_key_header_size()+keysize)*(count-pivot-1));
    }
    
    /* 
     * store the pivot element, we'll need it later to propagate it 
     * to the parent page
     */
    nbte=btree_node_get_key(db, obtp, pivot);

    memset(&pivotkey, 0, sizeof(pivotkey));
    memset(&oldkey, 0, sizeof(oldkey));
    oldkey.data=key_get_key(nbte);
    oldkey.size=key_get_size(nbte);
    oldkey._flags=key_get_flags(nbte);
    st = util_copy_key(db, &oldkey, &pivotkey);
    if (st) 
    {
        (void)db_free_page(newpage, DB_MOVE_TO_FREELIST);
        goto fail_dramatically;
    }
    pivotrid=page_get_self(newpage);

    /*
     * adjust the page count
     */
    if (btree_node_is_leaf(obtp)) {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot);
    }
    else {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot-1);
    }

    /*
     * if we're in an internal page: fix the ptr_left of the new page
     * (it points to the ptr of the pivot key)
     */ 
    if (!btree_node_is_leaf(obtp)) {
        /* 
         * nbte still contains the pivot key 
         */
        btree_node_set_ptr_left(nbtp, key_get_ptr(nbte));
    }

    /*
     * insert the new element
     */
    hints->cost++;
    cmp=key_compare_pub_to_int(db, page, key, pivot);
    if (cmp < -1) 
    {
        st = (ham_status_t)cmp;
        goto fail_dramatically;
    }

    if (cmp>=0)
        st=__insert_nosplit(newpage, key, rid, 
                scratchpad->record, scratchpad->cursor, hints);
    else
        st=__insert_nosplit(page, key, rid, 
                scratchpad->record, scratchpad->cursor, hints);
    if (st) 
    {
        goto fail_dramatically;
    }
    scratchpad->cursor=0; /* don't overwrite cursor if __insert_nosplit
                             is called again */

    /*
     * fix the double-linked list of pages, and mark the pages as dirty
     */
    if (btree_node_get_right(obtp)) 
    {
        st=db_fetch_page(&oldsib, db, btree_node_get_right(obtp), 0);
        if (st)
            goto fail_dramatically;
    }
    else
    {
        oldsib=0;
    }

    if (oldsib) {
        st=ham_log_add_page_before(oldsib);
        if (st)
            goto fail_dramatically;
    }

    btree_node_set_left (nbtp, page_get_self(page));
    btree_node_set_right(nbtp, btree_node_get_right(obtp));
    btree_node_set_right(obtp, page_get_self(newpage));
    if (oldsib) {
        sbtp=ham_page_get_btree_node(oldsib);
        btree_node_set_left(sbtp, page_get_self(newpage));
        page_set_dirty(oldsib, env);
    }
    page_set_dirty(newpage, env);
    page_set_dirty(page, env);

    /* 
     * propagate the pivot key to the parent page
     */
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad->key.data)
        allocator_free(env_get_allocator(env), scratchpad->key.data);
    scratchpad->key=pivotkey;
    scratchpad->rid=pivotrid;
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));

    return (SPLIT);

fail_dramatically:
    ham_assert(!(pivotkey.flags & HAM_KEY_USER_ALLOC), (0));
    if (pivotkey.data)
        allocator_free(env_get_allocator(env), pivotkey.data);
    return st;
}
Beispiel #4
0
static ham_status_t
__insert_cursor(ham_btree_t *be, ham_key_t *key, ham_record_t *record, 
                ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_page_t *root;
    ham_db_t *db=be_get_db(be);
    ham_env_t *env = db_get_env(db);
    insert_scratchpad_t scratchpad;

    ham_assert(hints->force_append == HAM_FALSE, (0));
    ham_assert(hints->force_prepend == HAM_FALSE, (0));

    /* 
     * initialize the scratchpad 
     */
    memset(&scratchpad, 0, sizeof(scratchpad));
    scratchpad.be=be;
    scratchpad.record=record;
    scratchpad.cursor=cursor;

    /* 
     * get the root-page...
     */
    ham_assert(btree_get_rootpage(be)!=0, ("btree has no root page"));
    st=db_fetch_page(&root, db, btree_get_rootpage(be), 0);
    ham_assert(st ? root == NULL : 1, (0));
    if (st)
        return st;

    /* 
     * ... and start the recursion 
     */
    st=__insert_recursive(root, key, 0, &scratchpad, hints);

    /*
     * if the root page was split, we have to create a new
     * root page.
     */
    if (st==SPLIT) {
        ham_page_t *newroot;
        btree_node_t *node;

        /*
         * the root-page will be changed...
         */
        st=ham_log_add_page_before(root);
        if (st)
            return (st);

        /*
         * allocate a new root page
         */
        st=db_alloc_page(&newroot, db, PAGE_TYPE_B_ROOT, 0); 
        ham_assert(st ? newroot == NULL : 1, (0));
        if (st)
            return (st);
        ham_assert(page_get_owner(newroot), (""));
        /* clear the node header */
        memset(page_get_payload(newroot), 0, sizeof(btree_node_t));

        stats_page_is_nuked(db, root, HAM_TRUE);

        /* 
         * insert the pivot element and the ptr_left
         */ 
        node=ham_page_get_btree_node(newroot);
        btree_node_set_ptr_left(node, btree_get_rootpage(be));
        st=__insert_nosplit(newroot, &scratchpad.key, 
                scratchpad.rid, scratchpad.record, scratchpad.cursor, 
                hints);
        ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
        scratchpad.cursor=0; /* don't overwrite cursor if __insert_nosplit
                                is called again */
        if (st) {
            ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
            if (scratchpad.key.data)
                allocator_free(env_get_allocator(env), scratchpad.key.data);
            return (st);
        }

        /*
         * set the new root page
         *
         * !!
         * do NOT delete the old root page - it's still in use!
         *
         * also don't forget to flush the backend - otherwise the header
         * page of the database will not contain the updated information.
         * The backend is flushed when the database is closed, but if 
         * recovery is enabled then the flush here is critical.
         */
        btree_set_rootpage(be, page_get_self(newroot));
        be_set_dirty(be, HAM_TRUE);
        be->_fun_flush(be);

        /*
         * As we re-purpose a page, we will reset its pagecounter
         * as well to signal its first use as the new type assigned
         * here.
         */
        if (env_get_cache(env) && (page_get_type(root)!=PAGE_TYPE_B_INDEX))
            cache_update_page_access_counter(root, env_get_cache(env), 0);

        page_set_type(root, PAGE_TYPE_B_INDEX);
        page_set_dirty(root, env);
        page_set_dirty(newroot, env);

        /* the root page was modified (btree_set_rootpage) - make sure that
         * it's logged */
        if (env_get_rt_flags(env)&HAM_ENABLE_RECOVERY) {
            st=txn_add_page(env_get_txn(env), env_get_header_page(env),
                    HAM_TRUE);
            if (st)
                return (st);
        }
    }

    /*
     * release the scratchpad-memory and return to caller
     */
    ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad.key.data)
        allocator_free(env_get_allocator(env), scratchpad.key.data);

    return (st);
}
Beispiel #5
0
static ham_status_t
__append_key(ham_btree_t *be, ham_key_t *key, ham_record_t *record, 
                ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st=0;
    ham_page_t *page;
    btree_node_t *node;
    ham_db_t *db;

#ifdef HAM_DEBUG
    if (cursor && !bt_cursor_is_nil(cursor)) {
        ham_assert(be_get_db(be) == bt_cursor_get_db(cursor), (0));
    }
#endif

    db = be_get_db(be);

    /* 
     * see if we get this btree leaf; if not, revert to regular scan 
     *    
     * As this is a speed-improvement hint re-using recent material, the page 
     * should still sit in the cache, or we're using old info, which should be 
     * discarded.
     */
    st = db_fetch_page(&page, db, hints->leaf_page_addr, DB_ONLY_FROM_CACHE);
    if (st)
        return st;
    if (!page) {
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    page_add_ref(page);
    node=ham_page_get_btree_node(page);
    ham_assert(btree_node_is_leaf(node), ("iterator points to internal node"));

    /*
     * if the page is already full OR this page is not the right-most page
     * when we APPEND or the left-most node when we PREPEND
     * OR the new key is not the highest key: perform a normal insert
     */
    if ((hints->force_append && btree_node_get_right(node))
            || (hints->force_prepend && btree_node_get_left(node))
            || btree_node_get_count(node) >= btree_get_maxkeys(be)) {
        page_release_ref(page);
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    /*
     * if the page is not empty: check if we append the key at the end / start
     * (depending on force_append/force_prepend),
     * or if it's actually inserted in the middle (when neither force_append 
     * or force_prepend is specified: that'd be SEQUENTIAL insertion 
     * hinting somewhere in the middle of the total key range.
     */
    if (btree_node_get_count(node)!=0) {
        int cmp_hi;
        int cmp_lo;

        hints->cost++;
        if (!hints->force_prepend) {
            cmp_hi = key_compare_pub_to_int(db, page, key, 
                                btree_node_get_count(node)-1);
            /* key is in the middle */
            if (cmp_hi < -1) {
                page_release_ref(page);
                return (ham_status_t)cmp_hi;
            }
            /* key is at the end */
            if (cmp_hi > 0) {
                if (btree_node_get_right(node)) {
                    /* not at top end of the btree, so we can't do the 
                     * fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_APPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_TRUE;
                hints->force_prepend = HAM_FALSE;
            }
        }
        else { /* hints->force_prepend is true */
            /* not bigger than the right-most node while we 
             * were trying to APPEND */
            cmp_hi = -1;
        }

        if (!hints->force_append) {
            cmp_lo = key_compare_pub_to_int(db, page, key, 0);
            /* in the middle range */
            if (cmp_lo < -1) {
                page_release_ref(page);
                return ((ham_status_t)cmp_lo);
            }
            /* key is at the start of page */
            if (cmp_lo < 0) {
                if (btree_node_get_left(node)) {
                    /* not at bottom end of the btree, so we can't 
                     * do the fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_PREPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_TRUE;
            }
        }
        else { /* hints->force_prepend is true */
            /* not smaller than the left-most node while we were 
             * trying to PREPEND */
            cmp_lo = +1;
        }

        /* handle inserts in the middle range */
        if (cmp_lo >= 0 && cmp_hi <= 0) {
            /*
             * Depending on where we are in the btree, the current key either
             * is going to end up in the middle of the given node/page,
             * OR the given key is out of range of the given leaf node.
             */
            if (hints->force_append || hints->force_prepend) {
                /*
                 * when prepend or append is FORCED, we are expected to 
                 * add keys ONLY at the beginning or end of the btree
                 * key range. Clearly the current key does not fit that
                 * criterium.
                 */
                page_release_ref(page);
                //hints->flags &= ~HAM_HINT_PREPEND;
                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_FALSE;
                return (__insert_cursor(be, key, record, cursor, hints));
            }

            /* 
             * we discovered that the key must be inserted in the middle 
             * of the current leaf.
             * 
             * It does not matter whether the current leaf is at the start or
             * end of the btree range; as we need to add the key in the middle
             * of the current leaf, that info alone is enough to continue with
             * the fast track insert operation.
             */
            ham_assert(!hints->force_prepend && !hints->force_append, (0));
        }

        ham_assert((hints->force_prepend + hints->force_append) < 2, 
                ("Either APPEND or PREPEND flag MAY be set, but not both"));
    }
    else { /* empty page: force insertion in slot 0 */
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_TRUE;
    }

    /*
     * the page will be changed - write it to the log (if a log exists)
     */
    st=ham_log_add_page_before(page);
    if (st) {
        page_release_ref(page);
        return (st);
    }

    /*
     * OK - we're really appending/prepending the new key.
     */
    ham_assert(hints->force_append || hints->force_prepend, (0));
    st=__insert_nosplit(page, key, 0, record, cursor, hints);

    page_release_ref(page);
    return (st);
}