Esempio n. 1
0
ham_status_t
txn_add_page(ham_txn_t *txn, ham_page_t *page, ham_bool_t ignore_if_inserted)
{
    /*
     * don't re-insert, if 'ignore_if_inserted' is true
     */
    if (ignore_if_inserted && txn_get_page(txn, page_get_self(page)))
        return (0);

#ifdef HAM_DEBUG
    /*
     * check if the page is already in the transaction's pagelist - 
     * that would be a bug
     */
    ham_assert(txn_get_page(txn, page_get_self(page))==0, 
            ("page 0x%llx is already in the txn", page_get_self(page)));
#endif

    /*
     * not found? add the page
     */
    page_add_ref(page);

    ham_assert(!page_is_in_list(txn_get_pagelist(txn), page, PAGE_LIST_TXN), (0));
    txn_set_pagelist(txn, page_list_insert(txn_get_pagelist(txn), 
            PAGE_LIST_TXN, page));

    return (HAM_SUCCESS);
}
Esempio n. 2
0
ham_status_t
blob_duplicate_insert(ham_db_t *db, ham_offset_t table_id, 
        ham_record_t *record, ham_size_t position, ham_u32_t flags, 
        dupe_entry_t *entries, ham_size_t num_entries, 
        ham_offset_t *rid, ham_size_t *new_position)
{
    ham_status_t st=0;
    dupe_table_t *table=0;
    ham_bool_t alloc_table=0;
	ham_bool_t resize=0;
    ham_page_t *page=0;
    ham_env_t *env=db_get_env(db);

    /*
     * create a new duplicate table if none existed, and insert
     * the first entry
     */
    if (!table_id) {
        ham_assert(num_entries==2, (""));
        /* allocates space for 8 (!) entries */
        table=allocator_calloc(env_get_allocator(env), 
                        sizeof(dupe_table_t)+7*sizeof(dupe_entry_t));
        if (!table)
            return HAM_OUT_OF_MEMORY;
        dupe_table_set_capacity(table, 8);
        dupe_table_set_count(table, 1);
        memcpy(dupe_table_get_entry(table, 0), &entries[0], 
                        sizeof(entries[0]));

        /* skip the first entry */
        entries++;
        num_entries--;
        alloc_table=1;
    }
    else {
        /*
         * otherwise load the existing table 
         */
        st=__get_duplicate_table(&table, &page, env, table_id);
		ham_assert(st ? table == NULL : 1, (0));
		ham_assert(st ? page == NULL : 1, (0));
        if (!table)
			return st ? st : HAM_INTERNAL_ERROR;
        if (!page && !(env_get_rt_flags(env)&HAM_IN_MEMORY_DB))
            alloc_table=1;
    }

    if (page)
        if ((st=ham_log_add_page_before(page)))
            return (st);

    ham_assert(num_entries==1, (""));

    /*
     * resize the table, if necessary
     */ 
    if (!(flags & HAM_OVERWRITE)
            && dupe_table_get_count(table)+1>=dupe_table_get_capacity(table)) {
        dupe_table_t *old=table;
        ham_size_t new_cap=dupe_table_get_capacity(table);

        if (new_cap < 3*8)
            new_cap += 8;
        else
            new_cap += new_cap/3;

        table=allocator_calloc(env_get_allocator(env), sizeof(dupe_table_t)+
                        (new_cap-1)*sizeof(dupe_entry_t));
        if (!table)
            return (HAM_OUT_OF_MEMORY);
        dupe_table_set_capacity(table, new_cap);
        dupe_table_set_count(table, dupe_table_get_count(old));
        memcpy(dupe_table_get_entry(table, 0), dupe_table_get_entry(old, 0),
                       dupe_table_get_count(old)*sizeof(dupe_entry_t));
        if (alloc_table)
            allocator_free(env_get_allocator(env), old);

        alloc_table=1;
        resize=1;
    }

    /*
     * insert sorted, unsorted or overwrite the entry at the requested position
     */
    if (flags&HAM_OVERWRITE) {
        dupe_entry_t *e=dupe_table_get_entry(table, position);

        if (!(dupe_entry_get_flags(e)&(KEY_BLOB_SIZE_SMALL
                                    |KEY_BLOB_SIZE_TINY
                                    |KEY_BLOB_SIZE_EMPTY))) {
            (void)blob_free(env, db, dupe_entry_get_rid(e), 0);
        }

        memcpy(dupe_table_get_entry(table, position), 
                        &entries[0], sizeof(entries[0]));
    }
    else {
        if (db_get_rt_flags(db)&HAM_SORT_DUPLICATES) {
            if (page)
                page_add_ref(page);
            position=__get_sorted_position(db, table, record, flags);
            if (page)
                page_release_ref(page);
            if (position<0)
                return ((ham_status_t)position);
        }
        else if (flags&HAM_DUPLICATE_INSERT_BEFORE) {
            /* do nothing, insert at the current position */
        }
        else if (flags&HAM_DUPLICATE_INSERT_AFTER) {
            position++;
            if (position > dupe_table_get_count(table))
                position=dupe_table_get_count(table);
        }
        else if (flags&HAM_DUPLICATE_INSERT_FIRST) {
            position=0;
        }
        else if (flags&HAM_DUPLICATE_INSERT_LAST) {
            position=dupe_table_get_count(table);
        }
        else {
            position=dupe_table_get_count(table);
        }

        if (position != dupe_table_get_count(table)) {
            memmove(dupe_table_get_entry(table, position+1), 
                dupe_table_get_entry(table, position), 
                sizeof(entries[0])*(dupe_table_get_count(table)-position));
        }

        memcpy(dupe_table_get_entry(table, position), 
                &entries[0], sizeof(entries[0]));

        dupe_table_set_count(table, dupe_table_get_count(table)+1);
    }

    /*
     * write the table back to disk and return the blobid of the table
     */
    if ((table_id && !page) || resize) {
        ham_record_t rec={0};
        rec.data=(ham_u8_t *)table;
        rec.size=sizeof(dupe_table_t)
                    +(dupe_table_get_capacity(table)-1)*sizeof(dupe_entry_t);
        st=blob_overwrite(env, db, table_id, &rec, 0, rid);
    }
    else if (!table_id) {
        ham_record_t rec={0};
        rec.data=(ham_u8_t *)table;
        rec.size=sizeof(dupe_table_t)
                    +(dupe_table_get_capacity(table)-1)*sizeof(dupe_entry_t);
        st=blob_allocate(env, db, &rec, 0, rid);
    }
    else if (table_id && page) {
        page_set_dirty(page, env);
    }
    else {
        ham_assert(!"shouldn't be here", (0));
	}

    if (alloc_table)
        allocator_free(env_get_allocator(env), table);

    if (new_position)
        *new_position=position;

    return (st);
}
ham_status_t 
btree_find_cursor(ham_btree_t *be, ham_bt_cursor_t *cursor, 
           ham_key_t *key, ham_record_t *record, ham_u32_t flags)
{
	ham_status_t st;
    ham_page_t *page = NULL;
    btree_node_t *node = NULL;
    int_key_t *entry;
    ham_s32_t idx = -1;
    ham_db_t *db=be_get_db(be);
    find_hints_t hints = {flags, flags, 0, HAM_FALSE, HAM_FALSE, 1};

    btree_find_get_hints(&hints, db, key);

    if (hints.key_is_out_of_bounds) {
        stats_update_find_fail_oob(db, &hints);
        return HAM_KEY_NOT_FOUND;
    }

    if (hints.try_fast_track) {
        /* 
         * see if we get a sure hit within this btree leaf; if not, revert to 
         * regular scan 
         *
         * As this is a speed-improvement hint re-using recent material, the 
         * page should still sit in the cache, or we're using old info, which 
         * should be discarded.
         */
        st = db_fetch_page(&page, db, hints.leaf_page_addr, DB_ONLY_FROM_CACHE);
		ham_assert(st ? !page : 1, (0));
		if (st)
			return st;
        if (page) {
            node=ham_page_get_btree_node(page);
            ham_assert(btree_node_is_leaf(node), (0));
			ham_assert(btree_node_get_count(node) >= 3, (0)); /* edges + middle match */

            idx = btree_node_search_by_key(db, page, key, hints.flags);
            /* 
             * if we didn't hit a match OR a match at either edge, FAIL.
             * A match at one of the edges is very risky, as this can also 
             * signal a match far away from the current node, so we need 
             * the full tree traversal then.
             */
            if (idx <= 0 || idx >= btree_node_get_count(node) - 1) {
                idx = -1;
            }
            /*
             * else: we landed in the middle of the node, so we don't need to
             * traverse the entire tree now.
             */
        }

        /* Reset any errors which may have been collected during the hinting 
         * phase -- this is done by setting 'idx = -1' above as that effectively 
		 * clears the possible error code stored in there when (idx < -1) 
		 */
    }

    if (idx == -1) {
        /* get the address of the root page */
        if (!btree_get_rootpage(be)) {
            stats_update_find_fail(db, &hints);
            return HAM_KEY_NOT_FOUND;
        }

        /* load the root page */
        st=db_fetch_page(&page, db, btree_get_rootpage(be), 0);
		ham_assert(st ? !page : 1, (0));
        if (!page) {
            ham_assert(st, (0));
            stats_update_find_fail(db, &hints);
			return st ? st : HAM_INTERNAL_ERROR;
        }

        /* now traverse the root to the leaf nodes, till we find a leaf */
        node=ham_page_get_btree_node(page);
        if (!btree_node_is_leaf(node)) {
            /* signal 'don't care' when we have multiple pages; we resolve 
               this once we've got a hit further down */
            if (hints.flags & (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH)) 
                hints.flags |= (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH);

            for (;;) {
                hints.cost++;
                st=btree_traverse_tree(&page, 0, db, page, key);
                if (!page) {
                    stats_update_find_fail(db, &hints);
					return st ? st : HAM_KEY_NOT_FOUND;
                }

                node=ham_page_get_btree_node(page);
                if (btree_node_is_leaf(node))
                    break;
            }
        }

        /* check the leaf page for the key */
        idx=btree_node_search_by_key(db, page, key, hints.flags);
        if (idx < -1) {
            stats_update_find_fail(db, &hints);
            return (ham_status_t)idx;
        }
    }  /* end of regular search */

    /*
     * When we are performing an approximate match, the worst case
     * scenario is where we've picked the wrong side of the fence
     * while sitting at a page/node boundary: that's what this
     * next piece of code resolves:
     *
     * essentially it moves one record forwards or backward when
     * the flags tell us this is mandatory and we're not yet in the proper
     * position yet.
     *
     * The whole trick works, because the code above detects when
     * we need to traverse a multi-page btree -- where this worst-case
     * scenario can happen -- and adjusted the flags to accept
     * both LT and GT approximate matches so that btree_node_search_by_key()
     * will be hard pressed to return a 'key not found' signal (idx==-1),
     * instead delivering the nearest LT or GT match; all we need to
     * do now is ensure we've got the right one and if not, 
     * shift by one.
     */
    if (idx >= 0) {
        if ((ham_key_get_intflags(key) & KEY_IS_APPROXIMATE) 
            && (hints.original_flags 
                    & (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH)) 
                != (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH)) {
            if ((ham_key_get_intflags(key) & KEY_IS_GT) 
                && (hints.original_flags & HAM_FIND_LT_MATCH)) {
                /*
                 * if the index-1 is still in the page, just decrement the
                 * index
                 */
                if (idx > 0) {
                    idx--;
                }
                else {
                    /*
                     * otherwise load the left sibling page
                     */
                    if (!btree_node_get_left(node)) {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, btree_node_get_left(node), 0);
					ham_assert(st ? !page : 1, (0));
                    if (!page) {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
						return st ? st : HAM_INTERNAL_ERROR;
                    }
                    node = ham_page_get_btree_node(page);
                    idx = btree_node_get_count(node) - 1;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                        & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
            }
            else if ((ham_key_get_intflags(key) & KEY_IS_LT) 
                    && (hints.original_flags & HAM_FIND_GT_MATCH)) {
                /*
                 * if the index+1 is still in the page, just increment the
                 * index
                 */
                if (idx + 1 < btree_node_get_count(node)) {
                    idx++;
                }
                else {
                    /*
                     * otherwise load the right sibling page
                     */
                    if (!btree_node_get_right(node))
                    {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, 
                                    btree_node_get_right(node), 0);
                    if (!page) {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
						return st ? st : HAM_INTERNAL_ERROR;
                    }
                    node = ham_page_get_btree_node(page);
                    idx = 0;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                        & ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
            }
        }
        else if (!(ham_key_get_intflags(key) & KEY_IS_APPROXIMATE) 
                && !(hints.original_flags & HAM_FIND_EXACT_MATCH) 
                && (hints.original_flags != 0)) {
            /* 
             * 'true GT/LT' has been added @ 2009/07/18 to complete 
             * the EQ/LEQ/GEQ/LT/GT functionality;
             *
             * 'true LT/GT' is simply an extension upon the already existing 
             * LEQ/GEQ logic just above; all we do here is move one record 
             * up/down as it just happens that we get an exact ('equal') 
             * match here.
             *
             * The fact that the LT/GT constants share their bits with the 
             * LEQ/GEQ flags so that LEQ==(LT|EXACT) and GEQ==(GT|EXACT) 
             * ensures that we can restrict our work to a simple adjustment 
             * right here; everything else has already been taken of by the 
             * LEQ/GEQ logic in the section above when the key has been 
             * flagged with the KEY_IS_APPROXIMATE flag.
             */
            if (hints.original_flags & HAM_FIND_LT_MATCH)
            {
                /*
                 * if the index-1 is still in the page, just decrement the
                 * index
                 */
                if (idx > 0)
                {
                    idx--;

                    ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                            & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
                }
                else
                {
                    /*
                     * otherwise load the left sibling page
                     */
                    if (!btree_node_get_left(node))
                    {
                        /* when an error is otherwise unavoidable, see if 
                           we have an escape route through GT? */

                        if (hints.original_flags & HAM_FIND_GT_MATCH)
                        {
                            /*
                             * if the index+1 is still in the page, just 
                             * increment the index
                             */
                            if (idx + 1 < btree_node_get_count(node))
                            {
                                idx++;
                            }
                            else
                            {
                                /*
                                 * otherwise load the right sibling page
                                 */
                                if (!btree_node_get_right(node))
                                {
                                    stats_update_find_fail(db, &hints);
                                    ham_assert(node == ham_page_get_btree_node(page), (0));
                                    stats_update_any_bound(db, page, key, hints.original_flags, -1);
                                    return HAM_KEY_NOT_FOUND;
                                }

                                hints.cost++;
                                st = db_fetch_page(&page, db,
                                                btree_node_get_right(node), 0);
                                if (!page)
                                {
                                    ham_assert(st, (0));
                                    stats_update_find_fail(db, &hints);
									return st ? st : HAM_INTERNAL_ERROR;
                                }
                                node = ham_page_get_btree_node(page);
                                idx = 0;
                            }
                            ham_key_set_intflags(key, (ham_key_get_intflags(key) & 
                                            ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
                        }
                        else
                        {
                            stats_update_find_fail(db, &hints);
                            ham_assert(node == ham_page_get_btree_node(page), (0));
                            stats_update_any_bound(db, page, key, hints.original_flags, -1);
                            return HAM_KEY_NOT_FOUND;
                        }
                    }
                    else
                    {
                        hints.cost++;
                        st = db_fetch_page(&page, db,
                                        btree_node_get_left(node), 0);
                        if (!page)
                        {
                            ham_assert(st, (0));
                            stats_update_find_fail(db, &hints);
							return st ? st : HAM_INTERNAL_ERROR;
                        }
                        node = ham_page_get_btree_node(page);
                        idx = btree_node_get_count(node) - 1;

                        ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                                        & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
                    }
                }
            }
            else if (hints.original_flags & HAM_FIND_GT_MATCH)
            {
                /*
                 * if the index+1 is still in the page, just increment the
                 * index
                 */
                if (idx + 1 < btree_node_get_count(node))
                {
                    idx++;
                }
                else
                {
                    /*
                     * otherwise load the right sibling page
                     */
                    if (!btree_node_get_right(node))
                    {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, 
                                btree_node_get_right(node), 0);
                    if (!page)
                    {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
						return st ? st : HAM_INTERNAL_ERROR;
                    }
                    node = ham_page_get_btree_node(page);
                    idx = 0;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) 
                                        & ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
            }
        }
    }

    if (idx<0) {
        stats_update_find_fail(db, &hints);
        ham_assert(node, (0));
        ham_assert(page, (0));
        ham_assert(node == ham_page_get_btree_node(page), (0));
        stats_update_any_bound(db, page, key, hints.original_flags, -1);
        return HAM_KEY_NOT_FOUND;
    }

    /* load the entry, and store record ID and key flags */
    entry=btree_node_get_key(db, node, idx);

    /* set the cursor-position to this key */
    if (cursor) {
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_UNCOUPLED), 
                ("coupling an uncoupled cursor, but need a nil-cursor"));
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_COUPLED), 
                ("coupling a coupled cursor, but need a nil-cursor"));
        page_add_cursor(page, (ham_cursor_t *)cursor);
        bt_cursor_set_flags(cursor, 
                bt_cursor_get_flags(cursor)|BT_CURSOR_FLAG_COUPLED);
        bt_cursor_set_coupled_page(cursor, page);
        bt_cursor_set_coupled_index(cursor, idx);
    }

    /*
     * during util_read_key and util_read_record, new pages might be needed,
     * and the page at which we're pointing could be moved out of memory; 
     * that would mean that the cursor would be uncoupled, and we're losing
     * the 'entry'-pointer. therefore we 'lock' the page by incrementing 
     * the reference counter
     */
    page_add_ref(page);
    ham_assert(btree_node_is_leaf(node), ("iterator points to internal node"));

    /* no need to load the key if we have an exact match: */
    if (key && (ham_key_get_intflags(key) & KEY_IS_APPROXIMATE)) 
    {
        ham_status_t st=util_read_key(db, entry, key);
        if (st) 
        {
            page_release_ref(page);
            stats_update_find_fail(db, &hints);
            return (st);
        }
    }

    if (record) 
    {
        ham_status_t st;
        record->_intflags=key_get_flags(entry);
        record->_rid=key_get_ptr(entry);
        st=util_read_record(db, record, flags);
        if (st) 
        {
            page_release_ref(page);
            stats_update_find_fail(db, &hints);
            return (st);
        }
    }

    page_release_ref(page);
    
    stats_update_find(db, page, &hints);
    ham_assert(node == ham_page_get_btree_node(page), (0));
    stats_update_any_bound(db, page, key, hints.original_flags, idx);

    return (0);
}
Esempio n. 4
0
ham_status_t
txn_commit(ham_txn_t *txn, ham_u32_t flags)
{
    ham_status_t st;
    ham_env_t *env=txn_get_env(txn);

    /*
     * are cursors attached to this txn? if yes, fail
     */
    if (txn_get_cursor_refcount(txn)) {
        ham_trace(("transaction cannot be committed till all attached "
                    "cursors are closed"));
        return HAM_CURSOR_STILL_OPEN;
    }

    /*
     * in case of logging: write after-images of all modified pages,
     * if they were modified by this transaction;
     * then write the transaction boundary
     */
    if (env_get_log(env) && !(txn_get_flags(txn)&HAM_TXN_READ_ONLY)) 
    {
        ham_page_t *head=txn_get_pagelist(txn);
        while (head) {
            ham_page_t *next;

            next=page_get_next(head, PAGE_LIST_TXN);
            if (page_get_dirty_txn(head)==txn_get_id(txn) 
                    || page_get_dirty_txn(head)==PAGE_DUMMY_TXN_ID) {
                st=ham_log_add_page_after(head);
                if (st) 
                    return st;
            }
            head=next;
        }

        st=ham_log_append_txn_commit(env_get_log(env), txn);
        if (st) 
            return st;
    }

    env_set_txn(env, 0);

    /*
     * flush the pages
     *
     * shouldn't use local var for the list head, as
     * txn_get_pagelist(txn) should be kept up to date and correctly
     * formatted while we call db_free_page() et al.
     */
    while (txn_get_pagelist(txn))
    {
        ham_page_t *head = txn_get_pagelist(txn);
        
        txn_get_pagelist(txn) = page_list_remove(head, PAGE_LIST_TXN, head);

        /* page is no longer in use */
        page_release_ref(head);

        /* 
         * delete the page? 
         */
        if (page_get_npers_flags(head)&PAGE_NPERS_DELETE_PENDING) {
            /* remove page from cache, add it to garbage list */
            page_set_undirty(head);
        
            st=db_free_page(head, DB_MOVE_TO_FREELIST);
            if (st)
                return (st);
        }
        else if (flags & HAM_TXN_FORCE_WRITE) {
            /* flush the page */
            st=db_flush_page(env, head, 
                    flags & HAM_TXN_FORCE_WRITE ? HAM_WRITE_THROUGH : 0);
            if (st) {
                page_add_ref(head);
                /* failure: re-insert into transaction list! */
                txn_get_pagelist(txn) = page_list_insert(txn_get_pagelist(txn),
                            PAGE_LIST_TXN, head);
                return (st);
            }
        }
    }

    txn_set_pagelist(txn, 0);

    return HAM_SUCCESS;
}
void 
stats_update_any_bound(ham_db_t *db, struct ham_page_t *page, ham_key_t *key, ham_u32_t find_flags, ham_s32_t slot)
{
    ham_status_t st;
    ham_runtime_statistics_dbdata_t *dbdata = db_get_db_perf_data(db);
    ham_env_t *env = db_get_env(db);
    btree_node_t *node = ham_page_get_btree_node(page);

    ham_assert(env_get_allocator(env) != 0, (0));
    ham_assert(btree_node_is_leaf(node), (0));
    if (!btree_node_get_left(node))
    {
        /* this is the leaf page which carries the lower bound key */
        ham_assert(btree_node_get_count(node) == 0 ? !btree_node_get_right(node) : 1, (0));
        if (btree_node_get_count(node) == 0)
        {
            /* range is empty 
             *
             * do not set the lower/upper boundary; otherwise we may trigger
             * a key comparison with an empty key, and the comparison function
             * could not be fit to handle this.

             EDIT: the code should be able to handle that particular situation
                   as this was tested a while ago. Besides, the settings here
                   are a signal for the hinter the table is currently 
                   completely empty and no btree traversal whatsoever is 
                   needed before we find, insert or erase.

             EDIT #2: custom compare routines may b0rk on the data NULL pointers
                   (the monster test comparison function does, for example),
                   so the smarter thing to do here is NOT set the bounds here.

                   The trouble with that approach is that the hinter no longer
                   'knows about' an empty table, but is that so bad? An empty
                   table would constitute only a btree root node anyway, so the
                   regular traversal would be quick anyhow.
             */
            if (dbdata->lower_bound_index != 1
                || dbdata->upper_bound_index != 0)
            {
                /* only set when not done already */
                if (dbdata->lower_bound.data)
                    allocator_free(env_get_allocator(env), dbdata->lower_bound.data);
                if (dbdata->upper_bound.data)
                    allocator_free(env_get_allocator(env), dbdata->upper_bound.data);
                memset(&dbdata->lower_bound, 0, sizeof(dbdata->lower_bound));
                memset(&dbdata->upper_bound, 0, sizeof(dbdata->upper_bound));
                dbdata->lower_bound_index = 1; /* impossible value for lower bound index */
                dbdata->upper_bound_index = 0;
                dbdata->lower_bound_page_address = page_get_self(page);
                dbdata->upper_bound_page_address = 0; /* page_get_self(page); */
                dbdata->lower_bound_set = HAM_TRUE;
                dbdata->upper_bound_set = HAM_FALSE; /* cannot be TRUE or subsequent updates for single record carrying tables may fail */
                //ham_assert(dbdata->lower_bound.data != NULL, (0));
                ham_assert(dbdata->lower_bound_page_address != 0, (0));
            }
        }
        else
        {
            /*
            lower bound key is always located at index [0]

            update our key info when either our current data is undefined (startup condition)
            or the first key was edited in some way (slot == 0). This 'copy anyway' approach 
            saves us one costly key comparison.
            */
            if (dbdata->lower_bound_index != 0
                || dbdata->lower_bound_page_address != page_get_self(page)
                || slot == 0)
            {
                page_add_ref(page);

                /* only set when not done already */
                dbdata->lower_bound_set = HAM_TRUE;
                dbdata->lower_bound_index = 0;
                dbdata->lower_bound_page_address = page_get_self(page);

                if (dbdata->lower_bound.data) {
                    allocator_free(env_get_allocator(env), dbdata->lower_bound.data);
                    dbdata->lower_bound.data=0;
                    dbdata->lower_bound.size=0;
                }

                st = util_copy_key_int2pub(db, 
                    btree_node_get_key(db, node, dbdata->lower_bound_index),
                    &dbdata->lower_bound);
                if (st) 
                {
                    /* panic! is case of failure, just drop the lower bound 
                     * entirely. */
                    if (dbdata->lower_bound.data)
                        allocator_free(env_get_allocator(env), dbdata->lower_bound.data);
                    memset(&dbdata->lower_bound, 0, 
                            sizeof(dbdata->lower_bound));
                    dbdata->lower_bound_index = 0;
                    dbdata->lower_bound_page_address = 0;
                    dbdata->lower_bound_set = HAM_FALSE;
                }
                else
                {
                    ham_assert(dbdata->lower_bound.data == NULL ?
                        dbdata->lower_bound.size == 0 : 
                        dbdata->lower_bound.size > 0, (0));
                    ham_assert(dbdata->lower_bound_page_address != 0, (0));
                }
                page_release_ref(page);
            }
        }
    }

    if (!btree_node_get_right(node)) 
    {
        /* this is the leaf page which carries the upper bound key */
        ham_assert(btree_node_get_count(node) == 0 
                ? !btree_node_get_left(node) 
                : 1, (0));
        if (btree_node_get_count(node) != 0) 
        {
            /* 
             * range is non-empty; the other case has already been handled 
             * above upper bound key is always located at index [size-1] 
             * update our key info when either our current data is 
             * undefined (startup condition) or the last key was edited in 
             * some way (slot == size-1). This 'copy anyway' approach 
             * saves us one costly key comparison.
             */
            if (dbdata->upper_bound_index != btree_node_get_count(node) - 1
                    || dbdata->upper_bound_page_address != page_get_self(page)
                    || slot == btree_node_get_count(node) - 1) 
            {
                page_add_ref(page);

                /* only set when not done already */
                dbdata->upper_bound_set = HAM_TRUE;
                dbdata->upper_bound_index = btree_node_get_count(node) - 1;
                dbdata->upper_bound_page_address = page_get_self(page);

                if (dbdata->upper_bound.data) {
                    allocator_free(env_get_allocator(env), dbdata->upper_bound.data);
                    dbdata->upper_bound.data=0;
                    dbdata->upper_bound.size=0;
                }

                st = util_copy_key_int2pub(db, 
                    btree_node_get_key(db, node, dbdata->upper_bound_index),
                    &dbdata->upper_bound);
                if (st) 
                {
                    /* panic! is case of failure, just drop the upper bound 
                     * entirely. */
                    if (dbdata->upper_bound.data)
                        allocator_free(env_get_allocator(env), dbdata->upper_bound.data);
                    memset(&dbdata->upper_bound, 0, 
                            sizeof(dbdata->upper_bound));
                    dbdata->upper_bound_index = 0;
                    dbdata->upper_bound_page_address = 0;
                    dbdata->upper_bound_set = HAM_FALSE;
                }
                page_release_ref(page);
            }
        }
    }
}
Esempio n. 6
0
static ham_status_t
__append_key(ham_btree_t *be, ham_key_t *key, ham_record_t *record, 
                ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st=0;
    ham_page_t *page;
    btree_node_t *node;
    ham_db_t *db;

#ifdef HAM_DEBUG
    if (cursor && !bt_cursor_is_nil(cursor)) {
        ham_assert(be_get_db(be) == bt_cursor_get_db(cursor), (0));
    }
#endif

    db = be_get_db(be);

    /* 
     * see if we get this btree leaf; if not, revert to regular scan 
     *    
     * As this is a speed-improvement hint re-using recent material, the page 
     * should still sit in the cache, or we're using old info, which should be 
     * discarded.
     */
    st = db_fetch_page(&page, db, hints->leaf_page_addr, DB_ONLY_FROM_CACHE);
    if (st)
        return st;
    if (!page) {
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    page_add_ref(page);
    node=ham_page_get_btree_node(page);
    ham_assert(btree_node_is_leaf(node), ("iterator points to internal node"));

    /*
     * if the page is already full OR this page is not the right-most page
     * when we APPEND or the left-most node when we PREPEND
     * OR the new key is not the highest key: perform a normal insert
     */
    if ((hints->force_append && btree_node_get_right(node))
            || (hints->force_prepend && btree_node_get_left(node))
            || btree_node_get_count(node) >= btree_get_maxkeys(be)) {
        page_release_ref(page);
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    /*
     * if the page is not empty: check if we append the key at the end / start
     * (depending on force_append/force_prepend),
     * or if it's actually inserted in the middle (when neither force_append 
     * or force_prepend is specified: that'd be SEQUENTIAL insertion 
     * hinting somewhere in the middle of the total key range.
     */
    if (btree_node_get_count(node)!=0) {
        int cmp_hi;
        int cmp_lo;

        hints->cost++;
        if (!hints->force_prepend) {
            cmp_hi = key_compare_pub_to_int(db, page, key, 
                                btree_node_get_count(node)-1);
            /* key is in the middle */
            if (cmp_hi < -1) {
                page_release_ref(page);
                return (ham_status_t)cmp_hi;
            }
            /* key is at the end */
            if (cmp_hi > 0) {
                if (btree_node_get_right(node)) {
                    /* not at top end of the btree, so we can't do the 
                     * fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_APPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_TRUE;
                hints->force_prepend = HAM_FALSE;
            }
        }
        else { /* hints->force_prepend is true */
            /* not bigger than the right-most node while we 
             * were trying to APPEND */
            cmp_hi = -1;
        }

        if (!hints->force_append) {
            cmp_lo = key_compare_pub_to_int(db, page, key, 0);
            /* in the middle range */
            if (cmp_lo < -1) {
                page_release_ref(page);
                return ((ham_status_t)cmp_lo);
            }
            /* key is at the start of page */
            if (cmp_lo < 0) {
                if (btree_node_get_left(node)) {
                    /* not at bottom end of the btree, so we can't 
                     * do the fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_PREPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_TRUE;
            }
        }
        else { /* hints->force_prepend is true */
            /* not smaller than the left-most node while we were 
             * trying to PREPEND */
            cmp_lo = +1;
        }

        /* handle inserts in the middle range */
        if (cmp_lo >= 0 && cmp_hi <= 0) {
            /*
             * Depending on where we are in the btree, the current key either
             * is going to end up in the middle of the given node/page,
             * OR the given key is out of range of the given leaf node.
             */
            if (hints->force_append || hints->force_prepend) {
                /*
                 * when prepend or append is FORCED, we are expected to 
                 * add keys ONLY at the beginning or end of the btree
                 * key range. Clearly the current key does not fit that
                 * criterium.
                 */
                page_release_ref(page);
                //hints->flags &= ~HAM_HINT_PREPEND;
                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_FALSE;
                return (__insert_cursor(be, key, record, cursor, hints));
            }

            /* 
             * we discovered that the key must be inserted in the middle 
             * of the current leaf.
             * 
             * It does not matter whether the current leaf is at the start or
             * end of the btree range; as we need to add the key in the middle
             * of the current leaf, that info alone is enough to continue with
             * the fast track insert operation.
             */
            ham_assert(!hints->force_prepend && !hints->force_append, (0));
        }

        ham_assert((hints->force_prepend + hints->force_append) < 2, 
                ("Either APPEND or PREPEND flag MAY be set, but not both"));
    }
    else { /* empty page: force insertion in slot 0 */
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_TRUE;
    }

    /*
     * the page will be changed - write it to the log (if a log exists)
     */
    st=ham_log_add_page_before(page);
    if (st) {
        page_release_ref(page);
        return (st);
    }

    /*
     * OK - we're really appending/prepending the new key.
     */
    ham_assert(hints->force_append || hints->force_prepend, (0));
    st=__insert_nosplit(page, key, 0, record, cursor, hints);

    page_release_ref(page);
    return (st);
}