/*
 * Collect insertion hints for an upcoming btree insert of @a key into @a db.
 *
 * On entry, hints->force_append, hints->force_prepend and
 * hints->try_fast_track must all be HAM_FALSE (asserted below). On return
 * they may be set - together with hints->leaf_page_addr - to steer the
 * insert towards a fast-track append/prepend on a known leaf page.
 *
 * Two sources of information are consulted:
 *   1) an explicit HAM_HINT_APPEND/HAM_HINT_PREPEND flag combined with a
 *      cursor that is coupled to the right-most/left-most leaf, and
 *   2) the gathered runtime statistics: last-hit page, sequential hit
 *      counts, hinting success ratio, and the known lower/upper key bounds.
 */
void 
btree_insert_get_hints(insert_hints_t *hints, ham_db_t *db, ham_key_t *key)
{
    ham_runtime_statistics_dbdata_t *dbdata = db_get_db_perf_data(db);
    ham_bt_cursor_t *cursor = (ham_bt_cursor_t *)hints->cursor;

    /* the caller must hand in a 'clean' hints structure */
    ham_assert(hints->force_append == HAM_FALSE, (0));
    ham_assert(hints->force_prepend == HAM_FALSE, (0));
    ham_assert(hints->try_fast_track == HAM_FALSE, (0));

    if ((hints->flags & HAM_HINT_APPEND) && (cursor))
    {
        if (!bt_cursor_is_nil(cursor))
        {
            ham_assert(bt_cursor_is_nil(cursor)==0, ("cursor must not be nil"));
            ham_assert(db == bt_cursor_get_db(cursor), (0));

            /*
             fetch the page of the cursor. We deem the cost of an uncoupled cursor 
             too high as that implies calling a full-fledged key search on the
             given key - which can be rather costly - so we rather wait for the
             statistical cavalry a little later on in this program then.
             */
            if (bt_cursor_get_flags(cursor) & BT_CURSOR_FLAG_COUPLED) 
            {
                ham_page_t *page = bt_cursor_get_coupled_page(cursor);
                btree_node_t *node = ham_page_get_btree_node(page);
                ham_assert(btree_node_is_leaf(node), 
                            ("cursor points to internal node"));
                //ham_assert(!btree_node_get_right(node), ("cursor points to leaf node which is NOT the uppermost/last one"));
                /*
                 * if cursor is not coupled to the LAST (right-most) leaf
                 * in the Database, it does not make sense to append
                 */
                if (btree_node_get_right(node)) {
                    hints->force_append = HAM_FALSE;
                    hints->try_fast_track = HAM_FALSE;
                }
                else {
                    hints->leaf_page_addr = page_get_self(page);
                    hints->force_append = HAM_TRUE;
                    hints->try_fast_track = HAM_TRUE;
                }
            }
        }
    }
    else if ((hints->flags & HAM_HINT_PREPEND) && (cursor))
    {
        if (!bt_cursor_is_nil(cursor))
        {
            ham_assert(bt_cursor_is_nil(cursor)==0, ("cursor must not be nil"));
            ham_assert(db == bt_cursor_get_db(cursor), (0));

            /*
             fetch the page of the cursor. We deem the cost of an uncoupled cursor 
             too high as that implies calling a full-fledged key search on the
             given key - which can be rather costly - so we rather wait for the
             statistical cavalry a little later on in this program then.
             */
            if (bt_cursor_get_flags(cursor) & BT_CURSOR_FLAG_COUPLED) 
            {
                ham_page_t *page = bt_cursor_get_coupled_page(cursor);
                btree_node_t *node = ham_page_get_btree_node(page);
                ham_assert(btree_node_is_leaf(node), 
                        ("cursor points to internal node"));
                //ham_assert(!btree_node_get_left(node), ("cursor points to leaf node which is NOT the lowest/first one"));
                /*
                 * if cursor is not coupled to the FIRST (left-most) leaf
                 * in the Database, it does not make sense to prepend
                 */
                if (btree_node_get_left(node)) {
                    hints->force_prepend = HAM_FALSE;
                    hints->try_fast_track = HAM_FALSE;
                }
                else {
                    hints->leaf_page_addr = page_get_self(page);
                    hints->force_prepend = HAM_TRUE;
                    hints->try_fast_track = HAM_TRUE;
                }
            }
        }
    }
    //hints->flags &= ~(HAM_HINT_APPEND | HAM_HINT_PREPEND);

    /* 
    The statistical cavalry:

    - when the given key is positioned beyond the end, hint 'append' anyway.

    - When the given key is positioned before the start, hint 'prepend' anyway.
    
    NOTE: This 'auto-detect' mechanism (thanks to the key bounds being collected through
    the statistics gathering calls) renders the manual option HAM_HINT_APPEND/_PREPEND
    somewhat obsolete, really. 

    The only advantage of manually specifying HAM_HINT_APPEND/_PREPEND is that it can save you
    two key comparisons in here.
    */
    /* extended keys are expected to have been resolved by the caller
       already; clear the flag regardless so stale state cannot leak into
       the comparisons below */
    ham_assert(!(key->_flags & KEY_IS_EXTENDED), (0));
    key->_flags &= ~KEY_IS_EXTENDED;

    if (!hints->try_fast_track)
    {
        ham_runtime_statistics_opdbdata_t *opstats = db_get_op_perf_data(db, HAM_OPERATION_STATS_INSERT);

        ham_assert(opstats != NULL, (0));

        if (hints->flags & (HAM_HINT_APPEND | HAM_HINT_PREPEND))
        {
            /* find specific: APPEND / PREPEND --> SEQUENTIAL */
            hints->flags &= ~(HAM_HINT_APPEND | HAM_HINT_PREPEND); 
            hints->flags |= HAM_HINT_SEQUENTIAL;
        }

        if ((hints->flags & HAM_HINTS_MASK) == 0)
        {
            /* no local preference specified; go with the DB-wide DAM config */
            switch (db_get_data_access_mode(db) & ~HAM_DAM_ENFORCE_PRE110_FORMAT)
            {
            default:
                break;

            case HAM_DAM_SEQUENTIAL_INSERT:
                hints->flags |= HAM_HINT_SEQUENTIAL;
                break;
            }
        }

        switch (hints->flags & HAM_HINTS_MASK)
        {
        default:
        case HAM_HINT_RANDOM_ACCESS:
            /* do not provide any hints for the fast track */
            break;

        case HAM_HINT_SEQUENTIAL:
            /*
            when we have more than 4 hits on the same page already, we'll assume this one 
            will end up there as well. As this counter will reset itself on the first FAIL,
            there's no harm in acting this quick. In pathological cases, the worst what
            can happen is that in 20% of cases there will be performed an extra check on
            a cached btree leaf node, which is still minimal overhead then.
            */
            if (opstats->btree_last_page_sq_hits >= 3)
            {
                hints->leaf_page_addr = opstats->btree_last_page_addr;
                hints->try_fast_track = HAM_TRUE;
                break;
            }
            /* fall through! */
            /*
             * NOTE: the 'if (0)' wrapper below deliberately exploits C's
             * switch semantics: the UBER_FAST case label jumps straight
             * INTO the block, while the plain SEQUENTIAL fall-through from
             * above skips it entirely (the 'if (0)' body is never entered
             * through normal flow). Hence only the UBER_FAST variant gets
             * the cheaper '>= 1 hit' trigger.
             */
            if (0)
            {
        case HAM_HINT_SEQUENTIAL | HAM_HINT_UBER_FAST_ACCESS:
            /* same as above, but now act as fast as possible on this info */
            if (opstats->btree_last_page_sq_hits >= 1)
            {
                hints->leaf_page_addr = opstats->btree_last_page_addr;
                hints->try_fast_track = HAM_TRUE;
                break;
            }
            }
            {
                /* 
                we assume this request is located near the previous request, so we check
                if there's anything in the statistics that can help out.

                Note #1: since the hinting counts are 'aged' down to a value of 0..1K (with 2K peak),
                we don't need to use a 64-bit integer for the ratio calculation here.

                Note #2: the ratio is only 'trustworthy' when the base count is about 4 or higher.
                This is because the aging rounds up while scaling down, which means one single FAIL
                can get you a ratio as large as 50% when total count is 1 as well, due to
                either startup or aging rescale; without this minimum size check, the ratio + aging
                would effectively stop the hinter from working after either an aging step or when
                a few FAILs happen during the initial few FIND operations (startup condition).

                EDIT: the above bit about the hinter stopping due to too much FAIL at start or after
                rescale does NOT apply any more as the hinter now also includes checks which trigger
                when a (small) series of hits on the same page are found, which acts as a restarter
                for this as well.
                */
                ham_u32_t ratio = opstats->btree_hinting_fail_count;

                /* permille failure ratio; '+ 1' guards against div-by-zero */
                ratio = ratio * 1000 / (1 + opstats->btree_hinting_count);
                if (ratio < 200)
                {
                    hints->leaf_page_addr = opstats->btree_last_page_addr;
                    hints->try_fast_track = HAM_TRUE;
                    /* NOTE(review): a low failure ratio only says the key
                       probably lands NEAR the last-hit page, not at its very
                       end; forcing 'append' here looks aggressive - confirm
                       that the append fast path re-validates the key position
                       and falls back cleanly when it is wrong */
                    hints->force_append = HAM_TRUE;
                }
            }
            
            /* bounds check: key below the known lowest key --> prepend */
            if (dbdata->lower_bound_set)
            {
                if (dbdata->lower_bound_index == 1)
                {
                    /*
                    impossible index: this is a marker to signal the table 
                    is completely empty
                    */
                    //hints->flags |= HAM_HINT_PREPEND;
                    hints->force_prepend = HAM_TRUE;
                    hints->leaf_page_addr = dbdata->lower_bound_page_address;
                    hints->try_fast_track = HAM_TRUE;
                }
                else
                {
                    int cmp;
                    
                    ham_assert(dbdata->lower_bound_index == 0, (0));
                    ham_assert(dbdata->lower_bound.data == NULL ?
                        dbdata->lower_bound.size == 0 : 
                        dbdata->lower_bound.size > 0, (0));
                    ham_assert(dbdata->lower_bound_page_address != 0, (0));
                    cmp = db_compare_keys(db, key, &dbdata->lower_bound);

                    if (cmp < 0)
                    {
                        //hints->flags |= HAM_HINT_PREPEND;
                        hints->force_prepend = HAM_TRUE;
                        hints->leaf_page_addr = dbdata->lower_bound_page_address;
                        hints->try_fast_track = HAM_TRUE;
                    }
                }
            }

            /* bounds check: key above the known highest key --> append
               (may override the prepend decision made just above) */
            if (dbdata->upper_bound_set)
            {
                int cmp;
                
                ham_assert(dbdata->upper_bound_index >= 0, (0));
                ham_assert(dbdata->upper_bound.data == NULL ?
                    dbdata->upper_bound.size == 0 : 
                    dbdata->upper_bound.size > 0, (0));
                ham_assert(dbdata->upper_bound_page_address != 0, (0));
                cmp = db_compare_keys(db, key, &dbdata->upper_bound);

                if (cmp > 0)
                {
                    //hints->flags |= HAM_HINT_APPEND;
                    hints->force_append = HAM_TRUE;
                    hints->leaf_page_addr = dbdata->upper_bound_page_address;
                    hints->try_fast_track = HAM_TRUE;
                }
            }
            break;
        }
    }

    /* 
    we don't yet hint about jumping to the last accessed leaf node immediately (yet)
    
    EDIT:
    
    now we do: see the flags + dam code above: this happens when neither PREPEND 
    nor APPEND hints are specified 
    */
}
Example #2
0
/*
 * Fast-track insert: try to place @a key directly into the cached leaf
 * page hinted at by hints->leaf_page_addr - appended at the end
 * (force_append), prepended at the start (force_prepend), or, for plain
 * SEQUENTIAL hinting, somewhere inside that leaf.
 *
 * Falls back to the regular top-down insert (__insert_cursor) whenever
 * the hinted page is no longer in the cache, is full, is not the
 * right-most/left-most leaf as required, or the key turns out not to
 * belong in this leaf after all. On the fall-back paths the force flags
 * are cleared first so the regular insert is not misdirected.
 *
 * @return 0 on success, otherwise a ham_status_t error code.
 */
static ham_status_t
__append_key(ham_btree_t *be, ham_key_t *key, ham_record_t *record, 
                ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st=0;
    ham_page_t *page;
    btree_node_t *node;
    ham_db_t *db;

#ifdef HAM_DEBUG
    if (cursor && !bt_cursor_is_nil(cursor)) {
        ham_assert(be_get_db(be) == bt_cursor_get_db(cursor), (0));
    }
#endif

    db = be_get_db(be);

    /* 
     * see if we get this btree leaf; if not, revert to regular scan 
     *    
     * As this is a speed-improvement hint re-using recent material, the page 
     * should still sit in the cache, or we're using old info, which should be 
     * discarded.
     */
    st = db_fetch_page(&page, db, hints->leaf_page_addr, DB_ONLY_FROM_CACHE);
    if (st)
        return st;
    if (!page) {
        /* hinted page no longer cached: stale hint, fall back */
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    /* pin the page; every return path below must release this reference */
    page_add_ref(page);
    node=ham_page_get_btree_node(page);
    ham_assert(btree_node_is_leaf(node), ("iterator points to internal node"));

    /*
     * if the page is already full OR this page is not the right-most page
     * when we APPEND or the left-most node when we PREPEND
     * OR the new key is not the highest key: perform a normal insert
     */
    if ((hints->force_append && btree_node_get_right(node))
            || (hints->force_prepend && btree_node_get_left(node))
            || btree_node_get_count(node) >= btree_get_maxkeys(be)) {
        page_release_ref(page);
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    /*
     * if the page is not empty: check if we append the key at the end / start
     * (depending on force_append/force_prepend),
     * or if it's actually inserted in the middle (when neither force_append 
     * or force_prepend is specified: that'd be SEQUENTIAL insertion 
     * hinting somewhere in the middle of the total key range.
     *
     * NOTE: the two comparison blocks below are order-dependent: the first
     * block may set force_append, which the second block then uses to skip
     * the low-end comparison.
     */
    if (btree_node_get_count(node)!=0) {
        int cmp_hi;   /* key vs. highest key in this leaf */
        int cmp_lo;   /* key vs. lowest key in this leaf */

        hints->cost++;
        if (!hints->force_prepend) {
            cmp_hi = key_compare_pub_to_int(db, page, key, 
                                btree_node_get_count(node)-1);
            /* values < -1 are error codes from the comparison; propagate */
            if (cmp_hi < -1) {
                page_release_ref(page);
                return (ham_status_t)cmp_hi;
            }
            /* key is at the end */
            if (cmp_hi > 0) {
                if (btree_node_get_right(node)) {
                    /* not at top end of the btree, so we can't do the 
                     * fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_APPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_TRUE;
                hints->force_prepend = HAM_FALSE;
            }
        }
        else { /* hints->force_prepend is true */
            /* not bigger than the right-most node while we 
             * were trying to APPEND */
            cmp_hi = -1;
        }

        if (!hints->force_append) {
            cmp_lo = key_compare_pub_to_int(db, page, key, 0);
            /* values < -1 are error codes from the comparison; propagate */
            if (cmp_lo < -1) {
                page_release_ref(page);
                return ((ham_status_t)cmp_lo);
            }
            /* key is at the start of page */
            if (cmp_lo < 0) {
                if (btree_node_get_left(node)) {
                    /* not at bottom end of the btree, so we can't 
                     * do the fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_PREPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_TRUE;
            }
        }
        else { /* hints->force_prepend is true */
            /* not smaller than the left-most node while we were 
             * trying to PREPEND */
            cmp_lo = +1;
        }

        /* handle inserts in the middle range */
        if (cmp_lo >= 0 && cmp_hi <= 0) {
            /*
             * Depending on where we are in the btree, the current key either
             * is going to end up in the middle of the given node/page,
             * OR the given key is out of range of the given leaf node.
             */
            if (hints->force_append || hints->force_prepend) {
                /*
                 * when prepend or append is FORCED, we are expected to 
                 * add keys ONLY at the beginning or end of the btree
                 * key range. Clearly the current key does not fit that
                 * criterium.
                 */
                page_release_ref(page);
                //hints->flags &= ~HAM_HINT_PREPEND;
                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_FALSE;
                return (__insert_cursor(be, key, record, cursor, hints));
            }

            /* 
             * we discovered that the key must be inserted in the middle 
             * of the current leaf.
             * 
             * It does not matter whether the current leaf is at the start or
             * end of the btree range; as we need to add the key in the middle
             * of the current leaf, that info alone is enough to continue with
             * the fast track insert operation.
             */
            ham_assert(!hints->force_prepend && !hints->force_append, (0));
        }

        ham_assert((hints->force_prepend + hints->force_append) < 2, 
                ("Either APPEND or PREPEND flag MAY be set, but not both"));
    }
    else { /* empty page: force insertion in slot 0 */
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_TRUE;
    }

    /*
     * the page will be changed - write it to the log (if a log exists)
     */
    st=ham_log_add_page_before(page);
    if (st) {
        page_release_ref(page);
        return (st);
    }

    /*
     * OK - we're really appending/prepending the new key.
     *
     * NOTE(review): the 'insert in the middle' path above deliberately ends
     * with BOTH force flags FALSE (see the assert there), which would trip
     * this assert in a HAM_DEBUG build - confirm whether that path is meant
     * to reach this point or whether this assert is stale.
     */
    ham_assert(hints->force_append || hints->force_prepend, (0));
    st=__insert_nosplit(page, key, 0, record, cursor, hints);

    page_release_ref(page);
    return (st);
}