/**
 * Descend one level in the btree: pick the child of the internal node
 * stored in @a page that has to be searched for @a key, and fetch it.
 *
 * @param page_ref  receives the fetched child page; set to 0 when the slot
 *                  lookup fails
 * @param idxptr    optional; when non-NULL it receives the slot that was
 *                  selected (-1 means "follow ptr_left")
 * @param db        the database handle
 * @param page      the internal (non-leaf, non-empty) page to descend from
 * @param key       the key that is being looked up
 *
 * @return the status of btree_get_slot() if that fails, otherwise the
 *         status of db_fetch_page() for the selected child
 */
ham_status_t
btree_traverse_tree(ham_page_t **page_ref, ham_s32_t *idxptr,
        ham_db_t *db, ham_page_t *page, ham_key_t *key)
{
    ham_status_t st;
    ham_s32_t slot;
    int_key_t *child_key;
    btree_node_t *node=ham_page_get_btree_node(page);

    /* the caller must hand us a non-empty internal node */
    ham_assert(btree_node_get_count(node)>0, (0));
    ham_assert(btree_node_get_ptr_left(node)!=0, (0));

    st=btree_get_slot(db, page, key, &slot, 0);
    if (st) {
        *page_ref = 0;
        return st;
    }

    if (idxptr)
        *idxptr=slot;

    if (slot!=-1) {
        /* follow the child pointer stored with the key in this slot */
        child_key=btree_node_get_key(db, node, slot);
        ham_assert(key_get_flags(child_key)==0 ||
                key_get_flags(child_key)==KEY_IS_EXTENDED,
                ("invalid key flags 0x%x", key_get_flags(child_key)));
        st = db_fetch_page(page_ref, db, key_get_ptr(child_key), 0);
    }
    else {
        /* slot -1: descend through the node's left-most child pointer */
        st = db_fetch_page(page_ref, db, btree_node_get_ptr_left(node), 0);
    }

    /* a failed fetch must not leave a page behind */
    ham_assert(st ? !*page_ref : 1, (0));
    return st;
}
/**
 * Look up @a key in the btree; optionally couple @a cursor to the hit and
 * read the matching record.
 *
 * The function runs through these phases:
 *  1. fetch lookup hints; bail out early if the hints say the key is out
 *     of bounds
 *  2. optional fast track: search the hinted leaf page, but only if it is
 *     still in the cache; only a hit strictly inside the node is accepted
 *  3. regular search: descend from the root to a leaf and search that leaf
 *  4. approximate matching: shift one slot left/right (possibly into a
 *     sibling page) until the hit satisfies the caller's LT/GT flags
 *  5. couple the cursor, then read the key (approximate matches only) and
 *     the record while the page is pinned with a reference
 *
 * @param be      the btree backend
 * @param cursor  optional; when non-NULL it must be a nil cursor (asserted)
 *                and is coupled to the page/slot of the hit
 * @param key     the key to look up; its internal flags are updated for
 *                approximate matches and its data is re-read through
 *                util_read_key() when KEY_IS_APPROXIMATE is set
 * @param record  optional; when non-NULL its _intflags/_rid are set from
 *                the found entry and it is filled by util_read_record()
 * @param flags   the find flags supplied by the caller
 *
 * @return 0 on success, HAM_KEY_NOT_FOUND if no (acceptable) key exists,
 *         otherwise the error status of the failing page fetch, search or
 *         read operation
 */
ham_status_t
btree_find_cursor(ham_btree_t *be, ham_bt_cursor_t *cursor,
        ham_key_t *key, ham_record_t *record, ham_u32_t flags)
{
    ham_status_t st;
    ham_page_t *page = NULL;
    btree_node_t *node = NULL;
    int_key_t *entry;
    ham_s32_t idx = -1;
    ham_db_t *db=be_get_db(be);
    /*
     * NOTE(review): positional initializer; the code below reads
     * hints.flags (may be widened) and hints.original_flags (never
     * modified here) -- presumably these are the first two members;
     * confirm against the declaration of find_hints_t
     */
    find_hints_t hints = {flags, flags, 0, HAM_FALSE, HAM_FALSE, 1};

    btree_find_get_hints(&hints, db, key);

    if (hints.key_is_out_of_bounds) {
        stats_update_find_fail_oob(db, &hints);
        return HAM_KEY_NOT_FOUND;
    }

    if (hints.try_fast_track) {
        /*
         * see if we get a sure hit within this btree leaf; if not, revert to
         * regular scan
         *
         * As this is a speed-improvement hint re-using recent material, the
         * page should still sit in the cache, or we're using old info, which
         * should be discarded.
         */
        st = db_fetch_page(&page, db, hints.leaf_page_addr, DB_ONLY_FROM_CACHE);
        ham_assert(st ? !page : 1, (0));
        if (st)
            return st;
        if (page) {
            node=ham_page_get_btree_node(page);
            ham_assert(btree_node_is_leaf(node), (0));
            ham_assert(btree_node_get_count(node) >= 3, (0)); /* edges + middle match */

            idx = btree_node_search_by_key(db, page, key, hints.flags);
            /*
             * if we didn't hit a match OR a match at either edge, FAIL.
             * A match at one of the edges is very risky, as this can also
             * signal a match far away from the current node, so we need
             * the full tree traversal then.
             */
            if (idx <= 0 || idx >= btree_node_get_count(node) - 1) {
                idx = -1;
            }
            /*
             * else: we landed in the middle of the node, so we don't need to
             * traverse the entire tree now.
             */
        }

        /* Reset any errors which may have been collected during the hinting
         * phase -- this is done by setting 'idx = -1' above as that effectively
         * clears the possible error code stored in there when (idx < -1)
         */
    }

    /* idx is still -1 if the fast track was skipped or was inconclusive */
    if (idx == -1) {
        /* get the address of the root page */
        if (!btree_get_rootpage(be)) {
            stats_update_find_fail(db, &hints);
            return HAM_KEY_NOT_FOUND;
        }

        /* load the root page */
        st=db_fetch_page(&page, db, btree_get_rootpage(be), 0);
        ham_assert(st ? !page : 1, (0));
        if (!page) {
            ham_assert(st, (0));
            stats_update_find_fail(db, &hints);
            return st ? st : HAM_INTERNAL_ERROR;
        }

        /* now traverse the root to the leaf nodes, till we find a leaf */
        node=ham_page_get_btree_node(page);
        if (!btree_node_is_leaf(node)) {
            /* signal 'don't care' when we have multiple pages; we resolve
             * this once we've got a hit further down */
            if (hints.flags & (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH))
                hints.flags |= (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH);

            for (;;) {
                hints.cost++;
                st=btree_traverse_tree(&page, 0, db, page, key);
                /* no child page: either an error (st set) or nothing to
                 * descend into */
                if (!page) {
                    stats_update_find_fail(db, &hints);
                    return st ? st : HAM_KEY_NOT_FOUND;
                }

                node=ham_page_get_btree_node(page);
                if (btree_node_is_leaf(node))
                    break;
            }
        }

        /* check the leaf page for the key */
        idx=btree_node_search_by_key(db, page, key, hints.flags);
        /* values below -1 are error codes returned in place of a slot */
        if (idx < -1) {
            stats_update_find_fail(db, &hints);
            return (ham_status_t)idx;
        }
    }  /* end of regular search */

    /*
     * When we are performing an approximate match, the worst case
     * scenario is where we've picked the wrong side of the fence
     * while sitting at a page/node boundary: that's what this
     * next piece of code resolves:
     *
     * essentially it moves one record forwards or backward when
     * the flags tell us this is mandatory and we're not yet in the proper
     * position.
     *
     * The whole trick works, because the code above detects when
     * we need to traverse a multi-page btree -- where this worst-case
     * scenario can happen -- and adjusted the flags to accept
     * both LT and GT approximate matches so that btree_node_search_by_key()
     * will be hard pressed to return a 'key not found' signal (idx==-1),
     * instead delivering the nearest LT or GT match; all we need to
     * do now is ensure we've got the right one and if not,
     * shift by one.
     */
    if (idx >= 0) {
        /*
         * case 1: the search reported an approximate hit and the caller
         * asked for exactly one direction (LT or GT, not both)
         */
        if ((ham_key_get_intflags(key) & KEY_IS_APPROXIMATE)
                && (hints.original_flags
                        & (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH))
                    != (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH)) {
            /* we sit on a GT neighbour, but the caller wants LT */
            if ((ham_key_get_intflags(key) & KEY_IS_GT)
                    && (hints.original_flags & HAM_FIND_LT_MATCH)) {
                /*
                 * if the index-1 is still in the page, just decrement the
                 * index
                 */
                if (idx > 0) {
                    idx--;
                }
                else {
                    /*
                     * otherwise load the left sibling page
                     */
                    if (!btree_node_get_left(node)) {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, btree_node_get_left(node), 0);
                    ham_assert(st ? !page : 1, (0));
                    if (!page) {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
                        return st ? st : HAM_INTERNAL_ERROR;
                    }
                    /* continue at the last slot of the left sibling */
                    node = ham_page_get_btree_node(page);
                    idx = btree_node_get_count(node) - 1;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
            }
            /* we sit on an LT neighbour, but the caller wants GT */
            else if ((ham_key_get_intflags(key) & KEY_IS_LT)
                    && (hints.original_flags & HAM_FIND_GT_MATCH)) {
                /*
                 * if the index+1 is still in the page, just increment the
                 * index
                 */
                if (idx + 1 < btree_node_get_count(node)) {
                    idx++;
                }
                else {
                    /*
                     * otherwise load the right sibling page
                     */
                    if (!btree_node_get_right(node)) {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, btree_node_get_right(node), 0);
                    if (!page) {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
                        return st ? st : HAM_INTERNAL_ERROR;
                    }
                    /* continue at the first slot of the right sibling */
                    node = ham_page_get_btree_node(page);
                    idx = 0;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) & ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
            }
        }
        /*
         * case 2: we have an exact hit, but the caller excluded exact
         * matches (flags are non-zero and lack HAM_FIND_EXACT_MATCH)
         */
        else if (!(ham_key_get_intflags(key) & KEY_IS_APPROXIMATE)
                && !(hints.original_flags & HAM_FIND_EXACT_MATCH)
                && (hints.original_flags != 0)) {
            /*
             * 'true GT/LT' has been added @ 2009/07/18 to complete
             * the EQ/LEQ/GEQ/LT/GT functionality;
             *
             * 'true LT/GT' is simply an extension upon the already existing
             * LEQ/GEQ logic just above; all we do here is move one record
             * up/down as it just happens that we get an exact ('equal')
             * match here.
             *
             * The fact that the LT/GT constants share their bits with the
             * LEQ/GEQ flags so that LEQ==(LT|EXACT) and GEQ==(GT|EXACT)
             * ensures that we can restrict our work to a simple adjustment
             * right here; everything else has already been taken care of by
             * the LEQ/GEQ logic in the section above when the key has been
             * flagged with the KEY_IS_APPROXIMATE flag.
             */
            if (hints.original_flags & HAM_FIND_LT_MATCH) {
                /*
                 * if the index-1 is still in the page, just decrement the
                 * index
                 */
                if (idx > 0) {
                    idx--;
                    ham_key_set_intflags(key, (ham_key_get_intflags(key) & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
                }
                else {
                    /*
                     * otherwise load the left sibling page
                     */
                    if (!btree_node_get_left(node)) {
                        /* when an error is otherwise unavoidable, see if
                         * we have an escape route through GT? */
                        if (hints.original_flags & HAM_FIND_GT_MATCH) {
                            /*
                             * if the index+1 is still in the page, just
                             * increment the index
                             */
                            if (idx + 1 < btree_node_get_count(node)) {
                                idx++;
                            }
                            else {
                                /*
                                 * otherwise load the right sibling page
                                 */
                                if (!btree_node_get_right(node)) {
                                    stats_update_find_fail(db, &hints);
                                    ham_assert(node == ham_page_get_btree_node(page), (0));
                                    stats_update_any_bound(db, page, key, hints.original_flags, -1);
                                    return HAM_KEY_NOT_FOUND;
                                }

                                hints.cost++;
                                st = db_fetch_page(&page, db, btree_node_get_right(node), 0);
                                if (!page) {
                                    ham_assert(st, (0));
                                    stats_update_find_fail(db, &hints);
                                    return st ? st : HAM_INTERNAL_ERROR;
                                }
                                node = ham_page_get_btree_node(page);
                                idx = 0;
                            }
                            ham_key_set_intflags(key, (ham_key_get_intflags(key) & ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
                        }
                        else {
                            stats_update_find_fail(db, &hints);
                            ham_assert(node == ham_page_get_btree_node(page), (0));
                            stats_update_any_bound(db, page, key, hints.original_flags, -1);
                            return HAM_KEY_NOT_FOUND;
                        }
                    }
                    else {
                        hints.cost++;
                        st = db_fetch_page(&page, db, btree_node_get_left(node), 0);
                        if (!page) {
                            ham_assert(st, (0));
                            stats_update_find_fail(db, &hints);
                            return st ? st : HAM_INTERNAL_ERROR;
                        }
                        node = ham_page_get_btree_node(page);
                        idx = btree_node_get_count(node) - 1;

                        ham_key_set_intflags(key, (ham_key_get_intflags(key) & ~KEY_IS_APPROXIMATE) | KEY_IS_LT);
                    }
                }
            }
            else if (hints.original_flags & HAM_FIND_GT_MATCH) {
                /*
                 * if the index+1 is still in the page, just increment the
                 * index
                 */
                if (idx + 1 < btree_node_get_count(node)) {
                    idx++;
                }
                else {
                    /*
                     * otherwise load the right sibling page
                     */
                    if (!btree_node_get_right(node)) {
                        stats_update_find_fail(db, &hints);
                        ham_assert(node == ham_page_get_btree_node(page), (0));
                        stats_update_any_bound(db, page, key, hints.original_flags, -1);
                        return HAM_KEY_NOT_FOUND;
                    }

                    hints.cost++;
                    st = db_fetch_page(&page, db, btree_node_get_right(node), 0);
                    if (!page) {
                        ham_assert(st, (0));
                        stats_update_find_fail(db, &hints);
                        return st ? st : HAM_INTERNAL_ERROR;
                    }
                    node = ham_page_get_btree_node(page);
                    idx = 0;
                }
                ham_key_set_intflags(key, (ham_key_get_intflags(key) & ~KEY_IS_APPROXIMATE) | KEY_IS_GT);
            }
        }
    }

    /* the leaf search found nothing acceptable */
    if (idx<0) {
        stats_update_find_fail(db, &hints);
        ham_assert(node, (0));
        ham_assert(page, (0));
        ham_assert(node == ham_page_get_btree_node(page), (0));
        stats_update_any_bound(db, page, key, hints.original_flags, -1);
        return HAM_KEY_NOT_FOUND;
    }

    /* load the entry, and store record ID and key flags */
    entry=btree_node_get_key(db, node, idx);

    /* set the cursor-position to this key */
    if (cursor) {
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_UNCOUPLED), ("coupling an uncoupled cursor, but need a nil-cursor"));
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_COUPLED), ("coupling a coupled cursor, but need a nil-cursor"));
        page_add_cursor(page, (ham_cursor_t *)cursor);
        bt_cursor_set_flags(cursor, bt_cursor_get_flags(cursor)|BT_CURSOR_FLAG_COUPLED);
        bt_cursor_set_coupled_page(cursor, page);
        bt_cursor_set_coupled_index(cursor, idx);
    }

    /*
     * during util_read_key and util_read_record, new pages might be needed,
     * and the page at which we're pointing could be moved out of memory;
     * that would mean that the cursor would be uncoupled, and we're losing
     * the 'entry'-pointer. therefore we 'lock' the page by incrementing
     * the reference counter
     */
    page_add_ref(page);

    ham_assert(btree_node_is_leaf(node), ("iterator points to internal node"));

    /* no need to load the key if we have an exact match: */
    if (key && (ham_key_get_intflags(key) & KEY_IS_APPROXIMATE)) {
        /* NOTE: this 'st' shadows the function-level 'st' */
        ham_status_t st=util_read_key(db, entry, key);
        if (st) {
            page_release_ref(page);
            stats_update_find_fail(db, &hints);
            return (st);
        }
    }

    if (record) {
        /* NOTE: this 'st' shadows the function-level 'st' */
        ham_status_t st;
        record->_intflags=key_get_flags(entry);
        record->_rid=key_get_ptr(entry);
        /* the caller's original 'flags' are passed on, not hints.flags */
        st=util_read_record(db, record, flags);
        if (st) {
            page_release_ref(page);
            stats_update_find_fail(db, &hints);
            return (st);
        }
    }

    /* drop the reference taken above, then record the successful lookup */
    page_release_ref(page);
    stats_update_find(db, page, &hints);
    ham_assert(node == ham_page_get_btree_node(page), (0));
    stats_update_any_bound(db, page, key, hints.original_flags, idx);

    return (0);
}
static ham_status_t __insert_split(ham_page_t *page, ham_key_t *key, ham_offset_t rid, insert_scratchpad_t *scratchpad, insert_hints_t *hints) { int cmp; ham_status_t st; ham_page_t *newpage, *oldsib; int_key_t *nbte, *obte; btree_node_t *nbtp, *obtp, *sbtp; ham_size_t count, keysize; ham_db_t *db=page_get_owner(page); ham_env_t *env = db_get_env(db); ham_key_t pivotkey, oldkey; ham_offset_t pivotrid; ham_u16_t pivot; ham_bool_t pivot_at_end=HAM_FALSE; ham_assert(page_get_owner(page), (0)); ham_assert(device_get_env(page_get_device(page)) == db_get_env(page_get_owner(page)), (0)); ham_assert(hints->force_append == HAM_FALSE, (0)); keysize=db_get_keysize(db); /* * allocate a new page */ hints->cost++; st=db_alloc_page(&newpage, db, PAGE_TYPE_B_INDEX, 0); ham_assert(st ? page == NULL : 1, (0)); ham_assert(!st ? page != NULL : 1, (0)); if (st) return st; ham_assert(page_get_owner(newpage), ("")); /* clear the node header */ memset(page_get_payload(newpage), 0, sizeof(btree_node_t)); stats_page_is_nuked(db, page, HAM_TRUE); /* * move half of the key/rid-tuples to the new page * * !! recno: keys are sorted; we do a "lazy split" */ nbtp=ham_page_get_btree_node(newpage); nbte=btree_node_get_key(db, nbtp, 0); obtp=ham_page_get_btree_node(page); obte=btree_node_get_key(db, obtp, 0); count=btree_node_get_count(obtp); /* * for databases with sequential access (this includes recno databases): * do not split in the middle, but at the very end of the page * * if this page is the right-most page in the index, and this key is * inserted at the very end, then we select the same pivot as for * sequential access */ if (db_get_data_access_mode(db)&HAM_DAM_SEQUENTIAL_INSERT) pivot_at_end=HAM_TRUE; else if (btree_node_get_right(obtp)==0) { cmp=key_compare_pub_to_int(db, page, key, btree_node_get_count(obtp)-1); if (cmp>0) pivot_at_end=HAM_TRUE; } /* * internal pages set the count of the new page to count-pivot-1 (because * the pivot element will become ptr_left of the new page). 
* by using pivot=count-2 we make sure that at least 1 element will remain * in the new node. */ if (pivot_at_end) { pivot=count-2; } else { pivot=count/2; } /* * uncouple all cursors */ st=bt_uncouple_all_cursors(page, pivot); if (st) return (st); /* * if we split a leaf, we'll insert the pivot element in the leaf * page, too. in internal nodes, we don't insert it, but propagate * it to the parent node only. */ if (btree_node_is_leaf(obtp)) { hints->cost += stats_memmove_cost((db_get_int_key_header_size()+keysize)*(count-pivot)); memcpy((char *)nbte, ((char *)obte)+(db_get_int_key_header_size()+keysize)*pivot, (db_get_int_key_header_size()+keysize)*(count-pivot)); } else { hints->cost += stats_memmove_cost((db_get_int_key_header_size()+keysize)*(count-pivot-1)); memcpy((char *)nbte, ((char *)obte)+(db_get_int_key_header_size()+keysize)*(pivot+1), (db_get_int_key_header_size()+keysize)*(count-pivot-1)); } /* * store the pivot element, we'll need it later to propagate it * to the parent page */ nbte=btree_node_get_key(db, obtp, pivot); memset(&pivotkey, 0, sizeof(pivotkey)); memset(&oldkey, 0, sizeof(oldkey)); oldkey.data=key_get_key(nbte); oldkey.size=key_get_size(nbte); oldkey._flags=key_get_flags(nbte); st = util_copy_key(db, &oldkey, &pivotkey); if (st) { (void)db_free_page(newpage, DB_MOVE_TO_FREELIST); goto fail_dramatically; } pivotrid=page_get_self(newpage); /* * adjust the page count */ if (btree_node_is_leaf(obtp)) { btree_node_set_count(obtp, pivot); btree_node_set_count(nbtp, count-pivot); } else { btree_node_set_count(obtp, pivot); btree_node_set_count(nbtp, count-pivot-1); } /* * if we're in an internal page: fix the ptr_left of the new page * (it points to the ptr of the pivot key) */ if (!btree_node_is_leaf(obtp)) { /* * nbte still contains the pivot key */ btree_node_set_ptr_left(nbtp, key_get_ptr(nbte)); } /* * insert the new element */ hints->cost++; cmp=key_compare_pub_to_int(db, page, key, pivot); if (cmp < -1) { st = (ham_status_t)cmp; goto 
fail_dramatically; } if (cmp>=0) st=__insert_nosplit(newpage, key, rid, scratchpad->record, scratchpad->cursor, hints); else st=__insert_nosplit(page, key, rid, scratchpad->record, scratchpad->cursor, hints); if (st) { goto fail_dramatically; } scratchpad->cursor=0; /* don't overwrite cursor if __insert_nosplit is called again */ /* * fix the double-linked list of pages, and mark the pages as dirty */ if (btree_node_get_right(obtp)) { st=db_fetch_page(&oldsib, db, btree_node_get_right(obtp), 0); if (st) goto fail_dramatically; } else { oldsib=0; } if (oldsib) { st=ham_log_add_page_before(oldsib); if (st) goto fail_dramatically; } btree_node_set_left (nbtp, page_get_self(page)); btree_node_set_right(nbtp, btree_node_get_right(obtp)); btree_node_set_right(obtp, page_get_self(newpage)); if (oldsib) { sbtp=ham_page_get_btree_node(oldsib); btree_node_set_left(sbtp, page_get_self(newpage)); page_set_dirty(oldsib, env); } page_set_dirty(newpage, env); page_set_dirty(page, env); /* * propagate the pivot key to the parent page */ ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0)); if (scratchpad->key.data) allocator_free(env_get_allocator(env), scratchpad->key.data); scratchpad->key=pivotkey; scratchpad->rid=pivotrid; ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0)); return (SPLIT); fail_dramatically: ham_assert(!(pivotkey.flags & HAM_KEY_USER_ALLOC), (0)); if (pivotkey.data) allocator_free(env_get_allocator(env), pivotkey.data); return st; }
static ham_status_t __insert_cursor(ham_btree_t *be, ham_key_t *key, ham_record_t *record, ham_bt_cursor_t *cursor, insert_hints_t *hints) { ham_status_t st; ham_page_t *root; ham_db_t *db=be_get_db(be); ham_env_t *env = db_get_env(db); insert_scratchpad_t scratchpad; ham_assert(hints->force_append == HAM_FALSE, (0)); ham_assert(hints->force_prepend == HAM_FALSE, (0)); /* * initialize the scratchpad */ memset(&scratchpad, 0, sizeof(scratchpad)); scratchpad.be=be; scratchpad.record=record; scratchpad.cursor=cursor; /* * get the root-page... */ ham_assert(btree_get_rootpage(be)!=0, ("btree has no root page")); st=db_fetch_page(&root, db, btree_get_rootpage(be), 0); ham_assert(st ? root == NULL : 1, (0)); if (st) return st; /* * ... and start the recursion */ st=__insert_recursive(root, key, 0, &scratchpad, hints); /* * if the root page was split, we have to create a new * root page. */ if (st==SPLIT) { ham_page_t *newroot; btree_node_t *node; /* * the root-page will be changed... */ st=ham_log_add_page_before(root); if (st) return (st); /* * allocate a new root page */ st=db_alloc_page(&newroot, db, PAGE_TYPE_B_ROOT, 0); ham_assert(st ? newroot == NULL : 1, (0)); if (st) return (st); ham_assert(page_get_owner(newroot), ("")); /* clear the node header */ memset(page_get_payload(newroot), 0, sizeof(btree_node_t)); stats_page_is_nuked(db, root, HAM_TRUE); /* * insert the pivot element and the ptr_left */ node=ham_page_get_btree_node(newroot); btree_node_set_ptr_left(node, btree_get_rootpage(be)); st=__insert_nosplit(newroot, &scratchpad.key, scratchpad.rid, scratchpad.record, scratchpad.cursor, hints); ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0)); scratchpad.cursor=0; /* don't overwrite cursor if __insert_nosplit is called again */ if (st) { ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0)); if (scratchpad.key.data) allocator_free(env_get_allocator(env), scratchpad.key.data); return (st); } /* * set the new root page * * !! 
* do NOT delete the old root page - it's still in use! * * also don't forget to flush the backend - otherwise the header * page of the database will not contain the updated information. * The backend is flushed when the database is closed, but if * recovery is enabled then the flush here is critical. */ btree_set_rootpage(be, page_get_self(newroot)); be_set_dirty(be, HAM_TRUE); be->_fun_flush(be); /* * As we re-purpose a page, we will reset its pagecounter * as well to signal its first use as the new type assigned * here. */ if (env_get_cache(env) && (page_get_type(root)!=PAGE_TYPE_B_INDEX)) cache_update_page_access_counter(root, env_get_cache(env), 0); page_set_type(root, PAGE_TYPE_B_INDEX); page_set_dirty(root, env); page_set_dirty(newroot, env); /* the root page was modified (btree_set_rootpage) - make sure that * it's logged */ if (env_get_rt_flags(env)&HAM_ENABLE_RECOVERY) { st=txn_add_page(env_get_txn(env), env_get_header_page(env), HAM_TRUE); if (st) return (st); } } /* * release the scratchpad-memory and return to caller */ ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0)); if (scratchpad.key.data) allocator_free(env_get_allocator(env), scratchpad.key.data); return (st); }
static ham_status_t __append_key(ham_btree_t *be, ham_key_t *key, ham_record_t *record, ham_bt_cursor_t *cursor, insert_hints_t *hints) { ham_status_t st=0; ham_page_t *page; btree_node_t *node; ham_db_t *db; #ifdef HAM_DEBUG if (cursor && !bt_cursor_is_nil(cursor)) { ham_assert(be_get_db(be) == bt_cursor_get_db(cursor), (0)); } #endif db = be_get_db(be); /* * see if we get this btree leaf; if not, revert to regular scan * * As this is a speed-improvement hint re-using recent material, the page * should still sit in the cache, or we're using old info, which should be * discarded. */ st = db_fetch_page(&page, db, hints->leaf_page_addr, DB_ONLY_FROM_CACHE); if (st) return st; if (!page) { hints->force_append = HAM_FALSE; hints->force_prepend = HAM_FALSE; return (__insert_cursor(be, key, record, cursor, hints)); } page_add_ref(page); node=ham_page_get_btree_node(page); ham_assert(btree_node_is_leaf(node), ("iterator points to internal node")); /* * if the page is already full OR this page is not the right-most page * when we APPEND or the left-most node when we PREPEND * OR the new key is not the highest key: perform a normal insert */ if ((hints->force_append && btree_node_get_right(node)) || (hints->force_prepend && btree_node_get_left(node)) || btree_node_get_count(node) >= btree_get_maxkeys(be)) { page_release_ref(page); hints->force_append = HAM_FALSE; hints->force_prepend = HAM_FALSE; return (__insert_cursor(be, key, record, cursor, hints)); } /* * if the page is not empty: check if we append the key at the end / start * (depending on force_append/force_prepend), * or if it's actually inserted in the middle (when neither force_append * or force_prepend is specified: that'd be SEQUENTIAL insertion * hinting somewhere in the middle of the total key range. 
*/ if (btree_node_get_count(node)!=0) { int cmp_hi; int cmp_lo; hints->cost++; if (!hints->force_prepend) { cmp_hi = key_compare_pub_to_int(db, page, key, btree_node_get_count(node)-1); /* key is in the middle */ if (cmp_hi < -1) { page_release_ref(page); return (ham_status_t)cmp_hi; } /* key is at the end */ if (cmp_hi > 0) { if (btree_node_get_right(node)) { /* not at top end of the btree, so we can't do the * fast track */ page_release_ref(page); //hints->flags &= ~HAM_HINT_APPEND; hints->force_append = HAM_FALSE; hints->force_prepend = HAM_FALSE; return (__insert_cursor(be, key, record, cursor, hints)); } hints->force_append = HAM_TRUE; hints->force_prepend = HAM_FALSE; } } else { /* hints->force_prepend is true */ /* not bigger than the right-most node while we * were trying to APPEND */ cmp_hi = -1; } if (!hints->force_append) { cmp_lo = key_compare_pub_to_int(db, page, key, 0); /* in the middle range */ if (cmp_lo < -1) { page_release_ref(page); return ((ham_status_t)cmp_lo); } /* key is at the start of page */ if (cmp_lo < 0) { if (btree_node_get_left(node)) { /* not at bottom end of the btree, so we can't * do the fast track */ page_release_ref(page); //hints->flags &= ~HAM_HINT_PREPEND; hints->force_append = HAM_FALSE; hints->force_prepend = HAM_FALSE; return (__insert_cursor(be, key, record, cursor, hints)); } hints->force_append = HAM_FALSE; hints->force_prepend = HAM_TRUE; } } else { /* hints->force_prepend is true */ /* not smaller than the left-most node while we were * trying to PREPEND */ cmp_lo = +1; } /* handle inserts in the middle range */ if (cmp_lo >= 0 && cmp_hi <= 0) { /* * Depending on where we are in the btree, the current key either * is going to end up in the middle of the given node/page, * OR the given key is out of range of the given leaf node. */ if (hints->force_append || hints->force_prepend) { /* * when prepend or append is FORCED, we are expected to * add keys ONLY at the beginning or end of the btree * key range. 
Clearly the current key does not fit that * criterium. */ page_release_ref(page); //hints->flags &= ~HAM_HINT_PREPEND; hints->force_append = HAM_FALSE; hints->force_prepend = HAM_FALSE; return (__insert_cursor(be, key, record, cursor, hints)); } /* * we discovered that the key must be inserted in the middle * of the current leaf. * * It does not matter whether the current leaf is at the start or * end of the btree range; as we need to add the key in the middle * of the current leaf, that info alone is enough to continue with * the fast track insert operation. */ ham_assert(!hints->force_prepend && !hints->force_append, (0)); } ham_assert((hints->force_prepend + hints->force_append) < 2, ("Either APPEND or PREPEND flag MAY be set, but not both")); } else { /* empty page: force insertion in slot 0 */ hints->force_append = HAM_FALSE; hints->force_prepend = HAM_TRUE; } /* * the page will be changed - write it to the log (if a log exists) */ st=ham_log_add_page_before(page); if (st) { page_release_ref(page); return (st); } /* * OK - we're really appending/prepending the new key. */ ham_assert(hints->force_append || hints->force_prepend, (0)); st=__insert_nosplit(page, key, 0, record, cursor, hints); page_release_ref(page); return (st); }