static ham_status_t
__append_key(ham_btree_t *be, ham_key_t *key, ham_record_t *record,
                ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st=0;
    ham_page_t *page;
    btree_node_t *node;
    ham_db_t *db;

#ifdef HAM_DEBUG
    if (cursor && !bt_cursor_is_nil(cursor)) {
        ham_assert(be_get_db(be) == bt_cursor_get_db(cursor), (0));
    }
#endif

    db = be_get_db(be);

    /*
     * see if we can fetch the hinted btree leaf; if not, revert to a
     * regular scan
     *
     * As this is a speed-improvement hint re-using recent material, the page
     * should still sit in the cache, or we're using old info, which should be
     * discarded.
     */
    st = db_fetch_page(&page, db, hints->leaf_page_addr, DB_ONLY_FROM_CACHE);
    if (st)
        return st;
    if (!page) {
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    page_add_ref(page);
    node=ham_page_get_btree_node(page);
    ham_assert(btree_node_is_leaf(node), ("iterator points to internal node"));

    /*
     * if the page is already full, OR if this page is not the right-most
     * page when we APPEND (or not the left-most page when we PREPEND):
     * perform a normal insert
     */
    if ((hints->force_append && btree_node_get_right(node))
            || (hints->force_prepend && btree_node_get_left(node))
            || btree_node_get_count(node) >= btree_get_maxkeys(be)) {
        page_release_ref(page);
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_FALSE;
        return (__insert_cursor(be, key, record, cursor, hints));
    }

    /*
     * if the page is not empty: check if we append the key at the end / start
     * (depending on force_append/force_prepend),
     * or if it's actually inserted in the middle (when neither force_append
     * nor force_prepend is specified: that'd be SEQUENTIAL insertion
     * hinting somewhere in the middle of the total key range.)
     */
    if (btree_node_get_count(node)!=0) {
        int cmp_hi;
        int cmp_lo;

        hints->cost++;
        if (!hints->force_prepend) {
            cmp_hi = key_compare_pub_to_int(db, page, key,
                                btree_node_get_count(node)-1);
            /* comparison failed: propagate the error */
            if (cmp_hi < -1) {
                page_release_ref(page);
                return (ham_status_t)cmp_hi;
            }
            /* key is at the end */
            if (cmp_hi > 0) {
                if (btree_node_get_right(node)) {
                    /* not at top end of the btree, so we can't do the
                     * fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_APPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_TRUE;
                hints->force_prepend = HAM_FALSE;
            }
        }
        else { /* hints->force_prepend is true */
            /* assume the key is not bigger than the right-most key,
             * since we are PREPENDing */
            cmp_hi = -1;
        }

        if (!hints->force_append) {
            cmp_lo = key_compare_pub_to_int(db, page, key, 0);
            /* comparison failed: propagate the error */
            if (cmp_lo < -1) {
                page_release_ref(page);
                return ((ham_status_t)cmp_lo);
            }
            /* key is at the start of page */
            if (cmp_lo < 0) {
                if (btree_node_get_left(node)) {
                    /* not at bottom end of the btree, so we can't
                     * do the fast track */
                    page_release_ref(page);
                    //hints->flags &= ~HAM_HINT_PREPEND;
                    hints->force_append = HAM_FALSE;
                    hints->force_prepend = HAM_FALSE;
                    return (__insert_cursor(be, key, record, cursor, hints));
                }

                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_TRUE;
            }
        }
        else { /* hints->force_append is true */
            /* assume the key is not smaller than the left-most key,
             * since we are APPENDing */
            cmp_lo = +1;
        }

        /* handle inserts in the middle range */
        if (cmp_lo >= 0 && cmp_hi <= 0) {
            /*
             * Depending on where we are in the btree, the current key either
             * is going to end up in the middle of the given node/page,
             * OR the given key is out of range of the given leaf node.
             */
            if (hints->force_append || hints->force_prepend) {
                /*
                 * when prepend or append is FORCED, we are expected to
                 * add keys ONLY at the beginning or end of the btree
                 * key range. Clearly the current key does not fit that
                 * criterion.
                 */
                page_release_ref(page);
                //hints->flags &= ~HAM_HINT_PREPEND;
                hints->force_append = HAM_FALSE;
                hints->force_prepend = HAM_FALSE;
                return (__insert_cursor(be, key, record, cursor, hints));
            }

            /*
             * we discovered that the key must be inserted in the middle
             * of the current leaf.
             *
             * It does not matter whether the current leaf is at the start or
             * end of the btree range; as we need to add the key in the middle
             * of the current leaf, that info alone is enough to continue with
             * the fast track insert operation.
             */
            ham_assert(!hints->force_prepend && !hints->force_append, (0));
        }

        ham_assert((hints->force_prepend + hints->force_append) < 2,
                ("Either APPEND or PREPEND flag MAY be set, but not both"));
    }
    else { /* empty page: force insertion in slot 0 */
        hints->force_append = HAM_FALSE;
        hints->force_prepend = HAM_TRUE;
    }

    /*
     * the page will be changed - write it to the log (if a log exists)
     */
    st=ham_log_add_page_before(page);
    if (st) {
        page_release_ref(page);
        return (st);
    }

    /*
     * OK - we're really appending/prepending the new key.
     */
    ham_assert(hints->force_append || hints->force_prepend, (0));
    st=__insert_nosplit(page, key, 0, record, cursor, hints);
    page_release_ref(page);
    return (st);
}
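/*
 * Usage sketch (illustrative, compiled out): __append_key() is the fast
 * path behind the public HAM_HINT_APPEND flag. Assuming the public
 * hamsterdb API (ham_insert() and HAM_HINT_APPEND; the helper name below
 * is hypothetical), a caller inserting monotonically increasing keys
 * would look roughly like this - every insert then lands in the
 * right-most leaf and the full root-to-leaf descent of __insert_cursor()
 * is skipped.
 */
#if 0 /* sketch only - not part of the build */
static ham_status_t
__sketch_sequential_fill(ham_db_t *db, ham_u32_t n)
{
    ham_u32_t i;
    ham_status_t st;

    for (i = 0; i < n; i++) {
        ham_key_t key = {0};
        ham_record_t rec = {0};
        key.data = &i;
        key.size = sizeof(i);
        /* the APPEND hint is what sets hints->force_append above */
        st = ham_insert(db, 0, &key, &rec, HAM_HINT_APPEND);
        if (st)
            return (st);
    }
    return (0);
}
#endif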
static ham_status_t
__insert_split(ham_page_t *page, ham_key_t *key,
                ham_offset_t rid, insert_scratchpad_t *scratchpad,
                insert_hints_t *hints)
{
    int cmp;
    ham_status_t st;
    ham_page_t *newpage, *oldsib;
    int_key_t *nbte, *obte;
    btree_node_t *nbtp, *obtp, *sbtp;
    ham_size_t count, keysize;
    ham_db_t *db=page_get_owner(page);
    ham_env_t *env = db_get_env(db);
    ham_key_t pivotkey, oldkey;
    ham_offset_t pivotrid;
    ham_u16_t pivot;
    ham_bool_t pivot_at_end=HAM_FALSE;

    ham_assert(page_get_owner(page), (0));
    ham_assert(device_get_env(page_get_device(page))
            == db_get_env(page_get_owner(page)), (0));

    ham_assert(hints->force_append == HAM_FALSE, (0));

    keysize=db_get_keysize(db);

    /*
     * allocate a new page
     */
    hints->cost++;
    st=db_alloc_page(&newpage, db, PAGE_TYPE_B_INDEX, 0);
    ham_assert(st ? newpage == NULL : 1, (0));
    ham_assert(!st ? newpage != NULL : 1, (0));
    if (st)
        return st;
    ham_assert(page_get_owner(newpage), (""));
    /* clear the node header */
    memset(page_get_payload(newpage), 0, sizeof(btree_node_t));

    stats_page_is_nuked(db, page, HAM_TRUE);

    /*
     * move half of the key/rid-tuples to the new page
     *
     * !! recno: keys are sorted; we do a "lazy split"
     */
    nbtp=ham_page_get_btree_node(newpage);
    nbte=btree_node_get_key(db, nbtp, 0);
    obtp=ham_page_get_btree_node(page);
    obte=btree_node_get_key(db, obtp, 0);
    count=btree_node_get_count(obtp);

    /*
     * for databases with sequential access (this includes recno databases):
     * do not split in the middle, but at the very end of the page
     *
     * if this page is the right-most page in the index, and this key is
     * inserted at the very end, then we select the same pivot as for
     * sequential access
     */
    if (db_get_data_access_mode(db)&HAM_DAM_SEQUENTIAL_INSERT)
        pivot_at_end=HAM_TRUE;
    else if (btree_node_get_right(obtp)==0) {
        cmp=key_compare_pub_to_int(db, page, key,
                        btree_node_get_count(obtp)-1);
        if (cmp>0)
            pivot_at_end=HAM_TRUE;
    }

    /*
     * internal pages set the count of the new page to count-pivot-1 (because
     * the pivot element will become ptr_left of the new page).
     * by using pivot=count-2 we make sure that at least 1 element will remain
     * in the new node.
     */
    if (pivot_at_end) {
        pivot=count-2;
    }
    else {
        pivot=count/2;
    }

    /*
     * uncouple all cursors
     */
    st=bt_uncouple_all_cursors(page, pivot);
    if (st)
        return (st);

    /*
     * if we split a leaf, we'll insert the pivot element in the leaf
     * page, too. in internal nodes, we don't insert it, but propagate
     * it to the parent node only.
     */
    if (btree_node_is_leaf(obtp)) {
        hints->cost += stats_memmove_cost((db_get_int_key_header_size()
                        +keysize)*(count-pivot));
        memcpy((char *)nbte,
               ((char *)obte)+(db_get_int_key_header_size()+keysize)*pivot,
               (db_get_int_key_header_size()+keysize)*(count-pivot));
    }
    else {
        hints->cost += stats_memmove_cost((db_get_int_key_header_size()
                        +keysize)*(count-pivot-1));
        memcpy((char *)nbte,
               ((char *)obte)+(db_get_int_key_header_size()+keysize)*(pivot+1),
               (db_get_int_key_header_size()+keysize)*(count-pivot-1));
    }

    /*
     * store the pivot element, we'll need it later to propagate it
     * to the parent page
     */
    nbte=btree_node_get_key(db, obtp, pivot);

    memset(&pivotkey, 0, sizeof(pivotkey));
    memset(&oldkey, 0, sizeof(oldkey));
    oldkey.data=key_get_key(nbte);
    oldkey.size=key_get_size(nbte);
    oldkey._flags=key_get_flags(nbte);
    st = util_copy_key(db, &oldkey, &pivotkey);
    if (st) {
        (void)db_free_page(newpage, DB_MOVE_TO_FREELIST);
        goto fail_dramatically;
    }
    pivotrid=page_get_self(newpage);

    /*
     * adjust the page count
     */
    if (btree_node_is_leaf(obtp)) {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot);
    }
    else {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot-1);
    }

    /*
     * if we're in an internal page: fix the ptr_left of the new page
     * (it points to the ptr of the pivot key)
     */
    if (!btree_node_is_leaf(obtp)) {
        /*
         * nbte still contains the pivot key
         */
        btree_node_set_ptr_left(nbtp, key_get_ptr(nbte));
    }

    /*
     * insert the new element
     */
    hints->cost++;
    cmp=key_compare_pub_to_int(db, page, key, pivot);
    if (cmp < -1) {
        st = (ham_status_t)cmp;
        goto fail_dramatically;
    }

    if (cmp>=0)
        st=__insert_nosplit(newpage, key, rid,
                scratchpad->record, scratchpad->cursor, hints);
    else
        st=__insert_nosplit(page, key, rid,
                scratchpad->record, scratchpad->cursor, hints);
    if (st) {
        goto fail_dramatically;
    }
    scratchpad->cursor=0; /* don't overwrite cursor if __insert_nosplit
                             is called again */

    /*
     * fix the double-linked list of pages, and mark the pages as dirty
     */
    if (btree_node_get_right(obtp)) {
        st=db_fetch_page(&oldsib, db, btree_node_get_right(obtp), 0);
        if (st)
            goto fail_dramatically;
    }
    else {
        oldsib=0;
    }

    if (oldsib) {
        st=ham_log_add_page_before(oldsib);
        if (st)
            goto fail_dramatically;
    }

    btree_node_set_left (nbtp, page_get_self(page));
    btree_node_set_right(nbtp, btree_node_get_right(obtp));
    btree_node_set_right(obtp, page_get_self(newpage));
    if (oldsib) {
        sbtp=ham_page_get_btree_node(oldsib);
        btree_node_set_left(sbtp, page_get_self(newpage));
        page_set_dirty(oldsib, env);
    }
    page_set_dirty(newpage, env);
    page_set_dirty(page, env);

    /*
     * propagate the pivot key to the parent page
     */
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad->key.data)
        allocator_free(env_get_allocator(env), scratchpad->key.data);
    scratchpad->key=pivotkey;
    scratchpad->rid=pivotrid;
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));

    return (SPLIT);

fail_dramatically:
    ham_assert(!(pivotkey.flags & HAM_KEY_USER_ALLOC), (0));
    if (pivotkey.data)
        allocator_free(env_get_allocator(env), pivotkey.data);
    return st;
}
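/*
 * Worked example (a sketch; the helper below is hypothetical and not
 * referenced by the split code): how the pivot selection in
 * __insert_split() distributes the keys. With count=8 and a middle
 * split, pivot=4: a leaf keeps 4 keys and the new page receives 4. An
 * internal node's new page receives only 3, because the pivot key
 * migrates to the parent and its child pointer becomes ptr_left of the
 * new page. With the sequential "lazy" split, pivot=count-2=6: the old
 * page stays nearly full and the new right-most page starts almost
 * empty, which suits append-only workloads.
 */
#if 0 /* sketch only - not part of the build */
static void
__sketch_split_counts(ham_size_t count, ham_bool_t pivot_at_end,
                ham_bool_t is_leaf, ham_size_t *old_count,
                ham_size_t *new_count)
{
    /* same pivot choice as in __insert_split() above */
    ham_u16_t pivot = pivot_at_end ? (ham_u16_t)(count-2)
                                   : (ham_u16_t)(count/2);

    *old_count = pivot;
    /* internal nodes hand one extra key upwards: the pivot itself */
    *new_count = is_leaf ? count-pivot : count-pivot-1;
}
#endif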
/**
 * perform a binary search for the slot of the *largest* element which
 * is <= the given key; *slot is set to -1 if the key is smaller than
 * the first element of the node
 */
ham_status_t
btree_get_slot(ham_db_t *db, ham_page_t *page,
        ham_key_t *key, ham_s32_t *slot, int *pcmp)
{
    int cmp = -1;
    btree_node_t *node = ham_page_get_btree_node(page);
    ham_s32_t r = btree_node_get_count(node)-1;
    ham_s32_t l = 1;
    ham_s32_t i;
    ham_s32_t last = MAX_KEYS_PER_NODE + 1;

    ham_assert(btree_node_get_count(node)>0, ("node is empty"));

    /*
     * only one element in this node?
     */
    if (r==0) {
        cmp=key_compare_pub_to_int(db, page, key, 0);
        if (cmp < -1)
            return (ham_status_t)cmp;
        *slot=cmp<0 ? -1 : 0;
        goto bail;
    }

    for (;;) {
        /* [i_a] compare is not needed (while (r>=0)) */

        /* get the median item; if it's identical with the "last" item,
         * we've found the slot */
        i=(l+r)/2;

        if (i==last) {
            *slot=i;
            cmp=1;
            ham_assert(i >= 0, (0));
            ham_assert(i < MAX_KEYS_PER_NODE + 1, (0));
            break;
        }

        /* compare it against the key */
        cmp=key_compare_pub_to_int(db, page, key, (ham_u16_t)i);
        if (cmp < -1)
            return (ham_status_t)cmp;

        /* found it? */
        if (cmp==0) {
            *slot=i;
            break;
        }

        /* if the key is smaller than the item: search "to the left" */
        if (cmp<0) {
            if (r==0) {
                ham_assert(i == 0, (0));
                *slot=-1;
                break;
            }
            r=i-1;
        }
        else {
            last=i;
            l=i+1;
        }
    }

bail:
    if (pcmp /* && *slot!=-1 */) {
        /* [i_a] reduced the total number of key comparisons; this one is
           not needed any more, as it was only really required to
           compensate for the (i==last) conditional jump above. So we
           can simply use 'cmp' as-is. */
        *pcmp=cmp;
    }

    return (0);
}
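/*
 * Contract sketch (hypothetical helper, not used above): the same
 * "largest element <= key" search expressed over a plain int array, to
 * make the return convention of btree_get_slot() explicit. For
 * keys[] = {10, 20, 30}: key=20 yields slot 1 with cmp==0 (exact
 * match), key=25 yields slot 1 with cmp>0 (key is bigger than the
 * element at the slot), and key=5 yields slot -1 (key is smaller than
 * the first element).
 */
#if 0 /* sketch only - not part of the build */
static ham_s32_t
__sketch_get_slot(const int *keys, ham_s32_t count, int key, int *pcmp)
{
    ham_s32_t l = 0;
    ham_s32_t r = count - 1;
    ham_s32_t slot = -1;
    int cmp = -1;

    while (l <= r) {
        ham_s32_t i = (l+r)/2;
        cmp = (key > keys[i]) - (key < keys[i]);
        if (cmp == 0) {
            slot = i;           /* exact match */
            break;
        }
        if (cmp < 0) {
            r = i-1;            /* key is smaller: search to the left */
        }
        else {
            slot = i;           /* keys[i] <= key: remember candidate */
            l = i+1;            /* key is bigger: search to the right */
        }
    }
    /* mimic btree_get_slot(): cmp reflects the comparison at the
     * returned slot, so a non-exact hit reports "key is bigger" */
    if (slot >= 0 && cmp != 0)
        cmp = 1;
    if (pcmp)
        *pcmp = cmp;
    return (slot);
}
#endif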