/**
 * Insert (or overwrite) a key/record pair in @a page, assuming the page
 * has enough room so no split is required.
 *
 * @param page    the btree page (leaf or internal) receiving the key
 * @param key     the key to insert; if larger than the database keysize,
 *                the overflow is stored as an extended-key blob
 * @param rid     for internal nodes: the right child pointer stored with
 *                the key (ignored in leaves, where the record is stored)
 * @param record  the record to store (leaf pages only)
 * @param cursor  optional cursor which is coupled to the new slot on success
 * @param hints   insert hints (append/prepend fast paths, flags, cost stats)
 *
 * @return HAM_SUCCESS, HAM_DUPLICATE_KEY, or an error from a helper.
 *
 * NOTE(review): callers must guarantee that the page is not full; this
 * function unconditionally shifts keys and bumps the count.
 */
static ham_status_t
__insert_nosplit(ham_page_t *page, ham_key_t *key, ham_offset_t rid,
        ham_record_t *record, ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_u16_t count;
    ham_size_t keysize;
    ham_size_t new_dupe_id = 0;
    int_key_t *bte = 0;
    btree_node_t *node;
    ham_db_t *db=page_get_owner(page);
    ham_bool_t exists = HAM_FALSE;
    ham_s32_t slot;

    ham_assert(page_get_owner(page), (0));
    ham_assert(device_get_env(page_get_device(page))
            == db_get_env(page_get_owner(page)), (0));

    node=ham_page_get_btree_node(page);
    count=btree_node_get_count(node);
    keysize=db_get_keysize(db);

    /*
     * determine the slot where the new key will live; the append/prepend
     * hints skip the (binary search) slot lookup entirely
     */
    if (btree_node_get_count(node)==0)
    {
        /* empty page: first key goes into slot 0 */
        slot = 0;
    }
    else if (hints->force_append)
    {
        /* append after the last existing key */
        slot = count;
    }
    else if (hints->force_prepend)
    {
        /* insert at beginning; shift all up by one */
        slot = 0;
    }
    else
    {
        int cmp;

        hints->cost++;
        st=btree_get_slot(db, page, key, &slot, &cmp);
        if (st)
            return (st);

        /* insert the new key at the beginning? */
        if (slot == -1)
        {
            slot = 0;
        }
        else
        {
            /*
             * key exists already
             */
            if (cmp == 0)
            {
                if (hints->flags & HAM_OVERWRITE)
                {
                    /*
                     * no need to overwrite the key - it already exists!
                     * however, we have to overwrite the data!
                     *
                     * for internal nodes there is no record to overwrite,
                     * so we are done
                     */
                    if (!btree_node_is_leaf(node))
                        return (HAM_SUCCESS);
                }
                else if (!(hints->flags & HAM_DUPLICATE))
                    return (HAM_DUPLICATE_KEY);

                /* do NOT shift keys up to make room; just overwrite the
                 * current [slot] */
                exists = HAM_TRUE;
            }
            else
            {
                /*
                 * otherwise, if the new key is > then the slot key, move to
                 * the next slot
                 */
                if (cmp > 0)
                {
                    slot++;
                }
            }
        }
    }

    /*
     * in any case, uncouple the cursors and see if we must shift any
     * elements to the right
     */
    bte=btree_node_get_key(db, node, slot);
    ham_assert(bte, (0));

    if (!exists)
    {
        if (count > slot)
        {
            /* uncouple all cursors & shift any elements following [slot] */
            st=bt_uncouple_all_cursors(page, slot);
            if (st)
                return (st);

            hints->cost += stats_memmove_cost((db_get_int_key_header_size()
                        +keysize)*(count-slot));
            /* overlapping regions, hence memmove (not memcpy) */
            memmove(((char *)bte)+db_get_int_key_header_size()+keysize, bte,
                    (db_get_int_key_header_size()+keysize)*(count-slot));
        }

        /*
         * if a new key is created or inserted: initialize it with zeroes
         */
        memset(bte, 0, db_get_int_key_header_size()+keysize);
    }

    /*
     * if we're in the leaf: insert, overwrite or append the blob
     * (depends on the flags)
     */
    if (btree_node_is_leaf(node))
    {
        /* NOTE(review): this inner 'st' shadows the outer one; harmless
         * here but confusing - consider removing the redeclaration */
        ham_status_t st;

        hints->cost++;
        st=key_set_record(db, bte, record,
                cursor
                    ? bt_cursor_get_dupe_id(cursor)
                    : 0,
                hints->flags, &new_dupe_id);
        if (st)
            return (st);

        hints->processed_leaf_page = page;
        hints->processed_slot = slot;
    }
    else
    {
        /* internal node: the key carries a child-page pointer instead */
        key_set_ptr(bte, rid);
    }

    page_set_dirty(page, db_get_env(db));
    /* the stored size is the full (logical) key size, even if the tail
     * overflows into an extended-key blob */
    key_set_size(bte, key->size);

    /*
     * set a flag if the key is extended, and does not fit into the
     * btree
     */
    if (key->size > db_get_keysize(db))
        key_set_flags(bte, key_get_flags(bte)|KEY_IS_EXTENDED);

    /*
     * if we have a cursor: couple it to the new key
     *
     * the cursor always points to NIL.
     */
    if (cursor)
    {
        if ((st=bt_cursor_set_to_nil(cursor)))
            return (st);

        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_UNCOUPLED),
                ("coupling an uncoupled cursor, but need a nil-cursor"));
        ham_assert(!(bt_cursor_get_flags(cursor)&BT_CURSOR_FLAG_COUPLED),
                ("coupling a coupled cursor, but need a nil-cursor"));
        bt_cursor_set_flags(cursor,
                bt_cursor_get_flags(cursor)|BT_CURSOR_FLAG_COUPLED);
        bt_cursor_set_coupled_page(cursor, page);
        bt_cursor_set_coupled_index(cursor, slot);
        bt_cursor_set_dupe_id(cursor, new_dupe_id);
        /* invalidate the cached duplicate entry */
        memset(bt_cursor_get_dupe_cache(cursor), 0, sizeof(dupe_entry_t));
        page_add_cursor(page, (ham_cursor_t *)cursor);
    }

    /*
     * if we've overwritten a key: no need to continue, we're done
     */
    if (exists)
        return (0);

    /*
     * we insert the extended key, if necessary
     */
    key_set_key(bte, key->data,
            db_get_keysize(db) < key->size ? db_get_keysize(db) : key->size);

    /*
     * if we need an extended key, allocate a blob and store
     * the blob-id in the key
     */
    if (key->size > db_get_keysize(db))
    {
        ham_offset_t blobid;

        /* only the leading keysize bytes live inline in the node */
        key_set_key(bte, key->data, db_get_keysize(db));

        st=key_insert_extended(&blobid, db, page, key);
        ham_assert(st ? blobid == 0 : 1, (0));
        if (!blobid)
            return st ? st : HAM_INTERNAL_ERROR;

        key_set_extended_rid(db, bte, blobid);
    }

    /*
     * update the btree node-header
     */
    btree_node_set_count(node, count+1);

    return (0);
}
static ham_status_t __insert_split(ham_page_t *page, ham_key_t *key, ham_offset_t rid, insert_scratchpad_t *scratchpad, insert_hints_t *hints) { int cmp; ham_status_t st; ham_page_t *newpage, *oldsib; int_key_t *nbte, *obte; btree_node_t *nbtp, *obtp, *sbtp; ham_size_t count, keysize; ham_db_t *db=page_get_owner(page); ham_env_t *env = db_get_env(db); ham_key_t pivotkey, oldkey; ham_offset_t pivotrid; ham_u16_t pivot; ham_bool_t pivot_at_end=HAM_FALSE; ham_assert(page_get_owner(page), (0)); ham_assert(device_get_env(page_get_device(page)) == db_get_env(page_get_owner(page)), (0)); ham_assert(hints->force_append == HAM_FALSE, (0)); keysize=db_get_keysize(db); /* * allocate a new page */ hints->cost++; st=db_alloc_page(&newpage, db, PAGE_TYPE_B_INDEX, 0); ham_assert(st ? page == NULL : 1, (0)); ham_assert(!st ? page != NULL : 1, (0)); if (st) return st; ham_assert(page_get_owner(newpage), ("")); /* clear the node header */ memset(page_get_payload(newpage), 0, sizeof(btree_node_t)); stats_page_is_nuked(db, page, HAM_TRUE); /* * move half of the key/rid-tuples to the new page * * !! recno: keys are sorted; we do a "lazy split" */ nbtp=ham_page_get_btree_node(newpage); nbte=btree_node_get_key(db, nbtp, 0); obtp=ham_page_get_btree_node(page); obte=btree_node_get_key(db, obtp, 0); count=btree_node_get_count(obtp); /* * for databases with sequential access (this includes recno databases): * do not split in the middle, but at the very end of the page * * if this page is the right-most page in the index, and this key is * inserted at the very end, then we select the same pivot as for * sequential access */ if (db_get_data_access_mode(db)&HAM_DAM_SEQUENTIAL_INSERT) pivot_at_end=HAM_TRUE; else if (btree_node_get_right(obtp)==0) { cmp=key_compare_pub_to_int(db, page, key, btree_node_get_count(obtp)-1); if (cmp>0) pivot_at_end=HAM_TRUE; } /* * internal pages set the count of the new page to count-pivot-1 (because * the pivot element will become ptr_left of the new page). 
* by using pivot=count-2 we make sure that at least 1 element will remain * in the new node. */ if (pivot_at_end) { pivot=count-2; } else { pivot=count/2; } /* * uncouple all cursors */ st=bt_uncouple_all_cursors(page, pivot); if (st) return (st); /* * if we split a leaf, we'll insert the pivot element in the leaf * page, too. in internal nodes, we don't insert it, but propagate * it to the parent node only. */ if (btree_node_is_leaf(obtp)) { hints->cost += stats_memmove_cost((db_get_int_key_header_size()+keysize)*(count-pivot)); memcpy((char *)nbte, ((char *)obte)+(db_get_int_key_header_size()+keysize)*pivot, (db_get_int_key_header_size()+keysize)*(count-pivot)); } else { hints->cost += stats_memmove_cost((db_get_int_key_header_size()+keysize)*(count-pivot-1)); memcpy((char *)nbte, ((char *)obte)+(db_get_int_key_header_size()+keysize)*(pivot+1), (db_get_int_key_header_size()+keysize)*(count-pivot-1)); } /* * store the pivot element, we'll need it later to propagate it * to the parent page */ nbte=btree_node_get_key(db, obtp, pivot); memset(&pivotkey, 0, sizeof(pivotkey)); memset(&oldkey, 0, sizeof(oldkey)); oldkey.data=key_get_key(nbte); oldkey.size=key_get_size(nbte); oldkey._flags=key_get_flags(nbte); st = util_copy_key(db, &oldkey, &pivotkey); if (st) { (void)db_free_page(newpage, DB_MOVE_TO_FREELIST); goto fail_dramatically; } pivotrid=page_get_self(newpage); /* * adjust the page count */ if (btree_node_is_leaf(obtp)) { btree_node_set_count(obtp, pivot); btree_node_set_count(nbtp, count-pivot); } else { btree_node_set_count(obtp, pivot); btree_node_set_count(nbtp, count-pivot-1); } /* * if we're in an internal page: fix the ptr_left of the new page * (it points to the ptr of the pivot key) */ if (!btree_node_is_leaf(obtp)) { /* * nbte still contains the pivot key */ btree_node_set_ptr_left(nbtp, key_get_ptr(nbte)); } /* * insert the new element */ hints->cost++; cmp=key_compare_pub_to_int(db, page, key, pivot); if (cmp < -1) { st = (ham_status_t)cmp; goto 
fail_dramatically; } if (cmp>=0) st=__insert_nosplit(newpage, key, rid, scratchpad->record, scratchpad->cursor, hints); else st=__insert_nosplit(page, key, rid, scratchpad->record, scratchpad->cursor, hints); if (st) { goto fail_dramatically; } scratchpad->cursor=0; /* don't overwrite cursor if __insert_nosplit is called again */ /* * fix the double-linked list of pages, and mark the pages as dirty */ if (btree_node_get_right(obtp)) { st=db_fetch_page(&oldsib, db, btree_node_get_right(obtp), 0); if (st) goto fail_dramatically; } else { oldsib=0; } if (oldsib) { st=ham_log_add_page_before(oldsib); if (st) goto fail_dramatically; } btree_node_set_left (nbtp, page_get_self(page)); btree_node_set_right(nbtp, btree_node_get_right(obtp)); btree_node_set_right(obtp, page_get_self(newpage)); if (oldsib) { sbtp=ham_page_get_btree_node(oldsib); btree_node_set_left(sbtp, page_get_self(newpage)); page_set_dirty(oldsib, env); } page_set_dirty(newpage, env); page_set_dirty(page, env); /* * propagate the pivot key to the parent page */ ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0)); if (scratchpad->key.data) allocator_free(env_get_allocator(env), scratchpad->key.data); scratchpad->key=pivotkey; scratchpad->rid=pivotrid; ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0)); return (SPLIT); fail_dramatically: ham_assert(!(pivotkey.flags & HAM_KEY_USER_ALLOC), (0)); if (pivotkey.data) allocator_free(env_get_allocator(env), pivotkey.data); return st; }
/**
 * Uncouple every cursor that is coupled to @a page, beginning at slot
 * @a start.
 *
 * @remark invoked whenever the page is deleted or becomes invalid
 *
 * @note This is a B+-tree 'backend' method; the @a be handle is part of
 * the method signature but is not needed by the implementation.
 */
static ham_status_t
my_fun_uncouple_all_cursors(ham_btree_t *be, ham_page_t *page,
        ham_size_t start)
{
    ham_status_t st;

    (void)be; /* unused - delegation needs only the page and start slot */

    st=bt_uncouple_all_cursors(page, start);
    return (st);
}