static ham_status_t
__insert_split(ham_page_t *page, ham_key_t *key,
        ham_offset_t rid, insert_scratchpad_t *scratchpad,
        insert_hints_t *hints)
{
    int cmp;
    ham_status_t st;
    ham_page_t *newpage, *oldsib;
    int_key_t *nbte, *obte;
    btree_node_t *nbtp, *obtp, *sbtp;
    ham_size_t count, keysize;
    ham_db_t *db=page_get_owner(page);
    ham_env_t *env=db_get_env(db);
    ham_key_t pivotkey, oldkey;
    ham_offset_t pivotrid;
    ham_u16_t pivot;
    ham_bool_t pivot_at_end=HAM_FALSE;

    ham_assert(page_get_owner(page), (0));
    ham_assert(device_get_env(page_get_device(page))
            == db_get_env(page_get_owner(page)), (0));

    ham_assert(hints->force_append == HAM_FALSE, (0));

    keysize=db_get_keysize(db);

    /*
     * allocate a new page
     */
    hints->cost++;
    st=db_alloc_page(&newpage, db, PAGE_TYPE_B_INDEX, 0);
    ham_assert(st ? newpage == NULL : 1, (0));
    ham_assert(!st ? newpage != NULL : 1, (0));
    if (st)
        return st;
    ham_assert(page_get_owner(newpage), (""));

    /* clear the node header */
    memset(page_get_payload(newpage), 0, sizeof(btree_node_t));

    stats_page_is_nuked(db, page, HAM_TRUE);

    /*
     * move half of the key/rid-tuples to the new page
     *
     * !! recno: keys are sorted; we do a "lazy split"
     */
    nbtp=ham_page_get_btree_node(newpage);
    nbte=btree_node_get_key(db, nbtp, 0);
    obtp=ham_page_get_btree_node(page);
    obte=btree_node_get_key(db, obtp, 0);
    count=btree_node_get_count(obtp);

    /*
     * for databases with sequential access (this includes recno databases):
     * do not split in the middle, but at the very end of the page.
     *
     * if this page is the right-most page in the index, and this key is
     * inserted at the very end, then we select the same pivot as for
     * sequential access.
     */
    if (db_get_data_access_mode(db)&HAM_DAM_SEQUENTIAL_INSERT)
        pivot_at_end=HAM_TRUE;
    else if (btree_node_get_right(obtp)==0) {
        cmp=key_compare_pub_to_int(db, page, key,
                btree_node_get_count(obtp)-1);
        if (cmp>0)
            pivot_at_end=HAM_TRUE;
    }

    /*
     * internal pages set the count of the new page to count-pivot-1
     * (because the pivot element will become ptr_left of the new page).
     * by using pivot=count-2 we make sure that at least 1 element will
     * remain in the new node.
     */
    if (pivot_at_end)
        pivot=count-2;
    else
        pivot=count/2;

    /*
     * uncouple all cursors
     */
    st=bt_uncouple_all_cursors(page, pivot);
    if (st)
        return (st);

    /*
     * if we split a leaf, we'll insert the pivot element in the leaf
     * page, too. in internal nodes, we don't insert it, but propagate
     * it to the parent node only.
     */
    if (btree_node_is_leaf(obtp)) {
        hints->cost += stats_memmove_cost((db_get_int_key_header_size()
                    +keysize)*(count-pivot));
        memcpy((char *)nbte,
                ((char *)obte)+(db_get_int_key_header_size()+keysize)*pivot,
                (db_get_int_key_header_size()+keysize)*(count-pivot));
    }
    else {
        hints->cost += stats_memmove_cost((db_get_int_key_header_size()
                    +keysize)*(count-pivot-1));
        memcpy((char *)nbte,
                ((char *)obte)+(db_get_int_key_header_size()+keysize)*(pivot+1),
                (db_get_int_key_header_size()+keysize)*(count-pivot-1));
    }

    /*
     * store the pivot element, we'll need it later to propagate it
     * to the parent page
     */
    nbte=btree_node_get_key(db, obtp, pivot);
    memset(&pivotkey, 0, sizeof(pivotkey));
    memset(&oldkey, 0, sizeof(oldkey));
    oldkey.data=key_get_key(nbte);
    oldkey.size=key_get_size(nbte);
    oldkey._flags=key_get_flags(nbte);
    st=util_copy_key(db, &oldkey, &pivotkey);
    if (st) {
        (void)db_free_page(newpage, DB_MOVE_TO_FREELIST);
        goto fail_dramatically;
    }
    pivotrid=page_get_self(newpage);

    /*
     * adjust the page count
     */
    if (btree_node_is_leaf(obtp)) {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot);
    }
    else {
        btree_node_set_count(obtp, pivot);
        btree_node_set_count(nbtp, count-pivot-1);
    }

    /*
     * if we're in an internal page: fix the ptr_left of the new page
     * (it points to the ptr of the pivot key)
     */
    if (!btree_node_is_leaf(obtp)) {
        /* nbte still contains the pivot key */
        btree_node_set_ptr_left(nbtp, key_get_ptr(nbte));
    }

    /*
     * insert the new element
     */
    hints->cost++;
    cmp=key_compare_pub_to_int(db, page, key, pivot);
    if (cmp < -1) {
        st=(ham_status_t)cmp;
        goto fail_dramatically;
    }

    if (cmp>=0)
        st=__insert_nosplit(newpage, key, rid,
                scratchpad->record, scratchpad->cursor, hints);
    else
        st=__insert_nosplit(page, key, rid,
                scratchpad->record, scratchpad->cursor, hints);
    if (st)
        goto fail_dramatically;
    scratchpad->cursor=0; /* don't overwrite cursor if __insert_nosplit
                             is called again */

    /*
     * fix the double-linked list of pages, and mark the pages as dirty
     */
    if (btree_node_get_right(obtp)) {
        st=db_fetch_page(&oldsib, db, btree_node_get_right(obtp), 0);
        if (st)
            goto fail_dramatically;
    }
    else {
        oldsib=0;
    }

    if (oldsib) {
        st=ham_log_add_page_before(oldsib);
        if (st)
            goto fail_dramatically;
    }

    btree_node_set_left (nbtp, page_get_self(page));
    btree_node_set_right(nbtp, btree_node_get_right(obtp));
    btree_node_set_right(obtp, page_get_self(newpage));
    if (oldsib) {
        sbtp=ham_page_get_btree_node(oldsib);
        btree_node_set_left(sbtp, page_get_self(newpage));
        page_set_dirty(oldsib, env);
    }
    page_set_dirty(newpage, env);
    page_set_dirty(page, env);

    /*
     * propagate the pivot key to the parent page
     */
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad->key.data)
        allocator_free(env_get_allocator(env), scratchpad->key.data);
    scratchpad->key=pivotkey;
    scratchpad->rid=pivotrid;
    ham_assert(!(scratchpad->key.flags & HAM_KEY_USER_ALLOC), (0));

    return (SPLIT);

fail_dramatically:
    ham_assert(!(pivotkey.flags & HAM_KEY_USER_ALLOC), (0));
    if (pivotkey.data)
        allocator_free(env_get_allocator(env), pivotkey.data);
    return st;
}
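/*
 * Illustration only (not part of the original source): a minimal sketch of
 * the split arithmetic in __insert_split() above. A leaf split keeps the
 * pivot key in the leaf level, so the new page receives count-pivot keys;
 * an internal split propagates the pivot to the parent (it becomes the
 * ptr_left of the new page), so the new page receives count-pivot-1 keys.
 * Example: count=7, pivot=3 moves 4 keys for a leaf, 3 for an internal node.
 */
static ham_size_t
__split_moved_keys_sketch(ham_size_t count, ham_u16_t pivot,
        ham_bool_t is_leaf)
{
    /* mirrors the memcpy sizes in the leaf/internal branches above */
    return (is_leaf ? count-pivot : count-pivot-1);
}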
ham_status_t
txn_abort(ham_txn_t *txn, ham_u32_t flags)
{
    ham_status_t st;
    ham_env_t *env=txn_get_env(txn);

    /*
     * are cursors attached to this txn? if yes, fail
     */
    if (txn_get_cursor_refcount(txn)) {
        ham_trace(("transaction cannot be aborted until all attached "
                    "cursors are closed"));
        return HAM_CURSOR_STILL_OPEN;
    }

    if (env_get_log(env) && !(txn_get_flags(txn)&HAM_TXN_READ_ONLY)) {
        st=ham_log_append_txn_abort(env_get_log(env), txn);
        if (st)
            return st;
    }

    env_set_txn(env, 0);

    /*
     * undo all operations from this transaction
     *
     * this includes allocated pages (they're moved to the freelist),
     * deleted pages (they're un-deleted) and other modifications (which
     * re-create the original page from the logfile)
     *
     * keep txn_get_pagelist(txn) intact during every round, so no
     * local var for this one.
     */
    while (txn_get_pagelist(txn)) {
        ham_page_t *head=txn_get_pagelist(txn);

        if (!(flags & DO_NOT_NUKE_PAGE_STATS)) {
            /*
             * nuke critical statistics, such as tracked outer bounds;
             * imagine, for example, a failing erase transaction which,
             * through erasing the top-most key, lowers the actual upper
             * bound, after which the transaction fails at some later point
             * in life. Now if we wouldn't 'rewind' our bounds-statistics,
             * we would have a situation where a subsequent out-of-bounds
             * insert (~ append) would possibly FAIL due to the hinter
             * using incorrect bounds information then!
             *
             * Hence we 'reverse' our statistics here, and the easiest
             * route is to just nuke the critical bits; subsequent
             * find/insert/erase operations will ensure that the stats get
             * updated again, anyhow. All we lose then is a few subsequent
             * operations, which might have been hinted if we had played a
             * smarter game of statistics 'reversal'. Soit.
             */
            ham_db_t *db=page_get_owner(head);

            /*
             * we only need to do this for index pages anyhow, and those
             * are the ones which have their 'ownership' set.
             */
            if (db) {
                stats_page_is_nuked(db, head, HAM_FALSE);
            }
        }

        ham_assert(page_is_in_list(txn_get_pagelist(txn), head,
                    PAGE_LIST_TXN), (0));
        txn_get_pagelist(txn)=page_list_remove(head, PAGE_LIST_TXN, head);

        /* if this page was allocated by this transaction, then we can
         * move the whole page to the freelist */
        if (page_get_alloc_txn_id(head)==txn_get_id(txn)) {
            (void)freel_mark_free(env, 0, page_get_self(head),
                    env_get_pagesize(env), HAM_TRUE);
        }
        else {
            /* remove the 'delete pending' flag */
            page_set_npers_flags(head,
                    page_get_npers_flags(head)&~PAGE_NPERS_DELETE_PENDING);

            /* if the page is dirty, and RECOVERY is enabled: recreate
             * the original, unmodified page from the log */
            if (env_get_log(env) && page_is_dirty(head)) {
                st=ham_log_recreate(env_get_log(env), head);
                if (st)
                    return (st);
                /*page_set_undirty(head); */
            }
        }

        /* page is no longer in use */
        page_release_ref(head);
    }

    ham_assert(txn_get_pagelist(txn)==0, (0));

    return (0);
}
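/*
 * Usage sketch (illustration only, assuming the hamsterdb 1.x public API:
 * ham_txn_begin/ham_insert/ham_txn_commit/ham_txn_abort; none of these
 * appear in this file). A failed insert aborts the transaction, which
 * drives the undo loop in txn_abort() above: pages allocated by the txn
 * go to the freelist, everything else is restored from the log.
 */
static ham_status_t
__insert_with_txn_sketch(ham_db_t *db, ham_key_t *key, ham_record_t *record)
{
    ham_txn_t *txn;
    ham_status_t st=ham_txn_begin(&txn, db, 0);
    if (st)
        return st;
    st=ham_insert(db, txn, key, record, 0);
    if (st) {
        /* roll back: undoes allocated/modified pages via txn_abort() */
        (void)ham_txn_abort(txn, 0);
        return st;
    }
    return ham_txn_commit(txn, 0);
}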
static ham_status_t
__insert_cursor(ham_btree_t *be, ham_key_t *key, ham_record_t *record,
        ham_bt_cursor_t *cursor, insert_hints_t *hints)
{
    ham_status_t st;
    ham_page_t *root;
    ham_db_t *db=be_get_db(be);
    ham_env_t *env=db_get_env(db);
    insert_scratchpad_t scratchpad;

    ham_assert(hints->force_append == HAM_FALSE, (0));
    ham_assert(hints->force_prepend == HAM_FALSE, (0));

    /*
     * initialize the scratchpad
     */
    memset(&scratchpad, 0, sizeof(scratchpad));
    scratchpad.be=be;
    scratchpad.record=record;
    scratchpad.cursor=cursor;

    /*
     * get the root-page...
     */
    ham_assert(btree_get_rootpage(be)!=0, ("btree has no root page"));
    st=db_fetch_page(&root, db, btree_get_rootpage(be), 0);
    ham_assert(st ? root == NULL : 1, (0));
    if (st)
        return st;

    /*
     * ... and start the recursion
     */
    st=__insert_recursive(root, key, 0, &scratchpad, hints);

    /*
     * if the root page was split, we have to create a new
     * root page.
     */
    if (st==SPLIT) {
        ham_page_t *newroot;
        btree_node_t *node;

        /*
         * the root-page will be changed...
         */
        st=ham_log_add_page_before(root);
        if (st)
            return (st);

        /*
         * allocate a new root page
         */
        st=db_alloc_page(&newroot, db, PAGE_TYPE_B_ROOT, 0);
        ham_assert(st ? newroot == NULL : 1, (0));
        if (st)
            return (st);
        ham_assert(page_get_owner(newroot), (""));
        /* clear the node header */
        memset(page_get_payload(newroot), 0, sizeof(btree_node_t));

        stats_page_is_nuked(db, root, HAM_TRUE);

        /*
         * insert the pivot element and the ptr_left
         */
        node=ham_page_get_btree_node(newroot);
        btree_node_set_ptr_left(node, btree_get_rootpage(be));
        st=__insert_nosplit(newroot, &scratchpad.key, scratchpad.rid,
                scratchpad.record, scratchpad.cursor, hints);
        ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
        scratchpad.cursor=0; /* don't overwrite cursor if __insert_nosplit
                                is called again */
        if (st) {
            ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
            if (scratchpad.key.data)
                allocator_free(env_get_allocator(env), scratchpad.key.data);
            return (st);
        }

        /*
         * set the new root page
         *
         * !!
         * do NOT delete the old root page - it's still in use!
         *
         * also don't forget to flush the backend - otherwise the header
         * page of the database will not contain the updated information.
         * The backend is flushed when the database is closed, but if
         * recovery is enabled then the flush here is critical.
         */
        btree_set_rootpage(be, page_get_self(newroot));
        be_set_dirty(be, HAM_TRUE);
        be->_fun_flush(be);

        /*
         * as we re-purpose a page, we will reset its pagecounter as well
         * to signal its first use as the new type assigned here
         */
        if (env_get_cache(env) && (page_get_type(root)!=PAGE_TYPE_B_INDEX))
            cache_update_page_access_counter(root, env_get_cache(env), 0);

        page_set_type(root, PAGE_TYPE_B_INDEX);
        page_set_dirty(root, env);
        page_set_dirty(newroot, env);

        /* the root page was modified (btree_set_rootpage) - make sure
         * that it's logged */
        if (env_get_rt_flags(env)&HAM_ENABLE_RECOVERY) {
            st=txn_add_page(env_get_txn(env), env_get_header_page(env),
                    HAM_TRUE);
            if (st)
                return (st);
        }
    }

    /*
     * release the scratchpad-memory and return to caller
     */
    ham_assert(!(scratchpad.key.flags & HAM_KEY_USER_ALLOC), (0));
    if (scratchpad.key.data)
        allocator_free(env_get_allocator(env), scratchpad.key.data);

    return (st);
}
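/*
 * Illustration only: after the SPLIT branch in __insert_cursor() above,
 * the new root holds exactly one key (the propagated pivot), its ptr_left
 * points at the old root, and the tree grows by one level. A minimal
 * invariant check, assuming a btree_node_get_ptr_left() accessor that
 * mirrors the btree_node_set_ptr_left() setter used above.
 */
static ham_bool_t
__new_root_invariant_sketch(btree_node_t *node, ham_offset_t old_root)
{
    return (btree_node_get_count(node)==1
            && btree_node_get_ptr_left(node)==old_root);
}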