/*
 * __wt_row_modify --
 *	Row-store insert, update and delete.
 *
 *	session   - the session handle, passed through to every helper.
 *	cbt       - the btree cursor; its page, slot, compare, ins, ins_stack,
 *		    write_gen and key/value items are read here.
 *	is_remove - non-zero to apply the change with no value (a NULL value
 *		    is handed to __wt_update_alloc), zero to use the cursor's
 *		    value item.
 *
 *	When cbt->compare is 0 a new WT_UPDATE is linked onto the matched
 *	entry; otherwise a new WT_INSERT/WT_UPDATE pair is created for the
 *	cursor's key.  Returns ret, which is set by the WT_ERR calls below.
 */
int
__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove)
{
	WT_DECL_RET;
	WT_INSERT *ins;
	WT_INSERT_HEAD **inshead, *new_inshead, **new_inslist;
	WT_ITEM *key, *value;
	WT_PAGE *page;
	WT_UPDATE **new_upd, *upd, **upd_entry, *upd_obsolete;
	size_t ins_size, upd_size;
	size_t new_inshead_size, new_inslist_size, new_upd_size;
	uint32_t ins_slot;
	u_int skipdepth;
	int level, logged;

	page = cbt->page;
	key = &cbt->iface.key;

	/* A remove carries no value; anything else uses the cursor's value. */
	if (is_remove)
		value = NULL;
	else
		value = &cbt->iface.value;

	/* Nothing has been allocated or registered with the transaction. */
	logged = 0;
	ins = NULL;
	upd = NULL;
	new_upd = NULL;
	new_inslist = NULL;
	new_inshead = NULL;

	if (cbt->compare == 0) {
		/*
		 * Exact match: build a WT_UPDATE and have the serialized
		 * function link it in.  First find the WT_UPDATE list to
		 * extend: the matched insert entry's list if the cursor
		 * references one, else the page's per-slot update array,
		 * which is allocated here if the page doesn't have one yet.
		 */
		new_upd_size = 0;
		if (cbt->ins != NULL)
			upd_entry = &cbt->ins->upd;
		else if (page->u.row.upd != NULL)
			upd_entry = &page->u.row.upd[cbt->slot];
		else {
			WT_ERR(__wt_calloc_def(
			    session, page->entries, &new_upd));
			new_upd_size = page->entries * sizeof(WT_UPDATE *);
			upd_entry = &new_upd[cbt->slot];
		}

		/* Check the update is permitted against the current list. */
		WT_ERR(__wt_update_check(session, page, *upd_entry));

		/*
		 * Build the WT_UPDATE and get its transaction ID.  Once
		 * __wt_txn_modify succeeds, the error path has to undo it.
		 */
		WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size));
		WT_ERR(__wt_txn_modify(session, &upd->txnid));
		logged = 1;

		/* Hand the update to the serialized function. */
		WT_ERR(__wt_update_serial(session, page,
		    cbt->write_gen, upd_entry, &new_upd, new_upd_size,
		    &upd, upd_size, &upd_obsolete));

		/* Free whatever obsolete updates were handed back. */
		if (upd_obsolete != NULL)
			__wt_update_obsolete_free(session, page, upd_obsolete);
	} else {
		/*
		 * No exact match: build a WT_INSERT/WT_UPDATE pair and have
		 * the serialized function insert it.
		 */

		/* Check the update is permitted; there is no existing list. */
		WT_ERR(__wt_update_check(session, page, NULL));

		/*
		 * Pick the insert-array slot.  The array has one slot more
		 * than the page has entries; the extra slot, at index
		 * page->entries, holds keys sorting before every key on the
		 * page.  The search sets WT_CBT_SEARCH_SMALLEST when that
		 * slot applies, so test the flag rather than re-deriving it.
		 */
		if (F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST))
			ins_slot = page->entries;
		else
			ins_slot = cbt->slot;

		/* Use the page's insert array, allocating one if needed. */
		new_inslist_size = 0;
		new_inshead_size = 0;
		if (page->u.row.ins != NULL)
			inshead = &page->u.row.ins[ins_slot];
		else {
			WT_ERR(__wt_calloc_def(
			    session, page->entries + 1, &new_inslist));
			new_inslist_size =
			    (page->entries + 1) * sizeof(WT_INSERT_HEAD *);
			inshead = &new_inslist[ins_slot];
		}

		/*
		 * With no insert list head in the slot, allocate one.  The
		 * search had no list to walk, so the cursor's insert stack
		 * and head reference are pointed at the new head here.
		 */
		if (*inshead == NULL) {
			new_inshead_size = sizeof(WT_INSERT_HEAD);
			WT_ERR(__wt_calloc_def(session, 1, &new_inshead));
			for (level = 0; level < WT_SKIP_MAXDEPTH; level++)
				cbt->ins_stack[level] =
				    &new_inshead->head[level];
			cbt->ins_head = new_inshead;
		}

		/* Choose the skiplist depth of the new entry. */
		skipdepth = __wt_skip_choose_depth();

		/*
		 * Build the WT_INSERT/WT_UPDATE pair, get the transaction ID,
		 * and point the cursor at the new entry.  ins_size is grown
		 * to include the update.
		 */
		WT_ERR(__wt_row_insert_alloc(
		    session, key, skipdepth, &ins, &ins_size));
		WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size));
		WT_ERR(__wt_txn_modify(session, &upd->txnid));
		logged = 1;
		ins->upd = upd;
		ins_size += upd_size;
		cbt->ins = ins;

		/* Hand the insert to the serialized function. */
		WT_ERR(__wt_insert_serial(session, page,
		    cbt->write_gen, inshead, cbt->ins_stack,
		    &new_inslist, new_inslist_size,
		    &new_inshead, new_inshead_size,
		    &ins, ins_size, skipdepth));
	}

	/* Success: skip the error-only cleanup. */
	goto done;

err:	/*
	 * Remove the update from the current transaction so rollback doesn't
	 * try to modify it, then discard the structures built above.
	 */
	if (logged)
		__wt_txn_unmodify(session);
	__wt_free(session, ins);
	__wt_free(session, upd);

done:	/*
	 * Free any insert and update arrays still referenced here.
	 * NOTE(review): the serialized calls take these by address, so they
	 * presumably clear the ones they consume -- confirm.
	 */
	__wt_free(session, new_inslist);
	__wt_free(session, new_inshead);
	__wt_free(session, new_upd);

	return (ret);
}
/*
 * __wt_col_modify --
 *	Column-store delete, insert, and update.
 *
 *	session - the session handle, passed through to every helper.
 *	cbt     - the btree cursor; its btree, page, slot, compare, ins,
 *		  ins_stack, next_stack, write_gen, record number and value
 *		  item are read here.
 *	op      - 2 requests a remove; any other value is treated as an
 *		  insert/update (the callers' value for that is 3).  The
 *		  value 1 selects the append path: callers must not pass it
 *		  (asserted below), it is only set internally.
 *
 *	When the cursor matched an existing insert entry, a new WT_UPDATE is
 *	linked onto it; otherwise a new WT_INSERT/WT_UPDATE pair is created.
 *	On the append path the resulting record number is stored in
 *	cbt->recno.  Returns early if __wt_page_modify_init fails, otherwise
 *	returns ret, which is set by the WT_ERR calls below.
 */
int
__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int op)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_INSERT *ins, *ins_copy;
	WT_INSERT_HEAD **inshead, *new_inshead, **new_inslist;
	WT_ITEM *value, _value;
	WT_PAGE *page;
	WT_UPDATE *old_upd, *upd, *upd_obsolete;
	size_t ins_size, new_inshead_size, new_inslist_size, upd_size;
	uint64_t recno;
	u_int skipdepth;
	int level, logged;

	btree = cbt->btree;
	page = cbt->page;
	recno = cbt->iface.recno;
	logged = 0;

	WT_ASSERT(session, op != 1);

	if (op == 2) {
		/*
		 * Remove.  A fixed-length column store applies a one-byte
		 * zero value; every other store type passes no value.
		 */
		if (btree->type != BTREE_COL_FIX)
			value = NULL;
		else {
			_value.data = "";
			_value.size = 1;
			value = &_value;
		}
	} else {
		/* Insert/Update: apply the cursor's value. */
		value = &cbt->iface.value;

		/*
		 * The application may have specified a record past the last
		 * record on the page (or record number 0).  A new
		 * WT_INSERT/WT_UPDATE pair for such a record belongs on the
		 * append list, not the update list.
		 */
		if (recno == 0 || recno > __col_last_recno(page))
			op = 1;
	}

	/* Make sure the page has a modify structure before going on. */
	WT_RET(__wt_page_modify_init(session, page));

	/* Nothing has been allocated yet. */
	ins = NULL;
	upd = NULL;
	new_inslist = NULL;
	new_inshead = NULL;

	if (cbt->compare == 0 && cbt->ins != NULL) {
		/*
		 * The record was previously modified: build a WT_UPDATE and
		 * have the serialized function link it into the existing
		 * WT_INSERT entry's WT_UPDATE list.
		 */

		/* Check the update is permitted against the current list. */
		old_upd = cbt->ins->upd;
		WT_ERR(__wt_update_check(session, page, old_upd));

		/*
		 * Build the WT_UPDATE and get its transaction ID.  Once
		 * __wt_txn_modify succeeds, the error path has to undo it.
		 */
		WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size));
		WT_ERR(__wt_txn_modify(session, &upd->txnid));
		logged = 1;

		/* Hand the update to the serialized function. */
		WT_ERR(__wt_update_serial(session, page,
		    cbt->write_gen, &cbt->ins->upd, old_upd,
		    NULL, 0, &upd, upd_size, &upd_obsolete));

		/* Free whatever obsolete updates were handed back. */
		if (upd_obsolete != NULL)
			__wt_update_obsolete_free(session, page, upd_obsolete);
	} else {
		/*
		 * Otherwise build a WT_INSERT/WT_UPDATE pair and have a
		 * serialized function insert or append it.
		 */

		/* Check the update is permitted; there is no existing list. */
		WT_ERR(__wt_update_check(session, page, NULL));

		/*
		 * Find the insert list head slot, allocating the array that
		 * holds it if the page doesn't have one.  Appends use the
		 * single-slot append array, fixed-length pages a single-slot
		 * update array, and all other pages an update array with one
		 * slot per page entry, indexed by the cursor's slot.
		 */
		new_inslist_size = 0;
		new_inshead_size = 0;
		if (op == 1) {
			if (page->modify->append != NULL)
				inshead = &page->modify->append[0];
			else {
				new_inslist_size = sizeof(WT_INSERT_HEAD *);
				WT_ERR(
				    __wt_calloc_def(session, 1, &new_inslist));
				inshead = &new_inslist[0];
			}
			cbt->ins_head = *inshead;
		} else if (page->type == WT_PAGE_COL_FIX) {
			if (page->modify->update != NULL)
				inshead = &page->modify->update[0];
			else {
				new_inslist_size = sizeof(WT_INSERT_HEAD *);
				WT_ERR(
				    __wt_calloc_def(session, 1, &new_inslist));
				inshead = &new_inslist[0];
			}
		} else {
			if (page->modify->update != NULL)
				inshead = &page->modify->update[cbt->slot];
			else {
				new_inslist_size =
				    page->entries * sizeof(WT_INSERT_HEAD *);
				WT_ERR(__wt_calloc_def(
				    session, page->entries, &new_inslist));
				inshead = &new_inslist[cbt->slot];
			}
		}

		/*
		 * With no insert list head in the slot, allocate one and
		 * point the cursor's insert stack and head reference at it;
		 * the next-pointer stack is cleared.
		 */
		if (*inshead == NULL) {
			new_inshead_size = sizeof(WT_INSERT_HEAD);
			WT_ERR(__wt_calloc_def(session, 1, &new_inshead));
			for (level = 0; level < WT_SKIP_MAXDEPTH; level++) {
				cbt->ins_stack[level] =
				    &new_inshead->head[level];
				cbt->next_stack[level] = NULL;
			}
			cbt->ins_head = new_inshead;
		}

		/* Choose the skiplist depth of the new entry. */
		skipdepth = __wt_skip_choose_depth();

		/*
		 * Build the WT_INSERT/WT_UPDATE pair, get the transaction ID,
		 * and point the cursor at the new entry.  ins_size is grown
		 * to include the update.
		 */
		WT_ERR(__col_insert_alloc(
		    session, recno, skipdepth, &ins, &ins_size));
		WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size));
		WT_ERR(__wt_txn_modify(session, &upd->txnid));
		logged = 1;
		ins->upd = upd;
		ins_size += upd_size;
		cbt->ins = ins;

		/* Append or insert the WT_INSERT structure. */
		if (op == 1) {
			/*
			 * The serialized function clears ins, so keep a copy
			 * of the pointer to read the record number from.
			 */
			ins_copy = ins;
			WT_ERR(__wt_col_append_serial(session, page,
			    cbt->write_gen, inshead,
			    cbt->ins_stack, cbt->next_stack,
			    &new_inslist, new_inslist_size,
			    &new_inshead, new_inshead_size,
			    &ins, ins_size, skipdepth));

			/* Store the new record number in the cursor. */
			cbt->recno = WT_INSERT_RECNO(ins_copy);
		} else
			WT_ERR(__wt_insert_serial(session, page,
			    cbt->write_gen, inshead,
			    cbt->ins_stack, cbt->next_stack,
			    &new_inslist, new_inslist_size,
			    &new_inshead, new_inshead_size,
			    &ins, ins_size, skipdepth));
	}

	/* Success: skip the error-only cleanup. */
	goto done;

err:	/*
	 * Remove the update from the current transaction so rollback doesn't
	 * try to modify it, then discard the structures built above.
	 */
	if (logged)
		__wt_txn_unmodify(session);
	__wt_free(session, ins);
	__wt_free(session, upd);

done:	/*
	 * Free the insert array and list head if still referenced here.
	 * NOTE(review): the serialized calls take these by address, so they
	 * presumably clear the ones they consume -- confirm.
	 */
	__wt_free(session, new_inslist);
	__wt_free(session, new_inshead);

	return (ret);
}