/*
 * __cursor_var_append_next --
 *	Step a variable-length column-store cursor to the next returnable
 *	entry on the append list.
 *
 *	If newpage is true the walk starts at the first entry of
 *	cbt->ins_head, otherwise it resumes with the entry following cbt->ins.
 *	The cursor's record number is set for every entry visited.  Entries
 *	for which __wt_txn_read yields no update are skipped, as are entries
 *	whose update is of type WT_UPDATE_DELETED (those are counted in
 *	cbt->page_deleted_count when __wt_txn_upd_visible_all agrees).
 *
 *	Returns WT_NOTFOUND when the list is exhausted, otherwise the result
 *	of __wt_value_return for the selected update.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_SESSION_IMPL *session;
	WT_UPDATE *visible;

	session = (WT_SESSION_IMPL *)cbt->iface.session;

	/* Pick the starting entry: head of the list, or one past the last. */
	if (newpage)
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
	else
		cbt->ins = WT_SKIP_NEXT(cbt->ins);

	while (cbt->ins != NULL) {
		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));

		visible = __wt_txn_read(session, cbt->ins->upd);
		if (visible != NULL) {
			if (visible->type != WT_UPDATE_DELETED)
				return (
				    __wt_value_return(session, cbt, visible));

			/* A deleted entry: count it if appropriate, move on. */
			if (__wt_txn_upd_visible_all(session, visible))
				++cbt->page_deleted_count;
		}

		cbt->ins = WT_SKIP_NEXT(cbt->ins);
	}

	return (WT_NOTFOUND);
}
/*
 * __cursor_var_append_next --
 *	Step a variable-length column-store cursor to the next returnable
 *	entry on the append list.
 *
 *	If newpage is non-zero the walk starts at the first entry of
 *	cbt->ins_head, otherwise it resumes with the entry following cbt->ins.
 *	The cursor's record number is set for every entry visited.  Entries
 *	for which __wt_txn_read yields no update, or whose update satisfies
 *	WT_UPDATE_DELETED_ISSET, are skipped.
 *
 *	On success the cursor's value item references the update's data and
 *	size and 0 is returned; WT_NOTFOUND is returned when the list is
 *	exhausted.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_ITEM *value;
	WT_SESSION_IMPL *session;
	WT_UPDATE *visible;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	value = &cbt->iface.value;

	/* Pick the starting entry: head of the list, or one past the last. */
	if (newpage)
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
	else
		cbt->ins = WT_SKIP_NEXT(cbt->ins);

	for (; cbt->ins != NULL; cbt->ins = WT_SKIP_NEXT(cbt->ins)) {
		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));

		visible = __wt_txn_read(session, cbt->ins->upd);
		if (visible == NULL)
			continue;
		if (WT_UPDATE_DELETED_ISSET(visible))
			continue;

		/* Found one: reference the update's bytes, don't copy them. */
		value->data = WT_UPDATE_DATA(visible);
		value->size = visible->size;
		return (0);
	}

	return (WT_NOTFOUND);
}
/*
 * __cursor_fix_append_next --
 *	Move a fixed-length column-store cursor to the next record, walking
 *	only the append list.
 *
 *	If newpage is non-zero the walk starts at the first entry of
 *	cbt->ins_head.  Otherwise the insert entry is advanced only once the
 *	cursor's record number has caught up with the current entry's record
 *	number.  Returns WT_NOTFOUND when there is no (further) append entry,
 *	otherwise 0 with the cursor's value item referencing a single byte.
 */
static inline int
__cursor_fix_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_ITEM *value;
	WT_SESSION_IMPL *session;
	WT_UPDATE *visible;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	value = &cbt->iface.value;

	/*
	 * Newly loaded page: begin at the head of the append list (an empty
	 * list means there are no appended records).  Otherwise, step to the
	 * following entry only if the cursor has reached the current one; if
	 * that was the last entry there is nothing left to return.
	 */
	if (newpage)
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
	else if (cbt->recno >= WT_INSERT_RECNO(cbt->ins))
		cbt->ins = WT_SKIP_NEXT(cbt->ins);
	if (cbt->ins == NULL)
		return (WT_NOTFOUND);

	/*
	 * This code looks different from the cursor-previous code. The append
	 * list appears on the last page of the tree, but it may be preceded by
	 * other rows, which means the cursor's recno will be set to a value and
	 * we simply want to increment it. If the cursor's recno is NOT set,
	 * we're starting our iteration in a tree that has only appended items.
	 * In that case, recno will be 0 and happily enough the increment will
	 * set it to 1, which is correct.
	 */
	__cursor_set_recno(cbt, cbt->recno + 1);

	/*
	 * Fixed-width column store appends are inherently non-transactional.
	 * Even a non-visible update by a concurrent or aborted transaction
	 * changes the effective end of the data. The effect is subtle because
	 * of the blurring between deleted and empty values, but ideally we
	 * would skip all uncommitted changes at the end of the data. This
	 * doesn't apply to variable-width column stores because the implicitly
	 * created records written by reconciliation are deleted and so can be
	 * never seen by a read.
	 *
	 * The problem is that we don't know at this point whether there may be
	 * multiple uncommitted changes at the end of the data, and it would be
	 * expensive to check every time we hit an aborted update. If an
	 * insert is aborted, we simply return zero (empty), regardless of
	 * whether we are at the end of the data.
	 */
	visible = NULL;
	if (cbt->recno >= WT_INSERT_RECNO(cbt->ins))
		visible = __wt_txn_read(session, cbt->ins->upd);

	if (visible != NULL)
		value->data = WT_UPDATE_DATA(visible);
	else {
		/* No record value to return here: hand back a zero byte. */
		cbt->v = 0;
		value->data = &cbt->v;
	}
	value->size = 1;

	return (0);
}
/*
 * __free_page_col_fix --
 *	Discard the in-memory structures hanging off a WT_PAGE_COL_FIX page:
 *	the append skip list (entries, list head and the array holding the
 *	head) and, if present, the single-slot update array.
 */
static void
__free_page_col_fix(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_INSERT_HEAD *append_head;

	/* Append list: free its entries, then the head, then the array. */
	append_head = WT_COL_APPEND(page);
	if (append_head != NULL) {
		__free_skip_list(session, WT_SKIP_FIRST(append_head));
		__wt_free(session, append_head);
		__wt_free(session, page->modify->append);
	}

	/* Update array: nothing to do unless the page has one. */
	if (page->modify == NULL || page->modify->update == NULL)
		return;
	__free_skip_array(session, page->modify->update, 1);
}
/*
 * __free_page_modify --
 *	Discard a page's modification structures: reconciliation results
 *	(split pages or a replacement address), the column-store append list,
 *	the insert/update array, tracked objects, and finally the
 *	WT_PAGE_MODIFY structure itself.  The caller must pass a page whose
 *	modify structure is non-NULL; it is dereferenced unconditionally.
 */
static void
__free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_INSERT_HEAD *append_head;
	WT_PAGE_MODIFY *mod;

	mod = page->modify;

	if (F_ISSET(mod, WT_PM_REC_MASK) == WT_PM_REC_SPLIT) {
		/*
		 * If the page split, there may one or more pages linked from
		 * the page; walk the list, discarding pages.
		 */
		__wt_page_out(session, &mod->u.split);
	} else if (F_ISSET(mod, WT_PM_REC_MASK) == WT_PM_REC_REPLACE) {
		/*
		 * Discard any replacement address: this memory is usually moved
		 * into the parent's WT_REF, but at the root that can't happen.
		 */
		__wt_free(session, mod->u.replace.addr);
	}

	/* Append list: free its entries, then the head, then the array. */
	append_head = WT_COL_APPEND(page);
	if (append_head != NULL) {
		__free_skip_list(session, WT_SKIP_FIRST(append_head));
		__wt_free(session, append_head);
		__wt_free(session, mod->append);
	}

	/*
	 * Insert/update array: fixed-length column-store pages are freed as a
	 * single slot, every other page type as one slot per page entry.
	 */
	if (mod->update != NULL)
		__free_skip_array(session, mod->update,
		    page->type == WT_PAGE_COL_FIX ? 1 : page->entries);

	/* Discard any objects the page was tracking plus associated memory. */
	__wt_rec_track_discard(session, page);
	__wt_free(session, mod->track);

	/* Free through page->modify so the page's own pointer is the one freed. */
	__wt_free(session, page->modify);
}
/*
 * __free_skip_array --
 *	Discard an array of skip list headers.
 *
 *	head_arg is an array of "entries" slots; every non-NULL slot anchors a
 *	skip list whose elements and header are freed, after which the array
 *	itself is freed.
 */
static void
__free_skip_array(
    WT_SESSION_IMPL *session, WT_INSERT_HEAD **head_arg, uint32_t entries)
{
	uint32_t slot;

	for (slot = 0; slot < entries; ++slot) {
		/* Empty slots have no list to release. */
		if (head_arg[slot] == NULL)
			continue;

		__free_skip_list(session, WT_SKIP_FIRST(head_arg[slot]));
		__wt_free(session, head_arg[slot]);
	}

	/* Free the page's array of inserts. */
	__wt_free(session, head_arg);
}
/*
 * __free_page_col_var --
 *	Discard the in-memory structures hanging off a WT_PAGE_COL_VAR page:
 *	the in-memory index array, the RLE lookup array, the append skip list
 *	and, if present, the per-entry insert array.
 */
static void
__free_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_INSERT_HEAD *append_head;

	/* The in-memory index array and the RLE lookup array. */
	__wt_free(session, page->u.col_var.d);
	__wt_free(session, page->u.col_var.repeats);

	/* Append list: free its entries, then the head, then the array. */
	append_head = WT_COL_APPEND(page);
	if (append_head != NULL) {
		__free_skip_list(session, WT_SKIP_FIRST(append_head));
		__wt_free(session, append_head);
		__wt_free(session, page->modify->append);
	}

	/* Insert array: one slot per page entry, if the page has one. */
	if (page->modify == NULL || page->modify->update == NULL)
		return;
	__free_skip_array(session, page->modify->update, page->entries);
}
/*
 * __cursor_var_append_next --
 *	Move a variable-length column-store cursor along the append list.
 *
 *	If newpage is non-zero the walk starts at the first entry of
 *	cbt->ins_head, otherwise it resumes with the entry following cbt->ins.
 *	The cursor's record number is set for every entry visited.  Entries
 *	for which __wt_txn_read yields no update are skipped; entries whose
 *	update satisfies WT_UPDATE_DELETED_ISSET are skipped and counted in
 *	cbt->page_deleted_count.
 *
 *	On success the cursor's value item references the update's data and
 *	size and 0 is returned; WT_NOTFOUND is returned when the list is
 *	exhausted.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_ITEM *value;
	WT_SESSION_IMPL *session;
	WT_UPDATE *visible;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	value = &cbt->iface.value;

	/* Pick the starting entry: head of the list, or one past the last. */
	if (newpage)
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
	else
		cbt->ins = WT_SKIP_NEXT(cbt->ins);

	for (; cbt->ins != NULL; cbt->ins = WT_SKIP_NEXT(cbt->ins)) {
		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));

		/* The transactional read found nothing for us: keep going. */
		visible = __wt_txn_read(session, cbt->ins->upd);
		if (visible == NULL)
			continue;

		/* Deleted record: don't position on it, keep moving. */
		if (WT_UPDATE_DELETED_ISSET(visible)) {
			++cbt->page_deleted_count;
			continue;
		}

		/* Set the value. */
		value->data = WT_UPDATE_DATA(visible);
		value->size = visible->size;
		return (0);
	}

	return (WT_NOTFOUND);
}
/*
 * __cursor_row_next --
 *	Move to the next row-store item.
 *
 *	If newpage is true the iteration state is (re)initialized for the
 *	cursor's current page.  Returns WT_NOTFOUND when the page is
 *	exhausted, otherwise the result of __wt_value_return (insert-list
 *	items) or __cursor_row_slot_return (on-page rows).
 */
static inline int
__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_INSERT *ins;
	WT_ITEM *key;
	WT_PAGE *page;
	WT_ROW *rip;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	bool advance;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	key = &cbt->iface.key;

	/*
	 * For row-store pages, we need a single item that tells us the part
	 * of the page we're walking (otherwise switching from next to prev
	 * and vice-versa is just too complicated), so we map the WT_ROW and
	 * WT_INSERT_HEAD insert array slots into a single name space: slot 1
	 * is the "smallest key insert list", slot 2 is WT_ROW[0], slot 3 is
	 * WT_INSERT_HEAD[0], and so on.  This means WT_INSERT lists are
	 * odd-numbered slots, and WT_ROW array slots are even-numbered slots.
	 *
	 * Initialize for each new page.
	 */
	if (newpage) {
		cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		cbt->row_iteration_slot = 1;
		cbt->rip_saved = NULL;
	}

	/*
	 * "advance" is false whenever cbt->ins has just been set to the first
	 * entry of an insert list: that entry must be examined before the
	 * cursor steps past it.
	 */
	advance = !newpage;

	/* Move to the next entry and return the item. */
	for (;;) {
		/*
		 * Continue traversing any insert list; maintain the insert list
		 * head reference and entry count in case we switch to a cursor
		 * previous movement.
		 */
		if (advance && cbt->ins != NULL)
			cbt->ins = WT_SKIP_NEXT(cbt->ins);
		advance = true;

		ins = cbt->ins;
		if (ins != NULL) {
			upd = __wt_txn_read(session, ins->upd);
			if (upd == NULL)
				continue;
			if (upd->type == WT_UPDATE_DELETED) {
				if (__wt_txn_upd_visible_all(session, upd))
					++cbt->page_deleted_count;
				continue;
			}

			key->data = WT_INSERT_KEY(ins);
			key->size = WT_INSERT_KEY_SIZE(ins);
			return (__wt_value_return(session, cbt, upd));
		}

		/* Check for the end of the page. */
		if (cbt->row_iteration_slot >= page->entries * 2 + 1)
			return (WT_NOTFOUND);
		++cbt->row_iteration_slot;

		/*
		 * Odd-numbered slots configure as WT_INSERT_HEAD entries,
		 * even-numbered slots configure as WT_ROW entries.
		 */
		if (cbt->row_iteration_slot & 0x01) {
			cbt->ins_head = WT_ROW_INSERT_SLOT(
			    page, cbt->row_iteration_slot / 2 - 1);
			cbt->ins = WT_SKIP_FIRST(cbt->ins_head);

			/* Look at the list's first entry before stepping. */
			advance = false;
			continue;
		}

		/* An on-page row: there is no insert list position. */
		cbt->ins_head = NULL;
		cbt->ins = NULL;

		cbt->slot = cbt->row_iteration_slot / 2 - 1;
		rip = &page->pg_row[cbt->slot];
		upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
		if (upd != NULL && upd->type == WT_UPDATE_DELETED) {
			if (__wt_txn_upd_visible_all(session, upd))
				++cbt->page_deleted_count;
			continue;
		}

		return (__cursor_row_slot_return(cbt, rip, upd));
	}
	/* NOTREACHED */
}
/*
 * __cursor_row_next --
 *	Move to the next row-store item.
 *
 *	If newpage is non-zero the iteration state is (re)initialized for the
 *	cursor's current page.  Insert-list items are returned by pointing the
 *	cursor's key and value items at the insert key and update data (return
 *	0); on-page rows are returned via __cursor_row_slot_return.  Returns
 *	WT_NOTFOUND when the page is exhausted.
 */
static inline int
__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_INSERT *ins;
	WT_ITEM *key, *value;
	WT_PAGE *page;
	WT_ROW *rip;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	int advance;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	key = &cbt->iface.key;
	value = &cbt->iface.value;

	/* On a new page, position at the head of the smallest-key insert list. */
	if (newpage) {
		cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		cbt->row_iteration_slot = 1;
	}

	/*
	 * "advance" is zero whenever cbt->ins has just been set to the first
	 * entry of an insert list: that entry must be examined before the
	 * cursor steps past it.
	 */
	advance = !newpage;

	for (;;) {
		if (advance && cbt->ins != NULL)
			cbt->ins = WT_SKIP_NEXT(cbt->ins);
		advance = 1;

		ins = cbt->ins;
		if (ins != NULL) {
			/* Transactional read of the insert's update list. */
			upd = __wt_txn_read(session, ins->upd);
			if (upd == NULL)
				continue;

			/* Skip (and count) deleted items. */
			if (WT_UPDATE_DELETED_ISSET(upd)) {
				++cbt->page_deleted_count;
				continue;
			}

			key->data = WT_INSERT_KEY(ins);
			key->size = WT_INSERT_KEY_SIZE(ins);
			value->data = WT_UPDATE_DATA(upd);
			value->size = upd->size;
			return (0);
		}

		/* Walked off the end of the page's row entries array. */
		if (cbt->row_iteration_slot >= page->pg_row_entries * 2 + 1)
			return (WT_NOTFOUND);
		++cbt->row_iteration_slot;

		/*
		 * Odd-numbered slots configure as WT_INSERT_HEAD entries,
		 * even-numbered slots configure as WT_ROW entries.
		 */
		if (cbt->row_iteration_slot & 0x01) {
			cbt->ins_head = WT_ROW_INSERT_SLOT(
			    page, cbt->row_iteration_slot / 2 - 1);
			cbt->ins = WT_SKIP_FIRST(cbt->ins_head);

			/* Look at the list's first entry before stepping. */
			advance = 0;
			continue;
		}

		/* An on-page row: there is no insert list position. */
		cbt->ins_head = NULL;
		cbt->ins = NULL;

		/* Compute the WT_ROW slot for this iteration slot. */
		cbt->slot = cbt->row_iteration_slot / 2 - 1;
		rip = &page->pg_row_d[cbt->slot];
		upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
		if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) {
			++cbt->page_deleted_count;
			continue;
		}

		return (__cursor_row_slot_return(cbt, rip, upd));
	}
}
/*
 * __free_page_modify --
 *	Discard a page's modification structures: reconciliation results
 *	(replacement blocks or a replacement address), the column-store append
 *	and insert/update lists, the overflow tracking lists, and finally the
 *	WT_PAGE_MODIFY structure itself.  The caller must pass a page whose
 *	modify structure is non-NULL; it is dereferenced unconditionally.
 */
static void
__free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_INSERT_HEAD *append_head;
	WT_MULTI *block;
	WT_PAGE_MODIFY *mod;
	uint32_t slot;
	bool update_ignore;

	mod = page->modify;

	/* In some failed-split cases, we can't discard updates. */
	update_ignore = F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE);

	if (mod->rec_result == WT_PM_REC_MULTIBLOCK) {
		/* Free list of replacement blocks. */
		for (slot = 0; slot < mod->mod_multi_entries; ++slot) {
			block = &mod->mod_multi[slot];

			/* Only row-store blocks have an instantiated key. */
			if (page->type == WT_PAGE_ROW_INT ||
			    page->type == WT_PAGE_ROW_LEAF)
				__wt_free(session, block->key.ikey);

			__wt_free(session, block->supd);
			__wt_free(session, block->disk_image);
			__wt_free(session, block->addr.addr);
		}
		__wt_free(session, mod->mod_multi);
	} else if (mod->rec_result == WT_PM_REC_REPLACE) {
		/*
		 * Discard any replacement address: this memory is usually moved
		 * into the parent's WT_REF, but at the root that can't happen.
		 */
		__wt_free(session, mod->mod_replace.addr);
	}

	if (page->type == WT_PAGE_COL_FIX || page->type == WT_PAGE_COL_VAR) {
		/* Append list: its entries, then the head, then the array. */
		append_head = WT_COL_APPEND(page);
		if (append_head != NULL) {
			__free_skip_list(session,
			    WT_SKIP_FIRST(append_head), update_ignore);
			__wt_free(session, append_head);
			__wt_free(session, mod->mod_append);
		}

		/*
		 * Insert/update array: a single slot for fixed-length pages,
		 * one slot per entry for variable-length pages.
		 */
		if (mod->mod_update != NULL)
			__free_skip_array(session, mod->mod_update,
			    page->type == WT_PAGE_COL_FIX ?
			    1 : page->pg_var_entries, update_ignore);
	}

	/* Free the overflow on-page, reuse and transaction-cache skiplists. */
	__wt_ovfl_reuse_free(session, page);
	__wt_ovfl_txnc_free(session, page);
	__wt_ovfl_discard_free(session, page);

	/* Free through page->modify so the page's own pointer is the one freed. */
	__wt_free(session, page->modify->ovfl_track);
	__wt_free(session, page->modify);
}
/*
 * __wt_row_modify --
 *	Row-store insert, update and delete.
 *
 *	session: the session performing the operation.
 *	cbt: the btree cursor; its key item supplies the key, its value item
 *	    the new value, and its compare/ins/slot fields select which of the
 *	    two paths below is taken (presumably left there by a preceding
 *	    search -- confirm against callers).
 *	is_remove: non-zero for a delete, in which case the update is
 *	    allocated with a NULL value.
 *
 *	Returns ret (declared by WT_DECL_RET): presumably 0 on success,
 *	otherwise the error from the failing call.  On the error path the
 *	locally allocated WT_INSERT and WT_UPDATE are freed and cbt->ins is
 *	cleared.
 */
int
__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int is_remove)
{
	WT_DECL_RET;
	WT_INSERT *ins;
	WT_INSERT_HEAD *ins_head, **ins_headp;
	WT_ITEM *key, *value;
	WT_PAGE *page;
	WT_UPDATE *old_upd, *upd, **upd_entry;
	size_t ins_size, upd_size;
	uint32_t ins_slot;
	u_int i, skipdepth;
	int logged;

	key = &cbt->iface.key;
	/* A remove is represented by an update with no value. */
	value = is_remove ? NULL : &cbt->iface.value;

	page = cbt->page;

	/* If we don't yet have a modify structure, we'll need one. */
	WT_RET(__wt_page_modify_init(session, page));

	/*
	 * Nothing is allocated or registered with the transaction yet; the
	 * error path relies on these initial values.
	 */
	ins = NULL;
	upd = NULL;
	logged = 0;

	/*
	 * Modify: allocate an update array as necessary, build a WT_UPDATE
	 * structure, and call a serialized function to insert the WT_UPDATE
	 * structure.
	 *
	 * Insert: allocate an insert array as necessary, build a WT_INSERT
	 * and WT_UPDATE structure pair, and call a serialized function to
	 * insert the WT_INSERT structure.
	 */
	if (cbt->compare == 0) {
		/*
		 * Exact match: the new update is chained either onto the
		 * on-page row's update slot or onto the matching WT_INSERT.
		 */
		if (cbt->ins == NULL) {
			/* Allocate an update array as necessary. */
			WT_PAGE_ALLOC_AND_SWAP(session, page,
			    page->u.row.upd, upd_entry, page->entries);

			/* Set the WT_UPDATE array reference. */
			upd_entry = &page->u.row.upd[cbt->slot];
		} else
			upd_entry = &cbt->ins->upd;

		/* Make sure the update can proceed. */
		WT_ERR(__wt_txn_update_check(session, old_upd = *upd_entry));

		/* Allocate the WT_UPDATE structure and transaction ID. */
		WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size));
		WT_ERR(__wt_txn_modify(session, cbt, upd));
		/* From here on, a failure must undo the transaction entry. */
		logged = 1;

		/*
		 * Point the new WT_UPDATE item to the next element in the list.
		 * If we get it right, the serialization function lock acts as
		 * our memory barrier to flush this write.
		 */
		upd->next = old_upd;

		/* Serialize the update. */
		WT_ERR(__wt_update_serial(
		    session, page, upd_entry, &upd, upd_size));
	} else {
		/*
		 * Allocate the insert array as necessary.
		 *
		 * We allocate an additional insert array slot for insert keys
		 * sorting less than any key on the page.  The test to select
		 * that slot is baroque: if the search returned the first page
		 * slot, we didn't end up processing an insert list, and the
		 * comparison value indicates the search key was smaller than
		 * the returned slot, then we're using the smallest-key insert
		 * slot.  That's hard, so we set a flag.
		 */
		WT_PAGE_ALLOC_AND_SWAP(session, page,
		    page->u.row.ins, ins_headp, page->entries + 1);
		/* The extra "smallest key" slot is the array's last element. */
		ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ?
		    page->entries : cbt->slot;
		ins_headp = &page->u.row.ins[ins_slot];

		/* Allocate the WT_INSERT_HEAD structure as necessary. */
		WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
		ins_head = *ins_headp;

		/* Choose a skiplist depth for this insert. */
		skipdepth = __wt_skip_choose_depth();

		/*
		 * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and
		 * update the cursor to reference it.
		 */
		WT_ERR(__wt_row_insert_alloc(
		    session, key, skipdepth, &ins, &ins_size));
		WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size));
		ins->upd = upd;
		/* The size handed to the serial function covers both objects. */
		ins_size += upd_size;

		/*
		 * Update the cursor: the WT_INSERT_HEAD might be allocated,
		 * the WT_INSERT was allocated.
		 */
		cbt->ins_head = ins_head;
		cbt->ins = ins;

		WT_ERR(__wt_txn_modify(session, cbt, upd));
		/* From here on, a failure must undo the transaction entry. */
		logged = 1;

		/*
		 * If there was no insert list during the search, the cursor's
		 * information cannot be correct, search couldn't have
		 * initialized it.
		 *
		 * Otherwise, point the new WT_INSERT item's skiplist to the
		 * next elements in the insert list (which we will check are
		 * still valid inside the serialization function).
		 *
		 * The serial mutex acts as our memory barrier to flush these
		 * writes before inserting them into the list.
		 */
		if (WT_SKIP_FIRST(ins_head) == NULL)
			for (i = 0; i < skipdepth; i++) {
				cbt->ins_stack[i] = &ins_head->head[i];
				ins->next[i] = cbt->next_stack[i] = NULL;
			}
		else
			for (i = 0; i < skipdepth; i++)
				ins->next[i] = cbt->next_stack[i];

		/* Insert the WT_INSERT structure. */
		WT_ERR(__wt_insert_serial(
		    session, page, cbt->ins_head, cbt->ins_stack,
		    &ins, ins_size, skipdepth));
	}

	/*
	 * The block below is entered only through the err label (presumably
	 * the target of WT_ERR on failure); normal execution skips it.
	 */
	if (0) {
err:		/*
		 * Remove the update from the current transaction, so we don't
		 * try to modify it on rollback.
		 */
		if (logged)
			__wt_txn_unmodify(session);

		/*
		 * NOTE(review): the serial functions take &ins / &upd,
		 * presumably so they can clear them once ownership has
		 * transferred; otherwise these frees would release linked
		 * objects -- confirm.
		 */
		__wt_free(session, ins);
		/* Don't leave the cursor referencing an insert we just freed. */
		cbt->ins = NULL;
		__wt_free(session, upd);
	}

	return (ret);
}
/*
 * __wt_col_modify --
 *	Column-store delete, insert, and update.
 *
 *	session: the session performing the operation.
 *	cbt: the btree cursor; its btree, ref->page, compare, ins and slot
 *	    fields select the page and the path taken below (presumably left
 *	    there by a preceding search -- confirm against callers).
 *	recno: the record number to modify; 0 requests an append that
 *	    allocates a new record.
 *	value: the new value; overridden when is_remove is non-zero.
 *	upd: an optional, already-built update to install instead of
 *	    allocating one from value; it must be NULL when an existing
 *	    WT_INSERT entry is being updated (asserted below).
 *	is_remove: non-zero for a delete.
 *
 *	Returns ret (declared by WT_DECL_RET): presumably 0 on success,
 *	otherwise the error from the failing call.
 */
int
__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
    uint64_t recno, WT_ITEM *value, WT_UPDATE *upd, int is_remove)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_INSERT *ins;
	WT_INSERT_HEAD *ins_head, **ins_headp;
	WT_ITEM _value;
	WT_PAGE *page;
	WT_UPDATE *old_upd;
	size_t ins_size, upd_size;
	u_int i, skipdepth;
	int append, logged;

	btree = cbt->btree;
	ins = NULL;
	page = cbt->ref->page;
	append = logged = 0;

	/*
	 * This code expects a remove to have a NULL value.  The exception is
	 * fixed-length column stores, where a remove is written as a one-byte
	 * value taken from an empty string literal (i.e. a zero byte).
	 */
	if (is_remove) {
		if (btree->type == BTREE_COL_FIX) {
			value = &_value;
			value->data = "";
			value->size = 1;
		} else
			value = NULL;
	} else {
		/*
		 * There's some chance the application specified a record past
		 * the last record on the page.  If that's the case, and we're
		 * inserting a new WT_INSERT/WT_UPDATE pair, it goes on the
		 * append list, not the update list.   In addition, a recno of 0
		 * implies an append operation, we're allocating a new row.
		 */
		if (recno == 0 ||
		    recno > (btree->type == BTREE_COL_VAR ?
		    __col_var_last_recno(page) : __col_fix_last_recno(page)))
			append = 1;
	}

	/* If we don't yet have a modify structure, we'll need one. */
	WT_RET(__wt_page_modify_init(session, page));

	/*
	 * Delete, insert or update a column-store entry.
	 *
	 * If modifying a previously modified record, create a new WT_UPDATE
	 * entry and have a serialized function link it into an existing
	 * WT_INSERT entry's WT_UPDATE list.
	 *
	 * Else, allocate an insert array as necessary, build a WT_INSERT and
	 * WT_UPDATE structure pair, and call a serialized function to insert
	 * the WT_INSERT structure.
	 */
	if (cbt->compare == 0 && cbt->ins != NULL) {
		/*
		 * If we are restoring updates that couldn't be evicted, the
		 * key must not exist on the new page.
		 */
		WT_ASSERT(session, upd == NULL);

		/* Make sure the update can proceed. */
		WT_ERR(__wt_txn_update_check(
		    session, old_upd = cbt->ins->upd));

		/* Allocate a WT_UPDATE structure and transaction ID. */
		WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size));
		WT_ERR(__wt_txn_modify(session, upd));
		/* From here on, a failure must undo the transaction entry. */
		logged = 1;

		/* Avoid a data copy in WT_CURSOR.update. */
		cbt->modify_update = upd;

		/*
		 * Point the new WT_UPDATE item to the next element in the list.
		 * If we get it right, the serialization function lock acts as
		 * our memory barrier to flush this write.
		 */
		upd->next = old_upd;

		/* Serialize the update. */
		WT_ERR(__wt_update_serial(
		    session, page, &cbt->ins->upd, &upd, upd_size));
	} else {
		/*
		 * Allocate the append/update list reference as necessary.
		 * Appends and fixed-length pages use a single-slot array;
		 * variable-length pages use one slot per page entry, indexed
		 * by the cursor's slot.
		 */
		if (append) {
			WT_PAGE_ALLOC_AND_SWAP(session,
			    page, page->modify->mod_append, ins_headp, 1);
			ins_headp = &page->modify->mod_append[0];
		} else if (page->type == WT_PAGE_COL_FIX) {
			WT_PAGE_ALLOC_AND_SWAP(session,
			    page, page->modify->mod_update, ins_headp, 1);
			ins_headp = &page->modify->mod_update[0];
		} else {
			WT_PAGE_ALLOC_AND_SWAP(session,
			    page, page->modify->mod_update, ins_headp,
			    page->pg_var_entries);
			ins_headp = &page->modify->mod_update[cbt->slot];
		}

		/* Allocate the WT_INSERT_HEAD structure as necessary. */
		WT_PAGE_ALLOC_AND_SWAP(session, page, *ins_headp, ins_head, 1);
		ins_head = *ins_headp;

		/* Choose a skiplist depth for this insert. */
		skipdepth = __wt_skip_choose_depth(session);

		/*
		 * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and
		 * update the cursor to reference it (the WT_INSERT_HEAD might
		 * be allocated, the WT_INSERT was allocated).
		 */
		WT_ERR(__col_insert_alloc(
		    session, recno, skipdepth, &ins, &ins_size));
		cbt->ins_head = ins_head;
		cbt->ins = ins;

		if (upd == NULL) {
			WT_ERR(
			    __wt_update_alloc(session, value, &upd, &upd_size));
			WT_ERR(__wt_txn_modify(session, upd));
			/* A failure from here on must undo the transaction entry. */
			logged = 1;

			/* Avoid a data copy in WT_CURSOR.update. */
			cbt->modify_update = upd;
		} else
			/*
			 * A caller-supplied update is installed as-is and is
			 * not registered with the transaction here; only its
			 * memory footprint is needed.
			 */
			upd_size = __wt_update_list_memsize(upd);
		ins->upd = upd;
		/* The size handed to the serial function covers both objects. */
		ins_size += upd_size;

		/*
		 * If there was no insert list during the search, or there was
		 * no search because the record number has not been allocated
		 * yet, the cursor's information cannot be correct, search
		 * couldn't have initialized it.
		 *
		 * Otherwise, point the new WT_INSERT item's skiplist to the
		 * next elements in the insert list (which we will check are
		 * still valid inside the serialization function).
		 *
		 * The serial mutex acts as our memory barrier to flush these
		 * writes before inserting them into the list.
		 */
		if (WT_SKIP_FIRST(ins_head) == NULL || recno == 0)
			for (i = 0; i < skipdepth; i++) {
				cbt->ins_stack[i] = &ins_head->head[i];
				ins->next[i] = cbt->next_stack[i] = NULL;
			}
		else
			for (i = 0; i < skipdepth; i++)
				ins->next[i] = cbt->next_stack[i];

		/*
		 * Append or insert the WT_INSERT structure.  The append call
		 * is also passed the address of the cursor's record number.
		 */
		if (append)
			WT_ERR(__wt_col_append_serial(
			    session, page, cbt->ins_head, cbt->ins_stack,
			    &ins, ins_size, &cbt->recno, skipdepth));
		else
			WT_ERR(__wt_insert_serial(
			    session, page, cbt->ins_head, cbt->ins_stack,
			    &ins, ins_size, skipdepth));
	}

	/* If the update was successful, add it to the in-memory log. */
	if (logged)
		WT_ERR(__wt_txn_log_op(session, cbt));

	/*
	 * The block below is entered only through the err label (presumably
	 * the target of WT_ERR on failure); normal execution skips it.
	 */
	if (0) {
err:		/*
		 * Remove the update from the current transaction, so we don't
		 * try to modify it on rollback.
		 */
		if (logged)
			__wt_txn_unmodify(session);

		/*
		 * NOTE(review): the serial functions take &ins / &upd,
		 * presumably so they can clear them once ownership has
		 * transferred; otherwise a late failure (e.g. in
		 * __wt_txn_log_op) would free linked objects -- confirm.
		 *
		 * NOTE(review): upd may be the caller-supplied update, which is
		 * freed here as well; confirm callers expect to lose ownership
		 * on error.  Also, cbt->ins is not reset on this path even
		 * though ins may have just been freed -- verify no caller
		 * reads it after a failure.
		 */
		__wt_free(session, ins);
		__wt_free(session, upd);
	}

	return (ret);
}