/*
 * __cursor_var_append_next --
 *	Step a variable-length column-store cursor to the next entry on the
 * append list.
 *
 * When newpage is set the walk starts at the first entry of cbt->ins_head,
 * otherwise it resumes at the successor of cbt->ins.  Entries for which
 * __wt_txn_read returns no update are skipped, as are entries whose update
 * is a delete.  Returns WT_NOTFOUND once the list is exhausted, otherwise the
 * result of __wt_value_return for the update that was found.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_SESSION_IMPL *sess;
	WT_UPDATE *visible;

	sess = (WT_SESSION_IMPL *)cbt->iface.session;

	/* Choose the first candidate entry. */
	if (newpage)
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
	else
		cbt->ins = WT_SKIP_NEXT(cbt->ins);

	for (; cbt->ins != NULL; cbt->ins = WT_SKIP_NEXT(cbt->ins)) {
		/* The cursor's record number tracks every entry we visit. */
		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));

		visible = __wt_txn_read(sess, cbt->ins->upd);
		if (visible == NULL)
			continue;

		if (visible->type != WT_UPDATE_DELETED)
			return (__wt_value_return(sess, cbt, visible));

		/*
		 * A deleted entry: it only counts toward the page's deleted
		 * total when __wt_txn_upd_visible_all reports true for it.
		 */
		if (__wt_txn_upd_visible_all(sess, visible))
			++cbt->page_deleted_count;
	}

	/* Ran off the end of the append list. */
	return (WT_NOTFOUND);
}
/*
 * __cursor_var_append_next --
 *	Step a variable-length column-store cursor to the next entry on the
 * append list.
 *
 * When newpage is non-zero the walk starts at the first entry of
 * cbt->ins_head, otherwise at the successor of cbt->ins.  Entries with no
 * update returned by __wt_txn_read, or whose update is flagged deleted, are
 * skipped.  On success the cursor's value item references the update's data
 * and 0 is returned; WT_NOTFOUND is returned when the list is exhausted.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_SESSION_IMPL *sess;
	WT_ITEM *value;
	WT_UPDATE *visible;

	sess = (WT_SESSION_IMPL *)cbt->iface.session;
	value = &cbt->iface.value;

	/* Choose the first candidate entry. */
	if (newpage)
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
	else
		cbt->ins = WT_SKIP_NEXT(cbt->ins);

	for (; cbt->ins != NULL; cbt->ins = WT_SKIP_NEXT(cbt->ins)) {
		/* The cursor's record number tracks every entry we visit. */
		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));

		visible = __wt_txn_read(sess, cbt->ins->upd);
		if (visible == NULL)
			continue;
		if (WT_UPDATE_DELETED_ISSET(visible))
			continue;

		/* Found a live entry: expose its bytes through the cursor. */
		value->data = WT_UPDATE_DATA(visible);
		value->size = visible->size;
		return (0);
	}

	/* Ran off the end of the append list. */
	return (WT_NOTFOUND);
}
/*
 * __cursor_fix_append_next --
 *	Move a btree cursor to the next record, walking only the append list
 * (fixed-length column-store variant: the returned value is always one byte).
 *
 * Returns WT_NOTFOUND when there is no further append-list entry, 0 otherwise.
 */
static inline int
__cursor_fix_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_SESSION_IMPL *sess;
	WT_ITEM *value;
	WT_UPDATE *visible;

	sess = (WT_SESSION_IMPL *)cbt->iface.session;
	value = &cbt->iface.value;

	if (newpage) {
		/*
		 * Freshly loaded page: an empty insert head means there are
		 * no appended records at all.
		 */
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		if (cbt->ins == NULL)
			return (WT_NOTFOUND);
	} else if (cbt->recno >= WT_INSERT_RECNO(cbt->ins)) {
		/*
		 * The cursor has reached the current entry's record number,
		 * so advance; running out of entries ends the walk.
		 */
		cbt->ins = WT_SKIP_NEXT(cbt->ins);
		if (cbt->ins == NULL)
			return (WT_NOTFOUND);
	}

	/*
	 * This differs from the cursor-previous code.  The append list lives
	 * on the last page of the tree, but other rows may precede it, in
	 * which case the cursor's recno already holds a value and we simply
	 * increment it.  If the cursor's recno is NOT set, the iteration is
	 * starting in a tree holding only appended items: recno is 0 and the
	 * increment conveniently yields 1, which is correct.
	 */
	__cursor_set_recno(cbt, cbt->recno + 1);

	/*
	 * Fixed-width column store appends are inherently non-transactional.
	 * Even a non-visible update by a concurrent or aborted transaction
	 * changes the effective end of the data.  The effect is subtle because
	 * of the blurring between deleted and empty values, but ideally we
	 * would skip all uncommitted changes at the end of the data.  This
	 * doesn't apply to variable-width column stores because the implicitly
	 * created records written by reconciliation are deleted and so can
	 * never be seen by a read.
	 *
	 * The problem is that we don't know at this point whether there may be
	 * multiple uncommitted changes at the end of the data, and it would be
	 * expensive to check every time we hit an aborted update.  If an
	 * insert is aborted, we simply return zero (empty), regardless of
	 * whether we are at the end of the data.
	 */
	visible = NULL;
	if (cbt->recno >= WT_INSERT_RECNO(cbt->ins))
		visible = __wt_txn_read(sess, cbt->ins->upd);

	if (visible == NULL) {
		/* No readable value for this record number: report zero. */
		cbt->v = 0;
		value->data = &cbt->v;
	} else
		value->data = WT_UPDATE_DATA(visible);
	value->size = 1;

	return (0);
}
/*
 * __free_skip_list --
 *	Walk a WT_INSERT forward-linked list and free the per-thread combination
 * of a WT_INSERT structure and its associated chain of WT_UPDATE structures.
 *
 * session: session handle passed through to the free routines.
 * ins: first entry of the list; may be NULL (an empty list), in which case
 *	nothing is done.
 */
static void
__free_skip_list(WT_SESSION_IMPL *session, WT_INSERT *ins)
{
	WT_INSERT *next;

	/*
	 * Test the entry before touching it: a post-tested loop would
	 * dereference a NULL pointer when handed an empty list.
	 */
	for (; ins != NULL; ins = next) {
		__free_update_list(session, ins->upd);

		/* Capture the successor before the entry itself is released. */
		next = WT_SKIP_NEXT(ins);
		__wt_free(session, ins);
	}
}
/*
 * __cursor_var_append_next --
 *	Move a variable-length btree cursor along the append list.
 *
 * When newpage is non-zero the walk starts at the first entry of
 * cbt->ins_head, otherwise at the successor of cbt->ins.  On success the
 * cursor's value item references the update's data and 0 is returned;
 * WT_NOTFOUND is returned when the list is exhausted.
 */
static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_SESSION_IMPL *sess;
	WT_ITEM *value;
	WT_UPDATE *visible;

	sess = (WT_SESSION_IMPL *)cbt->iface.session;
	value = &cbt->iface.value;

	/* Choose the first candidate entry. */
	if (newpage)
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
	else
		cbt->ins = WT_SKIP_NEXT(cbt->ins);

	while (cbt->ins != NULL) {
		__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));

		/*
		 * Transactional read: when nothing is visible to this
		 * transaction, just keep moving forward.
		 */
		visible = __wt_txn_read(sess, cbt->ins->upd);
		if (visible != NULL) {
			if (!WT_UPDATE_DELETED_ISSET(visible)) {
				/* Live record: hand its value to the cursor. */
				value->data = WT_UPDATE_DATA(visible);
				value->size = visible->size;
				return (0);
			}

			/*
			 * Deleted record: do not position on it, count it
			 * and keep moving.
			 */
			++cbt->page_deleted_count;
		}

		cbt->ins = WT_SKIP_NEXT(cbt->ins);
	}

	/* Ran off the end of the append list. */
	return (WT_NOTFOUND);
}
/*
 * __cursor_row_next --
 *	Advance the cursor to the next row-store item on the current page.
 *
 * Returns WT_NOTFOUND when the page is exhausted; otherwise returns the
 * result of __wt_value_return (insert-list entry) or __cursor_row_slot_return
 * (on-page row).
 */
static inline int
__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage)
{
	WT_INSERT *cur;
	WT_ITEM *keyp;
	WT_PAGE *page;
	WT_ROW *row;
	WT_SESSION_IMPL *session;
	WT_UPDATE *visible;
	bool step;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	keyp = &cbt->iface.key;

	/*
	 * Row-store pages need one value naming the part of the page being
	 * walked (switching between next and prev is too complicated
	 * otherwise), so the WT_ROW and WT_INSERT_HEAD insert array slots
	 * share a single name space: slot 1 is the "smallest key insert
	 * list", slot 2 is WT_ROW[0], slot 3 is WT_INSERT_HEAD[0], and so on.
	 * WT_INSERT lists are therefore odd-numbered slots and WT_ROW array
	 * slots are even-numbered slots.
	 *
	 * "step" records whether the current insert entry has already been
	 * examined and must be stepped past; it is false whenever cbt->ins
	 * was just set to the head of a list.
	 */
	step = !newpage;
	if (newpage) {
		/* Initialize for each new page. */
		cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		cbt->row_iteration_slot = 1;
		cbt->rip_saved = NULL;
	}

	/* Move to the next entry and return the item. */
	for (;;) {
		/*
		 * Continue traversing any insert list; the insert list head
		 * reference and entry count are maintained in case we switch
		 * to a cursor previous movement.
		 */
		if (step && cbt->ins != NULL)
			cbt->ins = WT_SKIP_NEXT(cbt->ins);
		step = true;

		cur = cbt->ins;
		if (cur != NULL) {
			visible = __wt_txn_read(session, cur->upd);
			if (visible == NULL)
				continue;
			if (visible->type == WT_UPDATE_DELETED) {
				if (__wt_txn_upd_visible_all(session, visible))
					++cbt->page_deleted_count;
				continue;
			}
			keyp->data = WT_INSERT_KEY(cur);
			keyp->size = WT_INSERT_KEY_SIZE(cur);
			return (__wt_value_return(session, cbt, visible));
		}

		/* Check for the end of the page. */
		if (cbt->row_iteration_slot >= page->entries * 2 + 1)
			return (WT_NOTFOUND);
		++cbt->row_iteration_slot;

		/*
		 * Odd-numbered slots configure as WT_INSERT_HEAD entries,
		 * even-numbered slots configure as WT_ROW entries.
		 */
		if (cbt->row_iteration_slot & 0x01) {
			cbt->ins_head = WT_ROW_INSERT_SLOT(
			    page, cbt->row_iteration_slot / 2 - 1);
			cbt->ins = WT_SKIP_FIRST(cbt->ins_head);

			/* Examine the list head itself before stepping. */
			step = false;
			continue;
		}

		/* Even slot: an on-page row, no insert list is active. */
		cbt->ins_head = NULL;
		cbt->ins = NULL;

		cbt->slot = cbt->row_iteration_slot / 2 - 1;
		row = &page->pg_row[cbt->slot];
		visible = __wt_txn_read(session, WT_ROW_UPDATE(page, row));
		if (visible != NULL && visible->type == WT_UPDATE_DELETED) {
			if (__wt_txn_upd_visible_all(session, visible))
				++cbt->page_deleted_count;
			continue;
		}
		return (__cursor_row_slot_return(cbt, row, visible));
	}
	/* NOTREACHED */
}
/*
 * __cursor_row_next --
 *	Move to the next row-store item on the current page.
 *
 * Returns WT_NOTFOUND when the page is exhausted; returns 0 after filling the
 * cursor's key and value from an insert-list entry, or the result of
 * __cursor_row_slot_return for an on-page row.
 */
static inline int
__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage)
{
	WT_INSERT *cur;
	WT_ITEM *keyp, *valuep;
	WT_PAGE *page;
	WT_ROW *row;
	WT_SESSION_IMPL *session;
	WT_UPDATE *visible;
	int step;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	page = cbt->ref->page;
	keyp = &cbt->iface.key;
	valuep = &cbt->iface.value;

	/*
	 * "step" records whether the current insert entry has already been
	 * examined and must be stepped past; it is zero whenever cbt->ins was
	 * just set to the head of a list.
	 */
	step = !newpage;
	if (newpage) {
		/* New page: position at the head of the smallest-key insert list. */
		cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
		cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
		cbt->row_iteration_slot = 1;
	}

	for (;;) {
		if (step && cbt->ins != NULL)
			cbt->ins = WT_SKIP_NEXT(cbt->ins);
		step = 1;

		cur = cbt->ins;
		if (cur != NULL) {
			/* Read the update visible to this transaction, if any. */
			visible = __wt_txn_read(session, cur->upd);
			if (visible == NULL)
				continue;

			/* Skip over deleted items, counting them as we go. */
			if (WT_UPDATE_DELETED_ISSET(visible)) {
				++cbt->page_deleted_count;
				continue;
			}

			keyp->data = WT_INSERT_KEY(cur);
			keyp->size = WT_INSERT_KEY_SIZE(cur);
			valuep->data = WT_UPDATE_DATA(visible);
			valuep->size = visible->size;
			return 0;
		}

		/* Walking the page's row entries: stop at the end of the page. */
		if (cbt->row_iteration_slot >= page->pg_row_entries * 2 + 1)
			return (WT_NOTFOUND);
		++cbt->row_iteration_slot;

		/*
		 * Odd-numbered slots configure as WT_INSERT_HEAD entries,
		 * even-numbered slots configure as WT_ROW entries.
		 */
		if (cbt->row_iteration_slot & 0x01) {
			cbt->ins_head = WT_ROW_INSERT_SLOT(
			    page, cbt->row_iteration_slot / 2 - 1);
			cbt->ins = WT_SKIP_FIRST(cbt->ins_head);

			/* Examine the list head itself before stepping. */
			step = 0;
			continue;
		}

		/* Even slot: an on-page row, no insert list is active. */
		cbt->ins_head = NULL;
		cbt->ins = NULL;

		/* Translate the iteration slot into a WT_ROW array index. */
		cbt->slot = cbt->row_iteration_slot / 2 - 1;
		row = &page->pg_row_d[cbt->slot];
		visible = __wt_txn_read(session, WT_ROW_UPDATE(page, row));
		if (visible != NULL && WT_UPDATE_DELETED_ISSET(visible)) {
			++cbt->page_deleted_count;
			continue;
		}
		return __cursor_row_slot_return(cbt, row, visible);
	}
}