/*
 * __stat_page_col_var --
 *     Stat a WT_PAGE_COL_VAR page.
 */
static int
__stat_page_col_var(WT_PAGE *page, WT_DSRC_STATS *stats)
{
    WT_CELL *cell;
    WT_CELL_UNPACK *unpack, _unpack;
    WT_COL *cip;
    WT_INSERT *ins;
    WT_UPDATE *upd;
    uint32_t i;
    int orig_deleted;

    unpack = &_unpack;

    WT_STAT_INCR(stats, btree_column_variable);

    /*
     * Walk the page, counting regular and overflow data items, and checking
     * to be sure any updates weren't deletions.  If the item was updated,
     * assume it was updated by an item of the same size (it's expensive to
     * figure out if it will require the same space or not, especially if
     * there's Huffman encoding).
     */
    WT_COL_FOREACH(page, cip, i) {
        if ((cell = WT_COL_PTR(page, cip)) == NULL) {
            orig_deleted = 1;
            WT_STAT_INCR(stats, btree_column_deleted);
        } else {
            orig_deleted = 0;
            __wt_cell_unpack(cell, unpack);
            WT_STAT_INCRV(
                stats, btree_entries, __wt_cell_rle(unpack));
        }

        /*
         * Walk the insert list, checking for changes.  For each insert
         * we find, correct the original count based on its state.
         */
        WT_SKIP_FOREACH(ins, WT_COL_UPDATE(page, cip)) {
            upd = ins->upd;
            if (WT_UPDATE_DELETED_ISSET(upd)) {
                if (orig_deleted)
                    continue;
                WT_STAT_INCR(stats, btree_column_deleted);
                WT_STAT_DECR(stats, btree_entries);
            } else {
                if (!orig_deleted)
                    continue;
                WT_STAT_DECR(stats, btree_column_deleted);
                WT_STAT_INCR(stats, btree_entries);
            }
        }
    }
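The correction rule above is easy to get wrong: the on-page cell fixes the starting state (deleted, or an RLE run of entries), and each insert-list update then flips one record between the two counters, but only if it actually changes that record's state. A minimal, self-contained sketch of the rule (not WiredTiger code; all names here are illustrative stand-ins for the WT_STAT/WT_UPDATE machinery):

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_update { bool deleted; };    /* Stand-in for WT_UPDATE. */

int
main(void)
{
    /* One on-page cell: an RLE run covering 10 identical records. */
    uint64_t entries = 10, deleted = 0;
    bool orig_deleted = false;

    /* Three of those records were later deleted via the insert list. */
    struct toy_update upd[] = { {true}, {true}, {true} };

    for (size_t i = 0; i < sizeof(upd) / sizeof(upd[0]); ++i)
        if (upd[i].deleted) {
            if (orig_deleted)
                continue;       /* Already counted as deleted. */
            ++deleted;
            --entries;
        } else {
            if (!orig_deleted)
                continue;       /* Already counted as a live entry. */
            --deleted;
            ++entries;
        }

    /* Prints entries=7 deleted=3. */
    printf("entries=%" PRIu64 " deleted=%" PRIu64 "\n", entries, deleted);
    return (0);
}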
/*
 * __ovfl_cache_col_visible --
 *     Column-store: check for a globally visible update.
 */
static bool
__ovfl_cache_col_visible(
    WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
{
    /*
     * Column-store is harder than row-store: we're here because there's a
     * reader in the system that might read the original version of an
     * overflow record, which might match a number of records.  For example,
     * the original overflow value was for records 100-200; we've replaced
     * each of those records individually, but there exists a reader that
     * might read any one of those records, and all of those records have
     * different update entries with different transaction IDs.  Since it's
     * infeasible to determine if there's a globally visible update for each
     * reader of each record, we test the simple case where a single record
     * has a single, globally visible update.  If that's not the case, cache
     * the value.
     */
    if (__wt_cell_rle(unpack) == 1 &&
        upd != NULL &&          /* Sanity: upd should always be set. */
        __wt_txn_visible_all(session, upd->txnid))
        return (true);
    return (false);
}
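The test collapses to two conditions: the cell must cover exactly one record (an RLE of 1), and that record's update must be visible to every possible reader. A sketch under assumed names (the toy_* types are illustrative; the real check is __wt_txn_visible_all against the update's transaction ID):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct toy_txn_state { uint64_t oldest_running_id; };   /* Global txn state. */
struct toy_upd { uint64_t txnid; };                     /* Stand-in for WT_UPDATE. */

/* Visible to all readers iff it committed before the oldest active txn. */
static bool
toy_visible_all(const struct toy_txn_state *ts, uint64_t txnid)
{
    return (txnid < ts->oldest_running_id);
}

/* Caching can be skipped only in the single-record, all-visible case. */
static bool
toy_ovfl_col_visible(const struct toy_txn_state *ts,
    const struct toy_upd *upd, uint64_t rle)
{
    return (rle == 1 && upd != NULL && toy_visible_all(ts, upd->txnid));
}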
/*
 * __cursor_var_prev --
 *     Move to the previous, variable-length column-store item.
 */
static inline int
__cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
{
    WT_CELL *cell;
    WT_CELL_UNPACK unpack;
    WT_COL *cip;
    WT_INSERT *ins;
    WT_ITEM *val;
    WT_PAGE *page;
    WT_SESSION_IMPL *session;
    WT_UPDATE *upd;
    uint64_t rle_start;

    session = (WT_SESSION_IMPL *)cbt->iface.session;
    page = cbt->ref->page;
    val = &cbt->iface.value;

    rle_start = 0;              /* -Werror=maybe-uninitialized */

    /* Initialize for each new page. */
    if (newpage) {
        cbt->last_standard_recno = __col_var_last_recno(page);
        if (cbt->last_standard_recno == 0)
            return (WT_NOTFOUND);
        __cursor_set_recno(cbt, cbt->last_standard_recno);
        goto new_page;
    }

    /* Move to the previous entry and return the item. */
    for (;;) {
        __cursor_set_recno(cbt, cbt->recno - 1);

new_page:
        if (cbt->recno < page->pg_var_recno)
            return (WT_NOTFOUND);

        /* Find the matching WT_COL slot. */
        if ((cip =
            __col_var_search(page, cbt->recno, &rle_start)) == NULL)
            return (WT_NOTFOUND);
        cbt->slot = WT_COL_SLOT(page, cip);

        /* Check any insert list for a matching record. */
        cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
        cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
        upd = cbt->ins == NULL ?
            NULL : __wt_txn_read(session, cbt->ins->upd);
        if (upd != NULL) {
            if (WT_UPDATE_DELETED_ISSET(upd)) {
                if (__wt_txn_visible_all(session, upd->txnid))
                    ++cbt->page_deleted_count;
                continue;
            }
            val->data = WT_UPDATE_DATA(upd);
            val->size = upd->size;
            return (0);
        }

        /*
         * If we're at the same slot as the last reference and there's
         * no matching insert list item, re-use the return information
         * (so encoded items with large repeat counts aren't repeatedly
         * decoded).  Otherwise, unpack the cell and build the return
         * information.
         */
        if (cbt->cip_saved != cip) {
            if ((cell = WT_COL_PTR(page, cip)) == NULL)
                continue;
            __wt_cell_unpack(cell, &unpack);
            if (unpack.type == WT_CELL_DEL) {
                if (__wt_cell_rle(&unpack) == 1)
                    continue;

                /*
                 * There can be huge gaps in the variable-length
                 * column-store name space appearing as deleted
                 * records.  If more than one deleted record, do
                 * the work of finding the next record to return
                 * instead of looping through the records.
                 *
                 * First, find the largest record in the update
                 * list that's smaller than the current record.
                 */
                ins = __col_insert_search_lt(
                    cbt->ins_head, cbt->recno);

                /*
                 * Second, for records with RLEs greater than 1,
                 * the above call to __col_var_search located
                 * this record in the page's list of repeating
                 * records, and returned the starting record.
                 * The starting record - 1 is the record to
                 * which we could skip, if there was no larger
                 * record in the update list.
                 */
                cbt->recno = rle_start - 1;
                if (ins != NULL &&
                    WT_INSERT_RECNO(ins) > cbt->recno)
                    cbt->recno = WT_INSERT_RECNO(ins);

                /* Adjust for the outer loop decrement. */
                ++cbt->recno;
                continue;
            }
            WT_RET(__wt_page_cell_data_ref(
                session, page, &unpack, cbt->tmp));
            cbt->cip_saved = cip;
        }
        val->data = cbt->tmp->data;
        val->size = cbt->tmp->size;
        return (0);
    }
    /* NOTREACHED */
}
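The skip computation at the heart of the WT_CELL_DEL branch is worth isolating: given rle_start (the first record of the deleted run, as returned by __col_var_search) and the largest smaller record on the insert list, it picks the next record the outer loop should land on. A minimal sketch with illustrative names:

#include <stdbool.h>
#include <stdint.h>

/*
 * Compute the record number to store in cbt->recno before "continue",
 * mirroring the backward-skip logic above: skip to just before the
 * deleted run unless an insert-list record lies inside the gap, then
 * add one because the outer loop decrements before testing.
 */
static uint64_t
toy_prev_skip(uint64_t rle_start, bool have_ins, uint64_t ins_recno)
{
    uint64_t next = rle_start - 1;

    if (have_ins && ins_recno > next)
        next = ins_recno;

    return (next + 1);          /* The outer loop will subtract 1. */
}

For example, cursoring backward through a deleted run starting at record 1000 with a pending insert at record 1005 lands the cursor directly on 1005 instead of visiting every deleted record in between.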
/*
 * __cursor_var_next --
 *     Move to the next variable-length column-store item.
 */
static inline int
__cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage)
{
    WT_CELL *cell;
    WT_CELL_UNPACK unpack;
    WT_COL *cip;
    WT_ITEM *val;
    WT_INSERT *ins;
    WT_PAGE *page;
    WT_SESSION_IMPL *session;
    WT_UPDATE *upd;
    uint64_t rle, rle_start;

    session = (WT_SESSION_IMPL *)cbt->iface.session;
    page = cbt->ref->page;
    val = &cbt->iface.value;

    rle_start = 0;              /* -Werror=maybe-uninitialized */

    /* Initialize for each new page. */
    if (newpage) {
        cbt->last_standard_recno = __col_var_last_recno(page);
        if (cbt->last_standard_recno == 0)
            return (WT_NOTFOUND);
        __cursor_set_recno(cbt, page->pg_var_recno);
        goto new_page;
    }

    /* Move to the next entry and return the item. */
    for (;;) {
        if (cbt->recno >= cbt->last_standard_recno)
            return (WT_NOTFOUND);
        __cursor_set_recno(cbt, cbt->recno + 1);

new_page:
        /* Find the WT_COL slot matching the record number. */
        if ((cip =
            __col_var_search(page, cbt->recno, &rle_start)) == NULL)
            return (WT_NOTFOUND);
        cbt->slot = WT_COL_SLOT(page, cip);

        /* Check the insert list for the record's current value. */
        cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
        cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
        upd = cbt->ins == NULL ?
            NULL : __wt_txn_read(session, cbt->ins->upd);
        if (upd != NULL) {
            if (WT_UPDATE_DELETED_ISSET(upd)) {
                ++cbt->page_deleted_count;
                continue;
            }
            val->data = WT_UPDATE_DATA(upd);
            val->size = upd->size;
            return (0);
        }

        /*
         * If we're at the same slot as the last reference and there's
         * no matching insert list item, re-use the return information
         * (so encoded items with large repeat counts aren't repeatedly
         * decoded).  Otherwise, unpack the cell and build the return
         * information.
         *
         * upd == NULL: the record's deletion may live on the insert
         * list and the slot may have been re-used, so we have to
         * unpack the cell to fetch the value.
         */
        if (cbt->cip_saved != cip) {
            if ((cell = WT_COL_PTR(page, cip)) == NULL)
                continue;
            __wt_cell_unpack(cell, &unpack);
            if (unpack.type == WT_CELL_DEL) {
                if ((rle = __wt_cell_rle(&unpack)) == 1)
                    continue;

                /* Find the matching record in the update list. */
                ins = __col_insert_search_gt(
                    cbt->ins_head, cbt->recno);

                cbt->recno = rle_start + rle;
                if (ins != NULL &&
                    WT_INSERT_RECNO(ins) < cbt->recno)
                    cbt->recno = WT_INSERT_RECNO(ins);

                /* Adjust for the outer loop increment. */
                --cbt->recno;
                continue;
            }

            /* Copy the cell's value into tmp. */
            WT_RET(__wt_page_cell_data_ref(
                session, page, &unpack, &cbt->tmp));
            cbt->cip_saved = cip;
        }
        val->data = cbt->tmp.data;
        val->size = cbt->tmp.size;
        return (0);
    }
}
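The forward skip is the mirror image of the backward one in __cursor_var_prev: the deleted run covers records [rle_start, rle_start + rle), and the smallest larger insert-list record caps the jump. A sketch with illustrative names:

#include <stdbool.h>
#include <stdint.h>

/*
 * Mirror of toy_prev_skip: compute the record number to store before
 * "continue" when walking forward over a deleted RLE run; the outer
 * loop increments before testing, so subtract one.
 */
static uint64_t
toy_next_skip(uint64_t rle_start, uint64_t rle,
    bool have_ins, uint64_t ins_recno)
{
    uint64_t next = rle_start + rle;    /* First record past the run. */

    if (have_ins && ins_recno < next)
        next = ins_recno;               /* An insert interrupts the run. */

    return (next - 1);                  /* The outer loop will add 1. */
}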
/*
 * __verify_tree --
 *     Verify a tree, recursively descending through it in depth-first
 *     fashion.  The page argument was physically verified (so we know it's
 *     correctly formed), and the in-memory version built.  Our job is to
 *     check logical relationships in the page and in the tree.
 */
static int
__verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
{
    WT_BM *bm;
    WT_CELL *cell;
    WT_CELL_UNPACK *unpack, _unpack;
    WT_COL *cip;
    WT_DECL_RET;
    WT_PAGE *page;
    WT_REF *child_ref;
    uint64_t recno;
    uint32_t entry, i;
    bool found;

    bm = S2BT(session)->bm;
    page = ref->page;
    unpack = &_unpack;
    WT_CLEAR(*unpack);          /* -Wuninitialized */

    WT_RET(__wt_verbose(session, WT_VERB_VERIFY, "%s %s",
        __wt_page_addr_string(session, ref, vs->tmp1),
        __wt_page_type_string(page->type)));

    /* Optionally dump the address. */
    if (vs->dump_address)
        WT_RET(__wt_msg(session, "%s %s",
            __wt_page_addr_string(session, ref, vs->tmp1),
            __wt_page_type_string(page->type)));

    /* Track the shape of the tree. */
    if (WT_PAGE_IS_INTERNAL(page))
        ++vs->depth_internal[
            WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];
    else
        ++vs->depth_leaf[
            WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];

    /*
     * The page's physical structure was verified when it was read into
     * memory by the read server thread, and then the in-memory version
     * of the page was built.  Now we make sure the page and tree are
     * logically consistent.
     *
     * !!!
     * The problem: (1) the read server has to build the in-memory version
     * of the page because the read server is the thread that flags when
     * any thread can access the page in the tree; (2) we can't build the
     * in-memory version of the page until the physical structure is known
     * to be OK, so the read server has to verify at least the physical
     * structure of the page; (3) doing complete page verification requires
     * reading additional pages (for example, overflow keys imply reading
     * overflow pages in order to test the key's order in the page); (4)
     * the read server cannot read additional pages because it will hang
     * waiting on itself.  For this reason, we split page verification
     * into a physical verification, which allows the in-memory version
     * of the page to be built, and then a subsequent logical verification
     * which happens here.
     *
     * Report progress occasionally.
     */
#define WT_VERIFY_PROGRESS_INTERVAL 100
    if (++vs->fcnt % WT_VERIFY_PROGRESS_INTERVAL == 0)
        WT_RET(__wt_progress(session, NULL, vs->fcnt));

#ifdef HAVE_DIAGNOSTIC
    /* Optionally dump the blocks or page in debugging mode. */
    if (vs->dump_blocks)
        WT_RET(__wt_debug_disk(session, page->dsk, NULL));
    if (vs->dump_pages)
        WT_RET(__wt_debug_page(session, page, NULL));
#endif

    /*
     * Column-store key order checks: check the page's record number and
     * then update the total record count.
     */
    switch (page->type) {
    case WT_PAGE_COL_FIX:
        recno = page->pg_fix_recno;
        goto recno_chk;
    case WT_PAGE_COL_INT:
        recno = page->pg_intl_recno;
        goto recno_chk;
    case WT_PAGE_COL_VAR:
        recno = page->pg_var_recno;
recno_chk:
        if (recno != vs->record_total + 1)
            WT_RET_MSG(session, WT_ERROR,
                "page at %s has a starting record of %" PRIu64
                " when the expected starting record is %" PRIu64,
                __wt_page_addr_string(session, ref, vs->tmp1),
                recno, vs->record_total + 1);
        break;
    }
    switch (page->type) {
    case WT_PAGE_COL_FIX:
        vs->record_total += page->pg_fix_entries;
        break;
    case WT_PAGE_COL_VAR:
        recno = 0;
        WT_COL_FOREACH(page, cip, i)
            if ((cell = WT_COL_PTR(page, cip)) == NULL)
                ++recno;
            else {
                __wt_cell_unpack(cell, unpack);
                recno += __wt_cell_rle(unpack);
            }
        vs->record_total += recno;
        break;
    }

    /*
     * Row-store leaf page key order check: it's a depth-first traversal,
     * the first key on this page should be larger than any key previously
     * seen.
     */
    switch (page->type) {
    case WT_PAGE_ROW_LEAF:
        WT_RET(__verify_row_leaf_key_order(session, ref, vs));
        break;
    }

    /* If it's not the root page, unpack the parent cell. */
    if (!__wt_ref_is_root(ref)) {
        __wt_cell_unpack(ref->addr, unpack);

        /* Compare the parent cell against the page type. */
        switch (page->type) {
        case WT_PAGE_COL_FIX:
            if (unpack->raw != WT_CELL_ADDR_LEAF_NO)
                goto celltype_err;
            break;
        case WT_PAGE_COL_VAR:
            if (unpack->raw != WT_CELL_ADDR_LEAF &&
                unpack->raw != WT_CELL_ADDR_LEAF_NO)
                goto celltype_err;
            break;
        case WT_PAGE_ROW_LEAF:
            if (unpack->raw != WT_CELL_ADDR_DEL &&
                unpack->raw != WT_CELL_ADDR_LEAF &&
                unpack->raw != WT_CELL_ADDR_LEAF_NO)
                goto celltype_err;
            break;
        case WT_PAGE_COL_INT:
        case WT_PAGE_ROW_INT:
            if (unpack->raw != WT_CELL_ADDR_INT)
celltype_err:
                WT_RET_MSG(session, WT_ERROR,
                    "page at %s, of type %s, is referenced in "
                    "its parent by a cell of type %s",
                    __wt_page_addr_string(
                        session, ref, vs->tmp1),
                    __wt_page_type_string(page->type),
                    __wt_cell_type_string(unpack->raw));
            break;
        }
    }

    /*
     * Check overflow pages.  We check overflow cells separately from other
     * tests that walk the page as it's simpler, and I don't care much how
     * fast table verify runs.
     */
    switch (page->type) {
    case WT_PAGE_COL_VAR:
    case WT_PAGE_ROW_INT:
    case WT_PAGE_ROW_LEAF:
        WT_RET(__verify_overflow_cell(session, ref, &found, vs));
        if (__wt_ref_is_root(ref) || page->type == WT_PAGE_ROW_INT)
            break;

        /*
         * Object if a leaf-no-overflow address cell references a page
         * with overflow keys, but don't object if a leaf address cell
         * references a page without overflow keys.  Reconciliation
         * doesn't guarantee every leaf page without overflow items will
         * be a leaf-no-overflow type.
         */
        if (found && unpack->raw == WT_CELL_ADDR_LEAF_NO)
            WT_RET_MSG(session, WT_ERROR,
                "page at %s, of type %s and referenced in its "
                "parent by a cell of type %s, contains overflow "
                "items",
                __wt_page_addr_string(session, ref, vs->tmp1),
                __wt_page_type_string(page->type),
                __wt_cell_type_string(WT_CELL_ADDR_LEAF_NO));
        break;
    }

    /* Check tree connections and recursively descend the tree. */
    switch (page->type) {
    case WT_PAGE_COL_INT:
        /* For each entry in an internal page, verify the subtree. */
        entry = 0;
        WT_INTL_FOREACH_BEGIN(session, page, child_ref) {
            /*
             * It's a depth-first traversal: this entry's starting
             * record number should be 1 more than the total records
             * reviewed to this point.
             */
            ++entry;
            if (child_ref->key.recno != vs->record_total + 1) {
                WT_RET_MSG(session, WT_ERROR,
                    "the starting record number in entry %" PRIu32
                    " of the column internal page at "
                    "%s is %" PRIu64 " and the expected "
                    "starting record number is %" PRIu64,
                    entry,
                    __wt_page_addr_string(
                        session, child_ref, vs->tmp1),
                    child_ref->key.recno,
                    vs->record_total + 1);
            }

            /* Verify the subtree. */
            ++vs->depth;
            WT_RET(__wt_page_in(session, child_ref, 0));
            ret = __verify_tree(session, child_ref, vs);
            WT_TRET(__wt_page_release(session, child_ref, 0));
            --vs->depth;
            WT_RET(ret);

            __wt_cell_unpack(child_ref->addr, unpack);
            WT_RET(bm->verify_addr(
                bm, session, unpack->data, unpack->size));
        } WT_INTL_FOREACH_END;
        break;
    case WT_PAGE_ROW_INT:
        /* For each entry in an internal page, verify the subtree. */
        entry = 0;
        WT_INTL_FOREACH_BEGIN(session, page, child_ref) {
            /*
             * It's a depth-first traversal: this entry's starting
             * key should be larger than the largest key previously
             * reviewed.
             *
             * The 0th key of any internal page is magic, and we
             * can't test against it.
             */
            ++entry;
            if (entry != 1)
                WT_RET(__verify_row_int_key_order(
                    session, page, child_ref, entry, vs));

            /* Verify the subtree. */
            ++vs->depth;
            WT_RET(__wt_page_in(session, child_ref, 0));
            ret = __verify_tree(session, child_ref, vs);
            WT_TRET(__wt_page_release(session, child_ref, 0));
            --vs->depth;
            WT_RET(ret);

            __wt_cell_unpack(child_ref->addr, unpack);
            WT_RET(bm->verify_addr(
                bm, session, unpack->data, unpack->size));
        } WT_INTL_FOREACH_END;
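The column-store half of this check enforces a simple global invariant: in a depth-first walk, every child's starting record number equals the running record total plus one. A self-contained sketch of just that invariant (not WiredTiger code; toy_child stands in for the WT_REF/recno bookkeeping):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct toy_child {
    uint64_t start_recno;   /* Child's starting record (ref->key.recno). */
    uint64_t nrecords;      /* Records counted while verifying the subtree. */
};

static int
toy_verify_recnos(const struct toy_child *c, size_t n)
{
    uint64_t total = 0;

    for (size_t i = 0; i < n; ++i) {
        if (c[i].start_recno != total + 1) {
            fprintf(stderr, "entry %zu starts at %" PRIu64
                ", expected %" PRIu64 "\n",
                i, c[i].start_recno, total + 1);
            return (-1);
        }
        total += c[i].nrecords;     /* vs->record_total in the real code. */
    }
    return (0);
}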
/*
 * __stat_page_col_var --
 *     Stat a WT_PAGE_COL_VAR page.
 */
static void
__stat_page_col_var(
    WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
    WT_CELL *cell;
    WT_CELL_UNPACK *unpack, _unpack;
    WT_COL *cip;
    WT_INSERT *ins;
    WT_UPDATE *upd;
    uint64_t deleted_cnt, entry_cnt, ovfl_cnt, rle_cnt;
    uint32_t i;
    bool orig_deleted;

    unpack = &_unpack;
    deleted_cnt = entry_cnt = ovfl_cnt = rle_cnt = 0;

    WT_STAT_INCR(session, stats, btree_column_variable);

    /*
     * Walk the page counting regular items, adjusting if the item has been
     * subsequently deleted or not.  This is a mess because a 10-item RLE
     * might have 3 of the items subsequently deleted.  Overflow items are
     * harder: we can't know if an updated item will be an overflow item or
     * not; do our best, and simply count every overflow item (or RLE set of
     * items) we see.
     */
    WT_COL_FOREACH(page, cip, i) {
        if ((cell = WT_COL_PTR(page, cip)) == NULL) {
            orig_deleted = true;
            ++deleted_cnt;
        } else {
            orig_deleted = false;
            __wt_cell_unpack(cell, unpack);
            if (unpack->type == WT_CELL_ADDR_DEL)
                orig_deleted = true;
            else {
                entry_cnt += __wt_cell_rle(unpack);
                rle_cnt += __wt_cell_rle(unpack) - 1;
            }
            if (unpack->ovfl)
                ++ovfl_cnt;
        }

        /*
         * Walk the insert list, checking for changes.  For each insert
         * we find, correct the original count based on its state.
         */
        WT_SKIP_FOREACH(ins, WT_COL_UPDATE(page, cip)) {
            upd = ins->upd;
            if (WT_UPDATE_DELETED_ISSET(upd)) {
                if (!orig_deleted) {
                    ++deleted_cnt;
                    --entry_cnt;
                }
            } else if (orig_deleted) {
                --deleted_cnt;
                ++entry_cnt;
            }
        }
    }
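One detail worth pulling out: an RLE run of N records contributes N logical entries but only N - 1 "repeats", which is what rle_cnt accumulates. A sketch with illustrative names (rle[i] is the repeat count of cell i; the real code obtains it from __wt_cell_rle):

#include <stddef.h>
#include <stdint.h>

static void
toy_rle_counts(const uint64_t *rle, size_t ncells,
    uint64_t *entry_cntp, uint64_t *rle_cntp)
{
    uint64_t entry_cnt = 0, rle_cnt = 0;

    for (size_t i = 0; i < ncells; ++i) {
        entry_cnt += rle[i];        /* N records in the run. */
        rle_cnt += rle[i] - 1;      /* N - 1 of them saved by RLE. */
    }
    *entry_cntp = entry_cnt;
    *rle_cntp = rle_cnt;
}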
/*
 * __verify_tree --
 *     Verify a tree, recursively descending through it in depth-first
 *     fashion.  The page argument was physically verified (so we know it's
 *     correctly formed), and the in-memory version built.  Our job is to
 *     check logical relationships in the page and in the tree.
 */
static int
__verify_tree(WT_SESSION_IMPL *session, WT_PAGE *page, WT_VSTUFF *vs)
{
    WT_BM *bm;
    WT_CELL *cell;
    WT_CELL_UNPACK *unpack, _unpack;
    WT_COL *cip;
    WT_DECL_RET;
    WT_REF *ref;
    uint64_t recno;
    uint32_t entry, i;
    int found, lno;

    bm = S2BT(session)->bm;
    unpack = &_unpack;

    WT_VERBOSE_RET(session, verify, "%s %s",
        __wt_page_addr_string(session, vs->tmp1, page),
        __wt_page_type_string(page->type));

#ifdef HAVE_DIAGNOSTIC
    if (vs->dump_address)
        WT_RET(__wt_msg(session, "%s %s",
            __wt_page_addr_string(session, vs->tmp1, page),
            __wt_page_type_string(page->type)));
#endif

    /*
     * The page's physical structure was verified when it was read into
     * memory by the read server thread, and then the in-memory version
     * of the page was built.  Now we make sure the page and tree are
     * logically consistent.
     *
     * !!!
     * The problem: (1) the read server has to build the in-memory version
     * of the page because the read server is the thread that flags when
     * any thread can access the page in the tree; (2) we can't build the
     * in-memory version of the page until the physical structure is known
     * to be OK, so the read server has to verify at least the physical
     * structure of the page; (3) doing complete page verification requires
     * reading additional pages (for example, overflow keys imply reading
     * overflow pages in order to test the key's order in the page); (4)
     * the read server cannot read additional pages because it will hang
     * waiting on itself.  For this reason, we split page verification
     * into a physical verification, which allows the in-memory version
     * of the page to be built, and then a subsequent logical verification
     * which happens here.
     *
     * Report progress every 10 pages.
     */
    if (++vs->fcnt % 10 == 0)
        WT_RET(__wt_progress(session, NULL, vs->fcnt));

#ifdef HAVE_DIAGNOSTIC
    /* Optionally dump the page in debugging mode. */
    if (vs->dump_blocks && page->dsk != NULL)
        WT_RET(__wt_debug_disk(session, page->dsk, NULL));
    if (vs->dump_pages)
        WT_RET(__wt_debug_page(session, page, NULL));
#endif

    /*
     * Column-store key order checks: check the page's record number and
     * then update the total record count.
     */
    switch (page->type) {
    case WT_PAGE_COL_FIX:
        recno = page->u.col_fix.recno;
        goto recno_chk;
    case WT_PAGE_COL_INT:
        recno = page->u.intl.recno;
        goto recno_chk;
    case WT_PAGE_COL_VAR:
        recno = page->u.col_var.recno;
recno_chk:
        if (recno != vs->record_total + 1)
            WT_RET_MSG(session, WT_ERROR,
                "page at %s has a starting record of %" PRIu64
                " when the expected starting record is %" PRIu64,
                __wt_page_addr_string(session, vs->tmp1, page),
                recno, vs->record_total + 1);
        break;
    }
    switch (page->type) {
    case WT_PAGE_COL_FIX:
        vs->record_total += page->entries;
        break;
    case WT_PAGE_COL_VAR:
        recno = 0;
        WT_COL_FOREACH(page, cip, i)
            if ((cell = WT_COL_PTR(page, cip)) == NULL)
                ++recno;
            else {
                __wt_cell_unpack(cell, unpack);
                recno += __wt_cell_rle(unpack);
            }
        vs->record_total += recno;
        break;
    }

    /*
     * Row-store leaf page key order check: it's a depth-first traversal,
     * the first key on this page should be larger than any key previously
     * seen.
     */
    switch (page->type) {
    case WT_PAGE_ROW_LEAF:
        WT_RET(__verify_row_leaf_key_order(session, page, vs));
        break;
    }

    /*
     * Check overflow pages.  We check overflow cells separately from other
     * tests that walk the page as it's simpler, and I don't care much how
     * fast table verify runs.
     *
     * Object if a leaf-no-overflow address cell references a page that has
     * overflow keys, but don't object if a standard address cell references
     * a page without overflow keys.  The leaf-no-overflow address cell is
     * an optimization for trees with few, if any, overflow items, and may
     * not be set by reconciliation in all possible cases.
     */
    if (WT_PAGE_IS_ROOT(page))
        lno = 0;
    else {
        __wt_cell_unpack(page->ref->addr, unpack);
        lno = unpack->raw == WT_CELL_ADDR_LNO ? 1 : 0;
    }
    switch (page->type) {
    case WT_PAGE_COL_FIX:
        break;
    case WT_PAGE_COL_VAR:
    case WT_PAGE_ROW_INT:
    case WT_PAGE_ROW_LEAF:
        WT_RET(__verify_overflow_cell(session, page, &found, vs));
        if (found && lno)
            WT_RET_MSG(session, WT_ERROR,
                "page at %s referenced in its parent by a cell of "
                "type %s illegally contains overflow items",
                __wt_page_addr_string(session, vs->tmp1, page),
                __wt_cell_type_string(WT_CELL_ADDR_LNO));
        break;
    default:
        if (lno)
            WT_RET_MSG(session, WT_ERROR,
                "page at %s is of type %s and is illegally "
                "referenced in its parent by a cell of type %s",
                __wt_page_addr_string(session, vs->tmp1, page),
                __wt_page_type_string(page->type),
                __wt_cell_type_string(WT_CELL_ADDR_LNO));
        break;
    }

    /* Check tree connections and recursively descend the tree. */
    switch (page->type) {
    case WT_PAGE_COL_INT:
        /* For each entry in an internal page, verify the subtree. */
        entry = 0;
        WT_REF_FOREACH(page, ref, i) {
            /*
             * It's a depth-first traversal: this entry's starting
             * record number should be 1 more than the total records
             * reviewed to this point.
             */
            ++entry;
            if (ref->u.recno != vs->record_total + 1) {
                __wt_cell_unpack(ref->addr, unpack);
                WT_RET_MSG(session, WT_ERROR,
                    "the starting record number in entry %" PRIu32
                    " of the column internal page at "
                    "%s is %" PRIu64 " and the expected "
                    "starting record number is %" PRIu64,
                    entry,
                    __wt_page_addr_string(
                        session, vs->tmp1, page),
                    ref->u.recno, vs->record_total + 1);
            }

            /* Verify the subtree. */
            WT_RET(__wt_page_in(session, page, ref));
            ret = __verify_tree(session, ref->page, vs);
            WT_TRET(__wt_page_release(session, ref->page));
            WT_RET(ret);

            __wt_cell_unpack(ref->addr, unpack);
            WT_RET(bm->verify_addr(
                bm, session, unpack->data, unpack->size));
        }
        break;
    case WT_PAGE_ROW_INT:
        /* For each entry in an internal page, verify the subtree. */
        entry = 0;
        WT_REF_FOREACH(page, ref, i) {
            /*
             * It's a depth-first traversal: this entry's starting
             * key should be larger than the largest key previously
             * reviewed.
             *
             * The 0th key of any internal page is magic, and we
             * can't test against it.
             */
            ++entry;
            if (entry != 1)
                WT_RET(__verify_row_int_key_order(
                    session, page, ref, entry, vs));

            /* Verify the subtree. */
            WT_RET(__wt_page_in(session, page, ref));
            ret = __verify_tree(session, ref->page, vs);
            WT_TRET(__wt_page_release(session, ref->page));
            WT_RET(ret);

            __wt_cell_unpack(ref->addr, unpack);
            WT_RET(bm->verify_addr(
                bm, session, unpack->data, unpack->size));
        }
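The overflow check reduces to a one-way implication: a leaf-no-overflow (WT_CELL_ADDR_LNO) parent cell promises the child page has no overflow items, while an ordinary address cell promises nothing either way. A sketch of the only illegal combination, with illustrative names:

#include <stdbool.h>

/*
 * The single illegal pairing: a no-overflow address cell referencing a
 * page that does contain overflow items.  A plain leaf cell over a page
 * without overflow items is fine, because reconciliation doesn't promise
 * to use the no-overflow cell whenever it could.
 */
static bool
toy_addr_cell_ok(bool cell_is_leaf_no_ovfl, bool page_has_ovfl)
{
    return (!(cell_is_leaf_no_ovfl && page_has_ovfl));
}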