Ejemplo n.º 1
0
/*
 * __wt_debug_offset --
 *	Debugging aid: turn a file offset/size/checksum triplet into an address
 * cookie, read the block it names and dump the resulting disk page.
 *
 * The output-file argument is handed to __wt_debug_disk unchanged. Returns
 * the first non-zero error reported by any step, otherwise the result of
 * the dump.
 */
int
__wt_debug_offset(WT_SESSION_IMPL *session,
     wt_off_t offset, uint32_t size, uint32_t cksum, const char *ofile)
{
	WT_DECL_ITEM(page_buf);
	WT_DECL_RET;
	size_t cookie_len;
	uint8_t cookie[WT_BTREE_MAX_ADDR_COOKIE], *cookie_end;

	WT_ASSERT(session, S2BT_SAFE(session) != NULL);

	/*
	 * Debugging only, and tied to the default block manager: it is the
	 * one that describes an address as a file offset, length and checksum.
	 * Other block managers may have a different idea of a file or of an
	 * address, which is why this isn't a block manager method.
	 *
	 * Pack the triplet into an address cookie; the end pointer is advanced
	 * past the packed bytes, which gives us the cookie's length.
	 */
	cookie_end = cookie;
	WT_RET(__wt_block_addr_to_buffer(
	    S2BT(session)->bm->block, &cookie_end, offset, size, cksum));
	cookie_len = WT_PTRDIFF(cookie_end, cookie);

	/*
	 * Go through the btree read path rather than the block manager so
	 * the block is decompressed as necessary before it is dumped.
	 */
	WT_RET(__wt_scr_alloc(session, 0, &page_buf));

	ret = __wt_bt_read(session, page_buf, cookie, cookie_len);
	if (ret == 0)
		ret = __wt_debug_disk(session, page_buf->mem, ofile);

	/* The scratch buffer is released on success and failure alike. */
	__wt_scr_free(session, &page_buf);
	return (ret);
}
Ejemplo n.º 2
0
/*
 * __wt_debug_addr --
 *	Debugging aid: read the block named by an address cookie (addr plus
 * addr_size) and dump it as a disk page.
 *
 * The output-file argument is handed to __wt_debug_disk unchanged. Returns
 * the first non-zero error reported by any step, otherwise the result of
 * the dump.
 */
int
__wt_debug_addr(WT_SESSION_IMPL *session,
                const uint8_t *addr, uint32_t addr_size, const char *ofile)
{
    WT_DECL_ITEM(scratch);
    WT_DECL_RET;

    /* Grab a scratch buffer to hold the block image. */
    WT_RET(__wt_scr_alloc(session, 1024, &scratch));

    /* Read the block from the session's btree, dump it only on success. */
    ret = __wt_block_read(
              session, session->btree->block, scratch, addr, addr_size);
    if (ret == 0)
        ret = __wt_debug_disk(session, scratch->mem, ofile);

    /* The scratch buffer is released on success and failure alike. */
    __wt_scr_free(&scratch);

    return (ret);
}
Ejemplo n.º 3
0
/*
 * __wt_debug_addr --
 *	Debugging aid: read the block named by an address cookie (addr plus
 * addr_size) through the btree's block manager and dump it as a disk page.
 *
 * The output-file argument is handed to __wt_debug_disk unchanged. Returns
 * the first non-zero error reported by any step, otherwise the result of
 * the dump.
 */
int
__wt_debug_addr(WT_SESSION_IMPL *session,
    const uint8_t *addr, size_t addr_size, const char *ofile)
{
	WT_BM *block_mgr;
	WT_DECL_ITEM(page_buf);
	WT_DECL_RET;

	/* The block manager comes from the btree attached to the session. */
	block_mgr = S2BT(session)->bm;

	WT_RET(__wt_scr_alloc(session, 1024, &page_buf));

	/* Read via the block manager's read method, dump only on success. */
	ret = block_mgr->read(block_mgr, session, page_buf, addr, addr_size);
	if (ret == 0)
		ret = __wt_debug_disk(session, page_buf->mem, ofile);

	/* The scratch buffer is released on success and failure alike. */
	__wt_scr_free(session, &page_buf);
	return (ret);
}
Ejemplo n.º 4
0
/*
 * __wt_debug_off --
 *	Debugging aid: read the block described by an offset/size pair and
 * dump it as a disk page.
 *
 * The read is requested with WT_BLOCK_CHECKSUM_NOT_SET in place of a
 * checksum. The output-file argument is handed to __wt_debug_disk unchanged.
 * Returns the first non-zero error reported by any step, otherwise the
 * result of the dump.
 */
int
__wt_debug_off(
    WT_SESSION_IMPL *session, uint32_t offset, uint32_t size, const char *ofile)
{
    WT_BTREE *tree;
    WT_DECL_ITEM(scratch);
    WT_DECL_RET;

    tree = session->btree;

    /* The scratch buffer is sized to the block we were asked to read. */
    WT_RET(__wt_scr_alloc(session, size, &scratch));

    /* Read the block, and dump it only if the read succeeded. */
    ret = __wt_block_read_off(
        session, tree->block, scratch, offset, size, WT_BLOCK_CHECKSUM_NOT_SET);
    if (ret == 0)
        ret = __wt_debug_disk(session, scratch->mem, ofile);

    /* The scratch buffer is released on success and failure alike. */
    __wt_scr_free(&scratch);

    return (ret);
}
Ejemplo n.º 5
0
/*
 * __wt_debug_offset_blind --
 *	Debugging aid: read the block found at a file offset, with no size or
 * checksum supplied by the caller, and dump it as a disk page.
 *
 * The output-file argument is handed to __wt_debug_disk unchanged. Returns
 * the first non-zero error reported by any step, otherwise the result of
 * the dump.
 */
int
__wt_debug_offset_blind(
    WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile)
{
	WT_DECL_ITEM(page_buf);
	WT_DECL_RET;

	/*
	 * Debugging only, and tied to the default block manager: it is the
	 * one that describes an address as a file offset, length and checksum.
	 * Other block managers may have a different idea of a file or of an
	 * address, which is why this isn't a block manager method.
	 */
	WT_RET(__wt_scr_alloc(session, 1024, &page_buf));

	/* Read straight from the underlying block handle at that offset. */
	ret = __wt_block_read_off_blind(
	    session, S2BT(session)->bm->block, page_buf, offset);
	if (ret == 0)
		ret = __wt_debug_disk(session, page_buf->mem, ofile);

	/* The scratch buffer is released on success and failure alike. */
	__wt_scr_free(session, &page_buf);
	return (ret);
}
Ejemplo n.º 6
0
/*
 * __verify_tree --
 *	Verify a tree, recursively descending through it in depth-first fashion.
 * The page argument was physically verified (so we know it's correctly formed),
 * and the in-memory version built.  Our job is to check logical relationships
 * in the page and in the tree.
 */
static int
__verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
{
	WT_BM *bm;
	WT_CELL *cell;
	WT_CELL_UNPACK *unpack, _unpack;
	WT_COL *cip;
	WT_DECL_RET;
	WT_PAGE *page;
	WT_REF *child_ref;
	uint64_t recno;
	uint32_t entry, i;
	bool found;

	bm = S2BT(session)->bm;
	page = ref->page;

	unpack = &_unpack;
	WT_CLEAR(*unpack);	/* -Wuninitialized */

	WT_RET(__wt_verbose(session, WT_VERB_VERIFY, "%s %s",
	    __wt_page_addr_string(session, ref, vs->tmp1),
	    __wt_page_type_string(page->type)));

	/* Optionally dump the address. */
	if (vs->dump_address)
		WT_RET(__wt_msg(session, "%s %s",
		    __wt_page_addr_string(session, ref, vs->tmp1),
		    __wt_page_type_string(page->type)));

	/* Track the shape of the tree. */
	if (WT_PAGE_IS_INTERNAL(page))
		++vs->depth_internal[
		    WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];
	else
		++vs->depth_leaf[
		    WT_MIN(vs->depth, WT_ELEMENTS(vs->depth_internal) - 1)];

	/*
	 * The page's physical structure was verified when it was read into
	 * memory by the read server thread, and then the in-memory version
	 * of the page was built. Now we make sure the page and tree are
	 * logically consistent.
	 *
	 * !!!
	 * The problem: (1) the read server has to build the in-memory version
	 * of the page because the read server is the thread that flags when
	 * any thread can access the page in the tree; (2) we can't build the
	 * in-memory version of the page until the physical structure is known
	 * to be OK, so the read server has to verify at least the physical
	 * structure of the page; (3) doing complete page verification requires
	 * reading additional pages (for example, overflow keys imply reading
	 * overflow pages in order to test the key's order in the page); (4)
	 * the read server cannot read additional pages because it will hang
	 * waiting on itself.  For this reason, we split page verification
	 * into a physical verification, which allows the in-memory version
	 * of the page to be built, and then a subsequent logical verification
	 * which happens here.
	 *
	 * Report progress occasionally.
	 */
#define	WT_VERIFY_PROGRESS_INTERVAL	100
	if (++vs->fcnt % WT_VERIFY_PROGRESS_INTERVAL == 0)
		WT_RET(__wt_progress(session, NULL, vs->fcnt));

#ifdef HAVE_DIAGNOSTIC
	/* Optionally dump the blocks or page in debugging mode. */
	if (vs->dump_blocks)
		WT_RET(__wt_debug_disk(session, page->dsk, NULL));
	if (vs->dump_pages)
		WT_RET(__wt_debug_page(session, page, NULL));
#endif

	/*
	 * Column-store key order checks: check the page's record number and
	 * then update the total record count.
	 */
	switch (page->type) {
	case WT_PAGE_COL_FIX:
		recno = page->pg_fix_recno;
		goto recno_chk;
	case WT_PAGE_COL_INT:
		recno = page->pg_intl_recno;
		goto recno_chk;
	case WT_PAGE_COL_VAR:
		recno = page->pg_var_recno;
recno_chk:	if (recno != vs->record_total + 1)
			WT_RET_MSG(session, WT_ERROR,
			    "page at %s has a starting record of %" PRIu64
			    " when the expected starting record is %" PRIu64,
			    __wt_page_addr_string(session, ref, vs->tmp1),
			    recno, vs->record_total + 1);
		break;
	}
	switch (page->type) {
	case WT_PAGE_COL_FIX:
		vs->record_total += page->pg_fix_entries;
		break;
	case WT_PAGE_COL_VAR:
		recno = 0;
		WT_COL_FOREACH(page, cip, i)
			if ((cell = WT_COL_PTR(page, cip)) == NULL)
				++recno;
			else {
				__wt_cell_unpack(cell, unpack);
				recno += __wt_cell_rle(unpack);
			}
		vs->record_total += recno;
		break;
	}

	/*
	 * Row-store leaf page key order check: it's a depth-first traversal,
	 * the first key on this page should be larger than any key previously
	 * seen.
	 */
	switch (page->type) {
	case WT_PAGE_ROW_LEAF:
		WT_RET(__verify_row_leaf_key_order(session, ref, vs));
		break;
	}

	/* If it's not the root page, unpack the parent cell. */
	if (!__wt_ref_is_root(ref)) {
		__wt_cell_unpack(ref->addr, unpack);

		/* Compare the parent cell against the page type. */
		switch (page->type) {
		case WT_PAGE_COL_FIX:
			if (unpack->raw != WT_CELL_ADDR_LEAF_NO)
				goto celltype_err;
			break;
		case WT_PAGE_COL_VAR:
			if (unpack->raw != WT_CELL_ADDR_LEAF &&
			    unpack->raw != WT_CELL_ADDR_LEAF_NO)
				goto celltype_err;
			break;
		case WT_PAGE_ROW_LEAF:
			if (unpack->raw != WT_CELL_ADDR_DEL &&
			    unpack->raw != WT_CELL_ADDR_LEAF &&
			    unpack->raw != WT_CELL_ADDR_LEAF_NO)
				goto celltype_err;
			break;
		case WT_PAGE_COL_INT:
		case WT_PAGE_ROW_INT:
			if (unpack->raw != WT_CELL_ADDR_INT)
celltype_err:			WT_RET_MSG(session, WT_ERROR,
				    "page at %s, of type %s, is referenced in "
				    "its parent by a cell of type %s",
				    __wt_page_addr_string(
					session, ref, vs->tmp1),
				    __wt_page_type_string(page->type),
				    __wt_cell_type_string(unpack->raw));
			break;
		}
	}

	/*
	 * Check overflow pages.  We check overflow cells separately from other
	 * tests that walk the page as it's simpler, and I don't care much how
	 * fast table verify runs.
	 */
	switch (page->type) {
	case WT_PAGE_COL_VAR:
	case WT_PAGE_ROW_INT:
	case WT_PAGE_ROW_LEAF:
		WT_RET(__verify_overflow_cell(session, ref, &found, vs));
		if (__wt_ref_is_root(ref) || page->type == WT_PAGE_ROW_INT)
			break;

		/*
		 * Object if a leaf-no-overflow address cell references a page
		 * with overflow keys, but don't object if a leaf address cell
		 * references a page without overflow keys.  Reconciliation
		 * doesn't guarantee every leaf page without overflow items will
		 * be a leaf-no-overflow type.
		 */
		if (found && unpack->raw == WT_CELL_ADDR_LEAF_NO)
			WT_RET_MSG(session, WT_ERROR,
			    "page at %s, of type %s and referenced in its "
			    "parent by a cell of type %s, contains overflow "
			    "items",
			    __wt_page_addr_string(session, ref, vs->tmp1),
			    __wt_page_type_string(page->type),
			    __wt_cell_type_string(WT_CELL_ADDR_LEAF_NO));
		break;
	}

	/* Check tree connections and recursively descend the tree. */
	switch (page->type) {
	case WT_PAGE_COL_INT:
		/* For each entry in an internal page, verify the subtree. */
		entry = 0;
		WT_INTL_FOREACH_BEGIN(session, page, child_ref) {
			/*
			 * It's a depth-first traversal: this entry's starting
			 * record number should be 1 more than the total records
			 * reviewed to this point.
			 */
			++entry;
			if (child_ref->key.recno != vs->record_total + 1) {
				WT_RET_MSG(session, WT_ERROR,
				    "the starting record number in entry %"
				    PRIu32 " of the column internal page at "
				    "%s is %" PRIu64 " and the expected "
				    "starting record number is %" PRIu64,
				    entry,
				    __wt_page_addr_string(
				    session, child_ref, vs->tmp1),
				    child_ref->key.recno,
				    vs->record_total + 1);
			}

			/* Verify the subtree. */
			++vs->depth;
			WT_RET(__wt_page_in(session, child_ref, 0));
			ret = __verify_tree(session, child_ref, vs);
			WT_TRET(__wt_page_release(session, child_ref, 0));
			--vs->depth;
			WT_RET(ret);

			__wt_cell_unpack(child_ref->addr, unpack);
			WT_RET(bm->verify_addr(
			    bm, session, unpack->data, unpack->size));
		} WT_INTL_FOREACH_END;
		break;
	case WT_PAGE_ROW_INT:
		/* For each entry in an internal page, verify the subtree. */
		entry = 0;
		WT_INTL_FOREACH_BEGIN(session, page, child_ref) {
			/*
			 * It's a depth-first traversal: this entry's starting
			 * key should be larger than the largest key previously
			 * reviewed.
			 *
			 * The 0th key of any internal page is magic, and we
			 * can't test against it.
			 */
			++entry;
			if (entry != 1)
				WT_RET(__verify_row_int_key_order(
				    session, page, child_ref, entry, vs));

			/* Verify the subtree. */
			++vs->depth;
			WT_RET(__wt_page_in(session, child_ref, 0));
			ret = __verify_tree(session, child_ref, vs);
			WT_TRET(__wt_page_release(session, child_ref, 0));
			--vs->depth;
			WT_RET(ret);

			__wt_cell_unpack(child_ref->addr, unpack);
			WT_RET(bm->verify_addr(
			    bm, session, unpack->data, unpack->size));
		} WT_INTL_FOREACH_END;
Ejemplo n.º 7
0
/*
 * __verify_tree --
 *	Verify a tree, recursively descending through it in depth-first fashion.
 * The page argument was physically verified (so we know it's correctly formed),
 * and the in-memory version built.  Our job is to check logical relationships
 * in the page and in the tree.
 */
static int
__verify_tree(WT_SESSION_IMPL *session, WT_PAGE *page, WT_VSTUFF *vs)
{
	WT_BM *bm;
	WT_CELL *cell;
	WT_CELL_UNPACK *unpack, _unpack;
	WT_COL *cip;
	WT_DECL_RET;
	WT_REF *ref;
	uint64_t recno;
	uint32_t entry, i;
	int found, lno;

	bm = S2BT(session)->bm;
	unpack = &_unpack;

	WT_VERBOSE_RET(session, verify, "%s %s",
	    __wt_page_addr_string(session, vs->tmp1, page),
	    __wt_page_type_string(page->type));
#ifdef HAVE_DIAGNOSTIC
	if (vs->dump_address)
		WT_RET(__wt_msg(session, "%s %s",
		    __wt_page_addr_string(session, vs->tmp1, page),
		    __wt_page_type_string(page->type)));
#endif

	/*
	 * The page's physical structure was verified when it was read into
	 * memory by the read server thread, and then the in-memory version
	 * of the page was built.   Now we make sure the page and tree are
	 * logically consistent.
	 *
	 * !!!
	 * The problem: (1) the read server has to build the in-memory version
	 * of the page because the read server is the thread that flags when
	 * any thread can access the page in the tree; (2) we can't build the
	 * in-memory version of the page until the physical structure is known
	 * to be OK, so the read server has to verify at least the physical
	 * structure of the page; (3) doing complete page verification requires
	 * reading additional pages (for example, overflow keys imply reading
	 * overflow pages in order to test the key's order in the page); (4)
	 * the read server cannot read additional pages because it will hang
	 * waiting on itself.  For this reason, we split page verification
	 * into a physical verification, which allows the in-memory version
	 * of the page to be built, and then a subsequent logical verification
	 * which happens here.
	 *
	 * Report progress every 10 pages.
	 */
	if (++vs->fcnt % 10 == 0)
		WT_RET(__wt_progress(session, NULL, vs->fcnt));

#ifdef HAVE_DIAGNOSTIC
	/* Optionally dump the page in debugging mode. */
	if (vs->dump_blocks && page->dsk != NULL)
		WT_RET(__wt_debug_disk(session, page->dsk, NULL));
	if (vs->dump_pages)
		WT_RET(__wt_debug_page(session, page, NULL));
#endif

	/*
	 * Column-store key order checks: check the page's record number and
	 * then update the total record count.
	 */
	switch (page->type) {
	case WT_PAGE_COL_FIX:
		recno = page->u.col_fix.recno;
		goto recno_chk;
	case WT_PAGE_COL_INT:
		recno = page->u.intl.recno;
		goto recno_chk;
	case WT_PAGE_COL_VAR:
		recno = page->u.col_var.recno;
recno_chk:	if (recno != vs->record_total + 1)
			WT_RET_MSG(session, WT_ERROR,
			    "page at %s has a starting record of %" PRIu64
			    " when the expected starting record is %" PRIu64,
			    __wt_page_addr_string(session, vs->tmp1, page),
			    recno, vs->record_total + 1);
		break;
	}
	switch (page->type) {
	case WT_PAGE_COL_FIX:
		vs->record_total += page->entries;
		break;
	case WT_PAGE_COL_VAR:
		recno = 0;
		WT_COL_FOREACH(page, cip, i)
			if ((cell = WT_COL_PTR(page, cip)) == NULL)
				++recno;
			else {
				__wt_cell_unpack(cell, unpack);
				recno += __wt_cell_rle(unpack);
			}
		vs->record_total += recno;
		break;
	}

	/*
	 * Row-store leaf page key order check: it's a depth-first traversal,
	 * the first key on this page should be larger than any key previously
	 * seen.
	 */
	switch (page->type) {
	case WT_PAGE_ROW_LEAF:
		WT_RET(__verify_row_leaf_key_order(session, page, vs));
		break;
	}

	/*
	 * Check overflow pages.  We check overflow cells separately from other
	 * tests that walk the page as it's simpler, and I don't care much how
	 * fast table verify runs.
	 *
	 * Object if a leaf-no-overflow address cell references a page that has
	 * overflow keys, but don't object if a standard address cell references
	 * a page without overflow keys.  The leaf-no-overflow address cell is
	 * an optimization for trees with few, if any, overflow items, and
	 * may not be set by reconciliation in all possible cases.
	 */
	if (WT_PAGE_IS_ROOT(page))
		lno = 0;
	else {
		__wt_cell_unpack(page->ref->addr, unpack);
		lno = unpack->raw == WT_CELL_ADDR_LNO ? 1 : 0;
	}
	switch (page->type) {
	case WT_PAGE_COL_FIX:
		break;
	case WT_PAGE_COL_VAR:
	case WT_PAGE_ROW_INT:
	case WT_PAGE_ROW_LEAF:
		WT_RET(__verify_overflow_cell(session, page, &found, vs));
		if (found && lno)
			WT_RET_MSG(session, WT_ERROR,
			    "page at %s referenced in its parent by a cell of "
			    "type %s illegally contains overflow items",
			    __wt_page_addr_string(session, vs->tmp1, page),
			    __wt_cell_type_string(WT_CELL_ADDR_LNO));
		break;
	default:
		if (lno)
			WT_RET_MSG(session, WT_ERROR,
			    "page at %s is of type %s and is illegally "
			    "referenced in its parent by a cell of type %s",
			    __wt_page_addr_string(session, vs->tmp1, page),
			    __wt_page_type_string(page->type),
			    __wt_cell_type_string(WT_CELL_ADDR_LNO));
		break;
	}

	/* Check tree connections and recursively descend the tree. */
	switch (page->type) {
	case WT_PAGE_COL_INT:
		/* For each entry in an internal page, verify the subtree. */
		entry = 0;
		WT_REF_FOREACH(page, ref, i) {
			/*
			 * It's a depth-first traversal: this entry's starting
			 * record number should be 1 more than the total records
			 * reviewed to this point.
			 */
			++entry;
			if (ref->u.recno != vs->record_total + 1) {
				__wt_cell_unpack(ref->addr, unpack);
				WT_RET_MSG(session, WT_ERROR,
				    "the starting record number in entry %"
				    PRIu32 " of the column internal page at "
				    "%s is %" PRIu64 " and the expected "
				    "starting record number is %" PRIu64,
				    entry,
				    __wt_page_addr_string(
				    session, vs->tmp1, page),
				    ref->u.recno,
				    vs->record_total + 1);
			}

			/* Verify the subtree. */
			WT_RET(__wt_page_in(session, page, ref));
			ret = __verify_tree(session, ref->page, vs);
			WT_TRET(__wt_page_release(session, ref->page));
			WT_RET(ret);

			__wt_cell_unpack(ref->addr, unpack);
			WT_RET(bm->verify_addr(
			    bm, session, unpack->data, unpack->size));
		}
		break;
	case WT_PAGE_ROW_INT:
		/* For each entry in an internal page, verify the subtree. */
		entry = 0;
		WT_REF_FOREACH(page, ref, i) {
			/*
			 * It's a depth-first traversal: this entry's starting
			 * key should be larger than the largest key previously
			 * reviewed.
			 *
			 * The 0th key of any internal page is magic, and we
			 * can't test against it.
			 */
			++entry;
			if (entry != 1)
				WT_RET(__verify_row_int_key_order(
				    session, page, ref, entry, vs));

			/* Verify the subtree. */
			WT_RET(__wt_page_in(session, page, ref));
			ret = __verify_tree(session, ref->page, vs);
			WT_TRET(__wt_page_release(session, ref->page));
			WT_RET(ret);

			__wt_cell_unpack(ref->addr, unpack);
			WT_RET(bm->verify_addr(
			    bm, session, unpack->data, unpack->size));
		}