Example #1
/*
 * __stat_tree_walk --
 *	Gather btree statistics that require traversing the tree.
 */
static int
__stat_tree_walk(WT_SESSION_IMPL *session)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_DSRC_STATS **stats;
	WT_REF *next_walk;

	btree = S2BT(session);
	stats = btree->dhandle->stats;

	/*
	 * Clear the statistics we're about to count.
	 */
	WT_STAT_SET(session, stats, btree_column_deleted, 0);
	WT_STAT_SET(session, stats, btree_column_fix, 0);
	WT_STAT_SET(session, stats, btree_column_internal, 0);
	WT_STAT_SET(session, stats, btree_column_rle, 0);
	WT_STAT_SET(session, stats, btree_column_variable, 0);
	WT_STAT_SET(session, stats, btree_entries, 0);
	WT_STAT_SET(session, stats, btree_overflow, 0);
	WT_STAT_SET(session, stats, btree_row_internal, 0);
	WT_STAT_SET(session, stats, btree_row_leaf, 0);

	next_walk = NULL;
	while ((ret = __wt_tree_walk(
	    session, &next_walk, 0)) == 0 && next_walk != NULL) {
		WT_WITH_PAGE_INDEX(session,
		    ret = __stat_page(session, next_walk->page, stats));
		WT_RET(ret);
	}
	return (ret == WT_NOTFOUND ? 0 : ret);
}
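The same walk skeleton reappears throughout these examples: reset whatever is about to be recomputed, loop over __wt_tree_walk until it hands back a NULL reference or WT_NOTFOUND, and do any per-page work that touches an internal page's index under WT_WITH_PAGE_INDEX so a concurrent split cannot pull the index out from under the reader. As an illustration only, the hypothetical helper below reuses that skeleton to count in-memory leaf pages; the function name and the WT_PAGE_IS_INTERNAL leaf test are assumptions layered on the WiredTiger internal types used above, not part of the original example.

/*
 * __walk_count_leaves --
 *	Illustrative sketch (hypothetical helper): count leaf pages using the
 *	same __wt_tree_walk loop as __stat_tree_walk above.
 */
static int
__walk_count_leaves(WT_SESSION_IMPL *session, uint64_t *leavesp)
{
	WT_DECL_RET;
	WT_REF *next_walk;

	*leavesp = 0;

	next_walk = NULL;
	while ((ret = __wt_tree_walk(
	    session, &next_walk, 0)) == 0 && next_walk != NULL)
		if (!WT_PAGE_IS_INTERNAL(next_walk->page))	/* Assumed leaf test. */
			++*leavesp;

	/* The walk ends with WT_NOTFOUND; anything else is a real error. */
	return (ret == WT_NOTFOUND ? 0 : ret);
}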
Example #2
/*
 * __wt_btcur_next_random --
 *	Move to a random record in the tree.
 */
int
__wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	btree = cbt->btree;

	/*
	 * Only supports row-store: applications can trivially select a random
	 * value from a column-store, if there were any reason to do so.
	 */
	if (btree->type != BTREE_ROW)
		WT_RET(ENOTSUP);

	WT_STAT_FAST_CONN_INCR(session, cursor_next);
	WT_STAT_FAST_DATA_INCR(session, cursor_next);

	WT_RET(__cursor_func_init(cbt, 1));

	WT_WITH_PAGE_INDEX(session,
	    ret = __wt_row_random(session, cbt));
	WT_ERR(ret);
	if (__cursor_valid(cbt, &upd))
		WT_ERR(__wt_kv_return(session, cbt, upd));
	else
		WT_ERR(__wt_btcur_search_near(cbt, NULL));

err:	if (ret != 0)
		WT_TRET(__cursor_reset(cbt));
	return (ret);
}
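Every example here leans on the same family of return macros: WT_RET returns an error to the caller immediately, WT_ERR jumps to the function's err label so cleanup such as __cursor_reset can run, and WT_TRET folds a cleanup error into a return code that is already being tracked. The sketch below only approximates that convention; the names are deliberately different because these are not the real WiredTiger definitions, which handle additional cases.

/* Approximate shape of the return-handling macros (not the real definitions). */
#define	MY_DECL_RET	int ret = 0
#define	MY_RET(a) do {						\
	if ((ret = (a)) != 0)					\
		return (ret);					\
} while (0)
#define	MY_ERR(a) do {						\
	if ((ret = (a)) != 0)					\
		goto err;					\
} while (0)
#define	MY_TRET(a) do {						\
	int _tret;						\
	if ((_tret = (a)) != 0 && ret == 0)			\
		ret = _tret;					\
} while (0)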
Example #3
/*
 * __cursor_row_search --
 *	Row-store search from an application cursor.
 */
static inline int
__cursor_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int insert)
{
	WT_DECL_RET;

	WT_WITH_PAGE_INDEX(session, 
	    ret = __wt_row_search(session, &cbt->iface.key, NULL, cbt, insert));
	return (ret);
}
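__cursor_row_search exists only to take the page-index protection around the underlying search call. As a rough illustration of what that protection means, the hypothetical macro below flags the session as reading a page index for the duration of the wrapped statement, so a concurrent split can tell a reader is active; the in_page_index field name is an assumption, and the real WT_WITH_PAGE_INDEX is defined in WiredTiger's btree headers with more care around re-entry and split generations.

/* Illustrative stand-in for WT_WITH_PAGE_INDEX (not the real macro). */
#define	MY_WITH_PAGE_INDEX(session, e) do {			\
	bool __entered = !(session)->in_page_index;		\
	if (__entered)						\
		(session)->in_page_index = true;		\
	e;							\
	if (__entered)						\
		(session)->in_page_index = false;		\
} while (0)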
Example #4
/*
 * __cursor_col_search --
 *	Column-store search from an application cursor.
 */
static inline int
__cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
	WT_DECL_RET;

	WT_WITH_PAGE_INDEX(session, 
	    ret = __wt_col_search(session, cbt->iface.recno, NULL, cbt));
	return (ret);
}
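The row-store and column-store wrappers in Examples #3 and #4 differ only in which search function they call under WT_WITH_PAGE_INDEX. Purely as an illustration, they could be folded into a single dispatcher on the tree type; the helper name below is hypothetical, while the functions and macros it calls are the ones used in the examples above.

/*
 * __cursor_search_dispatch --
 *	Hypothetical combined wrapper: choose the row- or column-store search
 *	based on the tree type, holding the page index either way.
 */
static inline int
__cursor_search_dispatch(
    WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, int insert)
{
	WT_DECL_RET;

	if (S2BT(session)->type == BTREE_ROW)
		WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(
		    session, &cbt->iface.key, NULL, cbt, insert));
	else
		WT_WITH_PAGE_INDEX(session, ret = __wt_col_search(
		    session, cbt->iface.recno, NULL, cbt));
	return (ret);
}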
Example #5
/*
 * __wt_btree_stat_init --
 *	Initialize the Btree statistics.
 */
int
__wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_DSRC_STATS **stats;
	WT_REF *next_walk;

	btree = S2BT(session);
	bm = btree->bm;
	stats = btree->dhandle->stats;

	WT_RET(bm->stat(bm, session, stats[0]));

	WT_STAT_SET(session, stats, btree_fixed_len, btree->bitcnt);
	WT_STAT_SET(session, stats, btree_maximum_depth, btree->maximum_depth);
	WT_STAT_SET(session, stats, btree_maxintlkey, btree->maxintlkey);
	WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage);
	WT_STAT_SET(session, stats, btree_maxleafkey, btree->maxleafkey);
	WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage);
	WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue);

	/* Everything else is really, really expensive. */
	if (!F_ISSET(cst, WT_CONN_STAT_ALL))
		return (0);

	/*
	 * Clear the statistics we're about to count.
	 */
	WT_STAT_SET(session, stats, btree_column_deleted, 0);
	WT_STAT_SET(session, stats, btree_column_fix, 0);
	WT_STAT_SET(session, stats, btree_column_internal, 0);
	WT_STAT_SET(session, stats, btree_column_rle, 0);
	WT_STAT_SET(session, stats, btree_column_variable, 0);
	WT_STAT_SET(session, stats, btree_entries, 0);
	WT_STAT_SET(session, stats, btree_overflow, 0);
	WT_STAT_SET(session, stats, btree_row_internal, 0);
	WT_STAT_SET(session, stats, btree_row_leaf, 0);

	next_walk = NULL;
	while ((ret = __wt_tree_walk(
	    session, &next_walk, 0)) == 0 && next_walk != NULL) {
		WT_WITH_PAGE_INDEX(session,
		    ret = __stat_page(session, next_walk->page, stats));
		WT_RET(ret);
	}
	return (ret == WT_NOTFOUND ? 0 : ret);
}
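The expensive tree walk above only runs when "all" statistics are enabled, so an application has to opt in through the statistics configuration and then read the counters back through a statistics cursor. The snippet below is a hedged, application-side sketch using the public WiredTiger API; the home directory, table name, and error handling are illustrative.

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

/* Hedged sketch: enable "all" statistics and read them back for one table. */
static int
dump_table_stats(void)
{
	WT_CONNECTION *conn;
	WT_CURSOR *cursor;
	WT_SESSION *session;
	const char *desc, *pvalue;
	int64_t value;
	int ret;

	/* "statistics=(all)" enables the expensive, tree-walking statistics. */
	if ((ret = wiredtiger_open("WT_HOME", NULL,
	    "create,statistics=(all)", &conn)) != 0)
		return (ret);
	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		goto err;

	/* A statistics cursor returns (description, printable, value). */
	if ((ret = session->open_cursor(session,
	    "statistics:table:mytable", NULL, NULL, &cursor)) != 0)
		goto err;
	while ((ret = cursor->next(cursor)) == 0) {
		if ((ret = cursor->get_value(
		    cursor, &desc, &pvalue, &value)) != 0)
			break;
		printf("%s=%" PRId64 "\n", desc, value);
	}
	if (ret == WT_NOTFOUND)		/* End of the statistics cursor. */
		ret = 0;

err:	if (conn->close(conn, NULL) != 0 && ret == 0)
		ret = -1;
	return (ret);
}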
Example #6
/*
 * __wt_btcur_next_random --
 *	Move to a random record in the tree. There are two algorithms, one
 *	where we select a record at random from the whole tree on each
 *	retrieval and one where we first select a record at random from the
 *	whole tree, and then subsequently sample forward from that location.
 *	The sampling approach allows us to select reasonably uniform random
 *	points from unbalanced trees.
 */
int
__wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	WT_UPDATE *upd;
	wt_off_t size;
	uint64_t skip;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	btree = cbt->btree;

	/*
	 * Only supports row-store: applications can trivially select a random
	 * value from a column-store, if there were any reason to do so.
	 */
	if (btree->type != BTREE_ROW)
		WT_RET_MSG(session, ENOTSUP,
		    "WT_CURSOR.next_random only supported by row-store tables");

	WT_STAT_CONN_INCR(session, cursor_next);
	WT_STAT_DATA_INCR(session, cursor_next);

	/*
	 * If retrieving random values without sampling, or we don't have a
	 * page reference, pick a roughly random leaf page in the tree.
	 */
	if (cbt->ref == NULL || cbt->next_random_sample_size == 0) {
		/*
		 * Skip past the sample size of the leaf pages in the tree
		 * between each random key return to compensate for unbalanced
		 * trees.
		 *
		 * Use the underlying file size divided by its block allocation
		 * size as our guess of leaf pages in the file (this can be
		 * entirely wrong, as it depends on how many pages are in this
		 * particular checkpoint, how large the leaf and internal pages
		 * really are, and other factors). Then, divide that value by
		 * the configured sample size and increment the final result to
		 * make sure tiny files don't leave us with a skip value of 0.
		 *
		 * !!!
		 * Ideally, the number would be prime to avoid restart issues.
		 */
		if (cbt->next_random_sample_size != 0) {
			WT_ERR(btree->bm->size(btree->bm, session, &size));
			cbt->next_random_leaf_skip = (uint64_t)
			    ((size / btree->allocsize) /
			    cbt->next_random_sample_size) + 1;
		}

		/*
		 * Choose a leaf page from the tree.
		 */
		WT_ERR(__cursor_func_init(cbt, true));
		WT_WITH_PAGE_INDEX(
		    session, ret = __wt_row_random_descent(session, cbt));
		WT_ERR(ret);
	} else {
		/*
		 * Read through the tree, skipping leaf pages. Be cautious about
		 * the skip count: if the last leaf page skipped was also the
		 * last leaf page in the tree, it may be set to zero on return
		 * with the end-of-walk condition.
		 *
		 * Pages read for data sampling aren't "useful"; don't update
		 * the read generation of pages already in memory, and if a page
		 * is read, set its generation to a low value so it is evicted
		 * quickly.
		 */
		for (skip =
		    cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;)
			WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip,
			    WT_READ_NO_GEN |
			    WT_READ_SKIP_INTL | WT_READ_WONT_NEED));
	}

	/*
	 * Select a random entry from the leaf page. If it's not valid, move to
	 * the next entry, if that doesn't work, move to the previous entry.
	 */
	WT_ERR(__wt_row_random_leaf(session, cbt));
	if (__cursor_valid(cbt, &upd))
		WT_ERR(__wt_kv_return(session, cbt, upd));
	else {
		if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND)
			ret = __wt_btcur_prev(cbt, false);
		WT_ERR(ret);
	}
	return (0);

err:	WT_TRET(__cursor_reset(cbt));
	return (ret);
}
Example #7
/*
 * __wt_verify --
 *	Verify a file.
 */
int
__wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_CKPT *ckptbase, *ckpt;
	WT_DECL_RET;
	WT_VSTUFF *vs, _vstuff;
	size_t root_addr_size;
	uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE];
	bool bm_start, quit;

	btree = S2BT(session);
	bm = btree->bm;
	ckptbase = NULL;
	bm_start = false;

	WT_CLEAR(_vstuff);
	vs = &_vstuff;
	WT_ERR(__wt_scr_alloc(session, 0, &vs->max_key));
	WT_ERR(__wt_scr_alloc(session, 0, &vs->max_addr));
	WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp1));
	WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp2));
	WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp3));
	WT_ERR(__wt_scr_alloc(session, 0, &vs->tmp4));

	/* Check configuration strings. */
	WT_ERR(__verify_config(session, cfg, vs));

	/* Optionally dump specific block offsets. */
	WT_ERR(__verify_config_offsets(session, cfg, &quit));
	if (quit)
		goto done;

	/* Get a list of the checkpoints for this file. */
	WT_ERR(
	    __wt_meta_ckptlist_get(session, btree->dhandle->name, &ckptbase));

	/* Inform the underlying block manager we're verifying. */
	WT_ERR(bm->verify_start(bm, session, ckptbase, cfg));
	bm_start = true;

	/* Loop through the file's checkpoints, verifying each one. */
	WT_CKPT_FOREACH(ckptbase, ckpt) {
		WT_ERR(__wt_verbose(session, WT_VERB_VERIFY,
		    "%s: checkpoint %s", btree->dhandle->name, ckpt->name));

		/* Fake checkpoints require no work. */
		if (F_ISSET(ckpt, WT_CKPT_FAKE))
			continue;

		/* House-keeping between checkpoints. */
		__verify_checkpoint_reset(vs);

		if (WT_VRFY_DUMP(vs))
			WT_ERR(__wt_msg(session, "%s: checkpoint %s",
			    btree->dhandle->name, ckpt->name));

		/* Load the checkpoint. */
		WT_ERR(bm->checkpoint_load(bm, session,
		    ckpt->raw.data, ckpt->raw.size,
		    root_addr, &root_addr_size, true));

		/*
		 * Ignore trees with no root page.
		 * Verify, then discard the checkpoint from the cache.
		 */
		if (root_addr_size != 0 &&
		    (ret = __wt_btree_tree_open(
		    session, root_addr, root_addr_size)) == 0) {
			if (WT_VRFY_DUMP(vs))
				WT_ERR(__wt_msg(session, "Root: %s %s",
				    __wt_addr_string(session,
				    root_addr, root_addr_size, vs->tmp1),
				    __wt_page_type_string(
				    btree->root.page->type)));

			WT_WITH_PAGE_INDEX(session,
			    ret = __verify_tree(session, &btree->root, vs));

			WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD));
		}

		/* Unload the checkpoint. */
		WT_TRET(bm->checkpoint_unload(bm, session));
		WT_ERR(ret);

		/* Display the tree shape. */
		if (vs->dump_shape)
			WT_ERR(__verify_tree_shape(session, vs));
	}