Example #1
0
/*
 * __wt_lsm_tree_setup_chunk --
 *	Name a chunk of an LSM tree and create its underlying object,
 *	dropping any previous object of the same name first (except for
 *	the very first chunk).
 */
int
__wt_lsm_tree_setup_chunk(
    WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
	const char *drop_cfg[] = API_CONF_DEFAULTS(session, drop, "force");
	WT_ITEM name_buf;

	/* Build the chunk name, then hand the buffer's memory to the chunk. */
	WT_CLEAR(name_buf);
	WT_RET(__wt_lsm_tree_chunk_name(
	    session, lsm_tree, chunk->id, &name_buf));
	chunk->uri = __wt_buf_steal(session, &name_buf, NULL);

	/*
	 * Content may be left over from an aborted merge or checkpoint, so
	 * drop whatever is there before creating the chunk.
	 *
	 * The very first chunk is the exception: we are called during
	 * WT_SESSION::create, and doing a drop inside there does interesting
	 * things with handle locks and metadata tracking.  The first chunk
	 * can never be the result of an interrupted merge, anyway.
	 */
	if (chunk->id > 1)
		WT_RET(__wt_schema_drop(session, chunk->uri, drop_cfg));

	return (__wt_schema_create(
	    session, chunk->uri, lsm_tree->file_config));
}
Example #2
0
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	The session's tracking nest count is decremented; nothing else happens
 *	until it reaches zero.  At that point every tracked entry is passed to
 *	__meta_track_apply, newest first, with the caller's unroll flag.  If
 *	the caller did not ask for an unroll, no entry failed and the session
 *	has a metadata file, the metadata file is checkpointed.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, int unroll)
{
	WT_BTREE *saved_btree;
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	const char *ckpt_cfg[] = API_CONF_DEFAULTS(session, checkpoint, NULL);

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	/* If this was a nested call, the outermost caller does the work. */
	if (--session->meta_track_nest != 0)
		return (0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * Walk the entries from the most recent back to the first.  Compare
	 * before stepping back so the pointer is never moved below the start
	 * of the tracking array: forming such a pointer is undefined behavior
	 * even if it is only compared and never dereferenced.
	 */
	while (trk > trk_orig) {
		--trk;
		WT_TRET(__meta_track_apply(session, trk, unroll));
	}

	/* If the operation succeeded, checkpoint the metadata. */
	if (!unroll && ret == 0 && session->metafile != NULL) {
		/* The checkpoint runs against the session's current btree. */
		saved_btree = session->btree;
		session->btree = session->metafile;
		ret = __wt_checkpoint(session, ckpt_cfg);
		session->btree = saved_btree;
	}

	return (ret);
}
Example #3
0
/*
 * __wt_meta_turtle_init --
 *	Check the turtle file and create if necessary.
 *
 *	On return *existp is 1 if a turtle file was already present and 0
 *	otherwise (including when this call created one).
 */
int
__wt_meta_turtle_init(WT_SESSION_IMPL *session, int *existp)
{
	WT_DECL_RET;
	WT_ITEM *scratch;
	const char *cfg[] = API_CONF_DEFAULTS(file, meta, NULL);
	const char *collapsed;
	int found;

	collapsed = NULL;
	scratch = NULL;
	*existp = 0;

	/* A setup file left behind by a previous run is stale: remove it. */
	WT_RET(__wt_exist(session, WT_METADATA_TURTLE_SET, &found));
	if (found)
		WT_RET(__wt_remove(session, WT_METADATA_TURTLE_SET));

	/* An existing turtle file means there is nothing to create. */
	WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &found));
	if (found) {
		*existp = 1;
		return (0);
	}

	/*
	 * No turtle file: build the metadata file's configuration from the
	 * defaults plus the formats and current btree version, collapse it
	 * into a single string and write it as the metadata URI's entry.
	 */
	WT_ERR(__wt_scr_alloc(session, 0, &scratch));
	WT_ERR(__wt_buf_fmt(session, scratch,
	    "key_format=S,value_format=S,version=(major=%d,minor=%d)",
	    WT_BTREE_MAJOR_VERSION, WT_BTREE_MINOR_VERSION));
	cfg[1] = scratch->data;
	WT_ERR(__wt_config_collapse(session, cfg, &collapsed));
	WT_ERR(__wt_meta_turtle_update(session, WT_METADATA_URI, collapsed));

	/* Success and failure share the same cleanup. */
err:	__wt_free(session, collapsed);
	__wt_scr_free(&scratch);

	return (ret);
}
Example #4
0
/*
 * __lsm_tree_open_check --
 *	Validate the configuration of an LSM tree.
 *
 *	Returns EINVAL (with a message) if the connection's cache is smaller
 *	than three chunks plus one maximum-size leaf page for every
 *	participant in every merge thread; returns 0 otherwise.
 */
static int
__lsm_tree_open_check(
    WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_CONFIG_ITEM cval;
	const char *cfg[] = API_CONF_DEFAULTS(
	    session, create, lsm_tree->file_config);
	uint64_t required;
	uint32_t maxleafpage;

	WT_RET(__wt_config_gets(
	    session, cfg, "leaf_page_max", &cval));
	maxleafpage = (uint32_t)cval.val;

	/*
	 * Three chunks, plus one page for each participant in a merge.
	 *
	 * Widen every operand to 64 bits before multiplying: if the tree's
	 * fields are narrower than the result, the products could otherwise
	 * wrap before being stored, making the requirement look smaller than
	 * it is and letting an undersized cache pass the check.
	 */
	required = 3 * (uint64_t)lsm_tree->chunk_size +
	    (uint64_t)lsm_tree->merge_threads *
	    ((uint64_t)lsm_tree->merge_max * (uint64_t)maxleafpage);
	if (S2C(session)->cache_size < required)
		WT_RET_MSG(session, EINVAL,
		    "The LSM configuration requires a cache size of at least %"
		    PRIu64 ". Configured size is %" PRIu64,
		    required, S2C(session)->cache_size);
	return (0);
}
Example #5
0
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 *
 *	If the tree is already open or already has metadata, returns EEXIST
 *	when exclusive is set and 0 otherwise.  Column-store key formats and
 *	inconsistent Bloom filter settings are rejected with EINVAL.  On
 *	success the tree's metadata has been written and the tree has been
 *	opened once and released.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] = API_CONF_DEFAULTS(session, create, config);
	const char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 *
	 * Capture the return value so that a failure other than "not found"
	 * is reported to the caller rather than being treated as "the tree
	 * does not exist".
	 */
	if ((ret = __wt_metadata_read(session, uri, &tmpconfig)) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	/*
	 * From here on the handle is allocated: every failure must go through
	 * the error label so the partially populated handle is discarded.
	 */
	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->value_format));

	/* Collect the Bloom filter flags. */
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_newest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST);
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST) ||
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	/* A structured value has its first and last characters trimmed. */
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_config", &cval));
	if (cval.type == ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_chunk_size", &cval));
	lsm_tree->chunk_size = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_threads", &cval));
	lsm_tree->merge_threads = (uint32_t)cval.val;
	/* Sanity check that api_data.py is in sync with lsm.h */
	WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS);

	/*
	 * The files underneath the tree use the caller's configuration with
	 * the key and value formats overridden.
	 *
	 * NOTE(review): config is formatted with %s; confirm callers never
	 * pass NULL here.
	 */
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s,key_format=u,value_format=u", config));
	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
Example #6
0
/*
 * __wt_lsm_stat_init --
 *	Initialize a LSM statistics structure.
 *
 *	The statistics attached to cst (allocated here if cst has none) are
 *	seeded from the tree's own statistics, then the values read from a
 *	statistics cursor on every chunk are added in.  For chunks with a
 *	Bloom filter, selected cache statistics of the Bloom filter are
 *	added as well.  If WT_STATISTICS_CLEAR is set in flags, the tree's
 *	own statistics are cleared after being copied.
 */
int
__wt_lsm_stat_init(WT_SESSION_IMPL *session,
    WT_LSM_TREE *lsm_tree, WT_CURSOR_STAT *cst, uint32_t flags)
{
	WT_CURSOR *stat_cursor;
	WT_DECL_ITEM(uribuf);
	WT_DECL_RET;
	WT_DSRC_STATS *stats;
	WT_LSM_CHUNK *chunk;
	const char *cfg[] = API_CONF_DEFAULTS(
	    session, open_cursor, "statistics_fast=on");
	const char *disk_cfg[] = API_CONF_DEFAULTS(session,
	    open_cursor, "checkpoint=WiredTigerCheckpoint,statistics_fast=on");
	const char *desc, *pvalue;
	uint64_t value;
	u_int i;
	int cursor_open, locked, stat_key;

	WT_UNUSED(flags);
	stat_cursor = NULL;
	cursor_open = locked = 0;

	WT_ERR(__wt_scr_alloc(session, 0, &uribuf));

	/* Clear the statistics we are about to recalculate. */
	if (cst->stats != NULL)
		stats = (WT_DSRC_STATS *)cst->stats;
	else {
		WT_ERR(__wt_calloc_def(session, 1, &stats));
		__wt_stat_init_dsrc_stats(stats);
		cst->stats_first = cst->stats = (WT_STATS *)stats;
		cst->stats_count = sizeof(*stats) / sizeof(WT_STATS);
	}
	*stats = lsm_tree->stats;

	if (LF_ISSET(WT_STATISTICS_CLEAR))
		__wt_stat_clear_dsrc_stats(&lsm_tree->stats);

	/* Hold the LSM lock so that we can safely walk through the chunks. */
	WT_ERR(__wt_readlock(session, lsm_tree->rwlock));
	locked = 1;

	/* Set the stats for this run. */
	WT_STAT_SET(stats, lsm_chunk_count, lsm_tree->nchunks);
	for (i = 0; i < lsm_tree->nchunks; i++) {
		chunk = lsm_tree->chunk[i];
		if (chunk->generation >
		    (uint32_t)WT_STAT(stats, lsm_generation_max))
			WT_STAT_SET(stats,
			    lsm_generation_max, chunk->generation);

		/*
		 * LSM chunk reads happen from a checkpoint, so get the
		 * statistics for a checkpoint if one exists.
		 */
		WT_ERR(__wt_buf_fmt(
		    session, uribuf, "statistics:%s", chunk->uri));
		ret = __wt_curstat_open(session, uribuf->data,
		    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ? disk_cfg : cfg,
		    &stat_cursor);
		/*
		 * XXX kludge: we may have an empty chunk where no checkpoint
		 * was written.  If so, try to open the ordinary handle on that
		 * chunk instead.
		 */
		if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
			ret = __wt_curstat_open(
			    session, uribuf->data, cfg, &stat_cursor);
		WT_ERR(ret);

		/*
		 * Remember that a cursor is open so that any failure before
		 * the matching close still closes it in the cleanup path.
		 */
		cursor_open = 1;

		/* Add every statistic of the chunk to the totals. */
		while ((ret = stat_cursor->next(stat_cursor)) == 0) {
			WT_ERR(stat_cursor->get_key(stat_cursor, &stat_key));
			WT_ERR(stat_cursor->get_value(
			    stat_cursor, &desc, &pvalue, &value));
			WT_STAT_INCRKV(stats, stat_key, value);
		}
		WT_ERR_NOTFOUND_OK(ret);

		/* Clear the flag first: the cursor must not be closed twice. */
		cursor_open = 0;
		WT_ERR(stat_cursor->close(stat_cursor));

		if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
			continue;

		/* Account for the chunk's Bloom filter. */
		WT_STAT_INCR(stats, bloom_count);
		WT_STAT_INCRV(stats, bloom_size,
		    (chunk->count * lsm_tree->bloom_bit_count) / 8);

		WT_ERR(__wt_buf_fmt(
		    session, uribuf, "statistics:%s", chunk->bloom_uri));
		WT_ERR(__wt_curstat_open(session, uribuf->data,
		    cfg, &stat_cursor));
		cursor_open = 1;

		/* Clean and dirty evictions both count as Bloom page evicts. */
		stat_cursor->set_key(
		    stat_cursor, WT_STAT_DSRC_CACHE_EVICTION_CLEAN);
		WT_ERR(stat_cursor->search(stat_cursor));
		WT_ERR(stat_cursor->get_value(
		    stat_cursor, &desc, &pvalue, &value));
		WT_STAT_INCRV(stats, cache_eviction_clean, value);
		WT_STAT_INCRV(stats, bloom_page_evict, value);

		stat_cursor->set_key(
		    stat_cursor, WT_STAT_DSRC_CACHE_EVICTION_DIRTY);
		WT_ERR(stat_cursor->search(stat_cursor));
		WT_ERR(stat_cursor->get_value(
		    stat_cursor, &desc, &pvalue, &value));
		WT_STAT_INCRV(stats, cache_eviction_dirty, value);
		WT_STAT_INCRV(stats, bloom_page_evict, value);

		stat_cursor->set_key(
		    stat_cursor, WT_STAT_DSRC_CACHE_EVICTION_FAIL);
		WT_ERR(stat_cursor->search(stat_cursor));
		WT_ERR(stat_cursor->get_value(
		    stat_cursor, &desc, &pvalue, &value));
		WT_STAT_INCRV(stats, cache_eviction_fail, value);

		stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_CACHE_READ);
		WT_ERR(stat_cursor->search(stat_cursor));
		WT_ERR(stat_cursor->get_value(
		    stat_cursor, &desc, &pvalue, &value));
		WT_STAT_INCRV(stats, cache_read, value);
		WT_STAT_INCRV(stats, bloom_page_read, value);

		stat_cursor->set_key(stat_cursor, WT_STAT_DSRC_CACHE_WRITE);
		WT_ERR(stat_cursor->search(stat_cursor));
		WT_ERR(stat_cursor->get_value(
		    stat_cursor, &desc, &pvalue, &value));
		WT_STAT_INCRV(stats, cache_write, value);

		cursor_open = 0;
		WT_ERR(stat_cursor->close(stat_cursor));
	}

	/* Close a cursor left open by a failure, then release the lock. */
err:	if (cursor_open)
		WT_TRET(stat_cursor->close(stat_cursor));
	if (locked)
		WT_TRET(__wt_rwunlock(session, lsm_tree->rwlock));
	__wt_scr_free(&uribuf);

	return (ret);
}