示例#1
0
/*__wt_session_compact*/
int __wt_session_compact(WT_SESSION *wt_session, const char *uri, const char *config)
{
	WT_COMPACT compact;
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = (WT_SESSION_IMPL *)wt_session;
	SESSION_API_CALL(session, compact, config, cfg);

	/* Setup the structure in the session handle */
	memset(&compact, 0, sizeof(WT_COMPACT));
	session->compact = &compact;

	WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
	session->compact->max_time = (uint64_t)cval.val;

	/* Find the types of data sources are being compacted. */
	WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, NULL, __wt_compact_uri_analyze, cfg, 0));
	WT_ERR(ret);

	if (session->compact->lsm_count != 0)
		WT_ERR(__wt_schema_worker(session, uri, NULL, __wt_lsm_compact, cfg, 0));
	if (session->compact->file_count != 0)
		WT_ERR(__compact_file(session, uri, cfg));

err:	
	session->compact = NULL;
	API_END_RET_NOTFOUND_MAP(session, ret);
}
示例#2
0
/*
 * __wt_lsm_tree_worker --
 *	Run a schema worker operation on each level of a LSM tree.
 */
int
__wt_lsm_tree_worker(WT_SESSION_IMPL *session,
   const char *uri,
   int (*file_func)(WT_SESSION_IMPL *, const char *[]),
   int (*name_func)(WT_SESSION_IMPL *, const char *, int *),
   const char *cfg[], uint32_t open_flags)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_TREE *lsm_tree;
	u_int i;
	int exclusive, locked;

	locked = 0;
	exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE) ? 1 : 0;
	WT_RET(__wt_lsm_tree_get(session, uri, exclusive, &lsm_tree));

	/*
	 * We mark that we're busy using the tree to coordinate
	 * with merges so that merging doesn't change the chunk
	 * array out from underneath us.
	 */
	WT_ERR(exclusive ?
	    __wt_lsm_tree_writelock(session, lsm_tree) :
	    __wt_lsm_tree_readlock(session, lsm_tree));
	locked = 1;
	for (i = 0; i < lsm_tree->nchunks; i++) {
		chunk = lsm_tree->chunk[i];
		if (file_func == __wt_checkpoint &&
		    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
			continue;
		WT_ERR(__wt_schema_worker(session, chunk->uri,
		    file_func, name_func, cfg, open_flags));
		if (name_func == __wt_backup_list_uri_append &&
		    F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
			WT_ERR(__wt_schema_worker(session, chunk->bloom_uri,
			    file_func, name_func, cfg, open_flags));
	}
err:	if (locked)
		WT_TRET(exclusive ?
		    __wt_lsm_tree_writeunlock(session, lsm_tree) :
		    __wt_lsm_tree_readunlock(session, lsm_tree));
	__wt_lsm_tree_release(session, lsm_tree);
	return (ret);
}
示例#3
0
/*
 * __backup_uri --
 *	Backup a list of objects.
 */
static int
__backup_uri(WT_SESSION_IMPL *session,
             WT_CURSOR_BACKUP *cb, const char *cfg[], int *foundp, int *log_only)
{
    WT_CONFIG targetconf;
    WT_CONFIG_ITEM cval, k, v;
    WT_DECL_ITEM(tmp);
    WT_DECL_RET;
    int target_list;
    const char *uri;

    *foundp = 0;
    *log_only = 0;

    /*
     * If we find a non-empty target configuration string, we have a job,
     * otherwise it's not our problem.
     */
    WT_RET(__wt_config_gets(session, cfg, "target", &cval));
    WT_RET(__wt_config_subinit(session, &targetconf, &cval));
    for (cb->list_next = 0, target_list = 0;
            (ret = __wt_config_next(&targetconf, &k, &v)) == 0; ++target_list) {
        /* If it is our first time through, allocate. */
        if (target_list == 0) {
            *foundp = 1;
            WT_ERR(__wt_scr_alloc(session, 512, &tmp));
        }

        WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)k.len, k.str));
        uri = tmp->data;
        if (v.len != 0)
            WT_ERR_MSG(session, EINVAL,
                       "%s: invalid backup target: URIs may need quoting",
                       uri);

        /*
         * Handle log targets.  We do not need to go through the
         * schema worker, just call the function to append them.
         * Set log_only only if it is our only URI target.
         */
        if (WT_PREFIX_MATCH(uri, "log:")) {
            if (target_list == 0)
                *log_only = 1;
            else
                *log_only = 0;
            WT_ERR(__wt_backup_list_uri_append(
                       session, uri, NULL));
        } else
            WT_ERR(__wt_schema_worker(session,
                                      uri, NULL, __wt_backup_list_uri_append, cfg, 0));
    }
    WT_ERR_NOTFOUND_OK(ret);

err:
    __wt_scr_free(session, &tmp);
    return (ret);
}
示例#4
0
/*btree file的compact操作*/
static int __compact_file(WT_SESSION_IMPL* session, const char* uri, const char* cfg[])
{
	WT_DECL_RET;
	WT_DECL_ITEM(t);
	WT_SESSION *wt_session;
	WT_TXN *txn;
	int i;
	struct timespec start_time;

	txn = &session->txn;
	wt_session = &session->iface;

	/*
	 * File compaction requires checkpoints, which will fail in a
	 * transactional context.  Check now so the error message isn't
	 * confusing.
	 */
	if(session->compact->file_count != 0 && F_ISSET(txn, TXN_RUNNING))
		WT_ERR_MSG(session, EINVAL, " File compaction not permitted in a transaction");

	/*
	 * Force the checkpoint: we don't want to skip it because the work we
	 * need to have done is done in the underlying block manager.
	 */
	WT_ERR(__wt_scr_alloc(session, 128, &t));
	WT_ERR(__wt_buf_fmt(session, t, "target=(\"%s\"),force=1", uri));

	WT_ERR(__wt_epoch(session, &start_time));

	/*
	 * We compact 10% of the file on each pass (but the overall size of the
	 * file is decreasing each time, so we're not compacting 10% of the
	 * original file each time). Try 100 times (which is clearly more than
	 * we need); quit if we make no progress and check for a timeout each
	 * time through the loop.
	 */
	for (i = 0; i < 100; ++i) {
		WT_ERR(wt_session->checkpoint(wt_session, t->data));

		session->compaction = 0;
		WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_compact, NULL, cfg, 0));
		WT_ERR(ret);
		if (!session->compaction)
			break;

		WT_ERR(wt_session->checkpoint(wt_session, t->data));
		WT_ERR(wt_session->checkpoint(wt_session, t->data));
		WT_ERR(__session_compact_check_timeout(session, start_time));
	}

err:
	__wt_scr_free(session, &t);
}
示例#5
0
/*
 * __session_sync --
 *	WT_SESSION->sync method.
 */
static int
__session_sync(WT_SESSION *wt_session, const char *uri, const char *config)
{
    WT_SESSION_IMPL *session;
    int ret;

    session = (WT_SESSION_IMPL *)wt_session;

    SESSION_API_CALL(session, sync, config, cfg);
    ret = __wt_schema_worker(session, uri, cfg, __wt_btree_sync, 0);

err:
    API_END_NOTFOUND_MAP(session, ret);
}
示例#6
0
/*
 * __session_dumpfile --
 *	WT_SESSION->dumpfile method.
 */
static int
__session_dumpfile(WT_SESSION *wt_session, const char *uri, const char *config)
{
    WT_SESSION_IMPL *session;
    int ret;

    session = (WT_SESSION_IMPL *)wt_session;
    SESSION_API_CALL(session, dumpfile, config, cfg);
    ret = __wt_schema_worker(session, uri, cfg,
                             __wt_dumpfile, WT_BTREE_EXCLUSIVE | WT_BTREE_VERIFY);

err:
    API_END_NOTFOUND_MAP(session, ret);
}
示例#7
0
/*
 * __session_verify --
 *	WT_SESSION->verify method.
 */
static int
__session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = (WT_SESSION_IMPL *)wt_session;

	SESSION_API_CALL(session, verify, config, cfg);
	WT_WITH_SCHEMA_LOCK(session,
	    ret = __wt_schema_worker(session, uri,
		__wt_verify, cfg, WT_BTREE_EXCLUSIVE | WT_BTREE_VERIFY));

err:	API_END_NOTFOUND_MAP(session, ret);
}
示例#8
0
/*
 * __session_compact_worker --
 *	Worker function to do the actual compaction call.
 */
static int
__session_compact_worker(
    WT_SESSION *wt_session, const char *uri, const char *config)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = (WT_SESSION_IMPL *)wt_session;
	SESSION_API_CALL(session, compact, config, cfg);

	WT_WITH_SCHEMA_LOCK(session,
	    ret = __wt_schema_worker(session, uri, __wt_compact, cfg, 0));

err:	API_END_NOTFOUND_MAP(session, ret);
}
示例#9
0
/*
 * __backup_uri --
 *	Backup a list of objects.
 */
static int
__backup_uri(WT_SESSION_IMPL *session,
    WT_CURSOR_BACKUP *cb, const char *cfg[], int *foundp)
{
	WT_CONFIG targetconf;
	WT_CONFIG_ITEM cval, k, v;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	int target_list;
	const char *uri;

	*foundp = target_list = 0;

	/*
	 * If we find a non-empty target configuration string, we have a job,
	 * otherwise it's not our problem.
	 */
	WT_RET(__wt_config_gets(session, cfg, "target", &cval));
	WT_RET(__wt_config_subinit(session, &targetconf, &cval));
	for (cb->list_next = 0;
	    (ret = __wt_config_next(&targetconf, &k, &v)) == 0;) {
		if (!target_list) {
			target_list = *foundp = 1;

			WT_ERR(__wt_scr_alloc(session, 512, &tmp));
		}

		WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)k.len, k.str));
		uri = tmp->data;
		if (v.len != 0)
			WT_ERR_MSG(session, EINVAL,
			    "%s: invalid backup target: URIs may need quoting",
			    uri);

		WT_ERR(__wt_schema_worker(
		    session, uri, NULL, __wt_backup_list_uri_append, cfg, 0));
	}
	WT_ERR_NOTFOUND_OK(ret);

err:	__wt_scr_free(&tmp);
	return (ret);
}
示例#10
0
/*
 * __wt_lsm_tree_worker --
 *	Run a schema worker operation on each level of a LSM tree.
 */
int
__wt_lsm_tree_worker(WT_SESSION_IMPL *session,
   const char *uri,
   int (*func)(WT_SESSION_IMPL *, const char *[]),
   const char *cfg[], uint32_t open_flags)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_TREE *lsm_tree;
	u_int i;

	WT_RET(__wt_lsm_tree_get(session, uri,
	    FLD_ISSET(open_flags, WT_BTREE_EXCLUSIVE) ? 1 : 0, &lsm_tree));
	for (i = 0; i < lsm_tree->nchunks; i++) {
		chunk = lsm_tree->chunk[i];
		if (func == __wt_checkpoint &&
		    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))
			continue;
		WT_ERR(__wt_schema_worker(
		    session, chunk->uri, func, cfg, open_flags));
	}
err:	__wt_lsm_tree_release(session, lsm_tree);
	return (ret);
}
示例#11
0
/*
 * __wt_session_compact --
 *	WT_SESSION.compact method.
 */
int
__wt_session_compact(
    WT_SESSION *wt_session, const char *uri, const char *config)
{
	WT_COMPACT_STATE compact;
	WT_CONFIG_ITEM cval;
	WT_DATA_SOURCE *dsrc;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	u_int i;
	bool ignore_cache_size_set;

	ignore_cache_size_set = false;

	session = (WT_SESSION_IMPL *)wt_session;
	SESSION_API_CALL(session, compact, config, cfg);

	/*
	 * The compaction thread should not block when the cache is full: it is
	 * holding locks blocking checkpoints and once the cache is full, it can
	 * spend a long time doing eviction.
	 */
	if (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE)) {
		ignore_cache_size_set = true;
		F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
	}

	/* In-memory ignores compaction operations. */
	if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto err;

	/*
	 * Non-LSM object compaction requires checkpoints, which are impossible
	 * in transactional contexts. Disallow in all contexts (there's no
	 * reason for LSM to allow this, possible or not), and check now so the
	 * error message isn't confusing.
	 */
	WT_ERR(__wt_txn_context_check(session, false));

	/* Disallow objects in the WiredTiger name space. */
	WT_ERR(__wt_str_name_check(session, uri));

	if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
	    !WT_PREFIX_MATCH(uri, "file:") &&
	    !WT_PREFIX_MATCH(uri, "index:") &&
	    !WT_PREFIX_MATCH(uri, "lsm:") &&
	    !WT_PREFIX_MATCH(uri, "table:")) {
		if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
			ret = dsrc->compact == NULL ?
			    __wt_object_unsupported(session, uri) :
			    dsrc->compact(
			    dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg);
		else
			ret = __wt_bad_object_type(session, uri);
		goto err;
	}

	/* Setup the session handle's compaction state structure. */
	memset(&compact, 0, sizeof(WT_COMPACT_STATE));
	session->compact = &compact;

	/* Compaction can be time-limited. */
	WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
	session->compact->max_time = (uint64_t)cval.val;
	__wt_epoch(session, &session->compact->begin);

	/* Find the types of data sources being compacted. */
	WT_WITH_SCHEMA_LOCK(session,
	    ret = __wt_schema_worker(session, uri,
	    __compact_handle_append, __compact_uri_analyze, cfg, 0));
	WT_ERR(ret);

	if (session->compact->lsm_count != 0)
		WT_ERR(__wt_schema_worker(
		    session, uri, NULL, __wt_lsm_compact, cfg, 0));
	if (session->compact->file_count != 0)
		WT_ERR(__compact_worker(session));

err:	session->compact = NULL;

	for (i = 0; i < session->op_handle_next; ++i) {
		WT_WITH_DHANDLE(session, session->op_handle[i],
		    WT_TRET(__compact_end(session)));
		WT_WITH_DHANDLE(session, session->op_handle[i],
		    WT_TRET(__wt_session_release_dhandle(session)));
	}

	__wt_free(session, session->op_handle);
	session->op_handle_allocated = session->op_handle_next = 0;

	/*
	 * Release common session resources (for example, checkpoint may acquire
	 * significant reconciliation structures/memory).
	 */
	WT_TRET(__wt_session_release_resources(session));

	if (ignore_cache_size_set)
		F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);

	if (ret != 0)
		WT_STAT_CONN_INCR(session, session_table_compact_fail);
	else
		WT_STAT_CONN_INCR(session, session_table_compact_success);
	API_END_RET_NOTFOUND_MAP(session, ret);
}
示例#12
0
/*
 * __wt_schema_worker --
 *	Get Btree handles for the object and cycle through calls to an
 *	underlying worker function with each handle.
 */
int
__wt_schema_worker(WT_SESSION_IMPL *session,
   const char *uri,
   int (*file_func)(WT_SESSION_IMPL *, const char *[]),
   int (*name_func)(WT_SESSION_IMPL *, const char *, bool *),
   const char *cfg[], uint32_t open_flags)
{
	WT_COLGROUP *colgroup;
	WT_DATA_SOURCE *dsrc;
	WT_DECL_RET;
	WT_INDEX *idx;
	WT_SESSION *wt_session;
	WT_TABLE *table;
	u_int i;
	bool skip;

	table = NULL;

	skip = false;
	if (name_func != NULL)
		WT_ERR(name_func(session, uri, &skip));

	/* If the callback said to skip this object, we're done. */
	if (skip)
		return (0);

	/* Get the btree handle(s) and call the underlying function. */
	if (WT_PREFIX_MATCH(uri, "file:")) {
		if (file_func != NULL)
			WT_ERR(__wt_exclusive_handle_operation(session,
			    uri, file_func, cfg, open_flags));
	} else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
		WT_ERR(__wt_schema_get_colgroup(
		    session, uri, false, NULL, &colgroup));
		WT_ERR(__wt_schema_worker(session,
		    colgroup->source, file_func, name_func, cfg, open_flags));
	} else if (WT_PREFIX_MATCH(uri, "index:")) {
		idx = NULL;
		WT_ERR(__wt_schema_get_index(session, uri, false, false, &idx));
		WT_ERR(__wt_schema_worker(session, idx->source,
		    file_func, name_func, cfg, open_flags));
	} else if (WT_PREFIX_MATCH(uri, "lsm:")) {
		WT_ERR(__wt_lsm_tree_worker(session,
		    uri, file_func, name_func, cfg, open_flags));
	} else if (WT_PREFIX_MATCH(uri, "table:")) {
		/*
		 * Note: we would like to use open_flags here (e.g., to lock
		 * the table exclusive during schema-changing operations), but
		 * that is currently problematic because we get the table again
		 * in order to discover column groups and indexes.
		 */
		WT_ERR(__wt_schema_get_table_uri(
		    session, uri, false, 0, &table));

		/*
		 * We could make a recursive call for each colgroup or index
		 * URI, but since we have already opened the table, we can take
		 * a short cut and skip straight to the sources.  If we have a
		 * name function, it needs to know about the intermediate URIs.
		 */
		for (i = 0; i < WT_COLGROUPS(table); i++) {
			colgroup = table->cgroups[i];
			skip = false;
			if (name_func != NULL)
				WT_ERR(name_func(
				    session, colgroup->name, &skip));
			if (!skip)
				WT_ERR(__wt_schema_worker(
				    session, colgroup->source,
				    file_func, name_func, cfg, open_flags));
		}

		WT_ERR(__wt_schema_open_indices(session, table));
		for (i = 0; i < table->nindices; i++) {
			idx = table->indices[i];
			skip = false;
			if (name_func != NULL)
				WT_ERR(name_func(session, idx->name, &skip));
			if (!skip)
				WT_ERR(__wt_schema_worker(session, idx->source,
				    file_func, name_func, cfg, open_flags));
		}
	} else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) {
		wt_session = (WT_SESSION *)session;
		if (file_func == __wt_salvage && dsrc->salvage != NULL)
			WT_ERR(dsrc->salvage(
			    dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
		else if (file_func == __wt_verify && dsrc->verify != NULL)
			WT_ERR(dsrc->verify(
			    dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
		else if (file_func == __wt_checkpoint)
			;
		else if (file_func == __wt_checkpoint_get_handles)
			;
		else if (file_func == __wt_checkpoint_sync)
			;
		else
			WT_ERR(__wt_object_unsupported(session, uri));
	} else
		WT_ERR(__wt_bad_object_type(session, uri));

err:	if (table != NULL)
		WT_TRET(__wt_schema_release_table(session, table));
	return (ret);
}
示例#13
0
文件: cur_backup.c 项目: DINKIN/mongo
/*
 * __backup_uri --
 *	Backup a list of objects.
 */
static int
__backup_uri(WT_SESSION_IMPL *session,
    const char *cfg[], bool *foundp, bool *log_only)
{
	WT_CONFIG targetconf;
	WT_CONFIG_ITEM cval, k, v;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	bool target_list;
	const char *uri;

	*foundp = *log_only = false;

	/*
	 * If we find a non-empty target configuration string, we have a job,
	 * otherwise it's not our problem.
	 */
	WT_RET(__wt_config_gets(session, cfg, "target", &cval));
	__wt_config_subinit(session, &targetconf, &cval);
	for (target_list = false;
	    (ret = __wt_config_next(&targetconf, &k, &v)) == 0;
	    target_list = true) {
		/* If it is our first time through, allocate. */
		if (!target_list) {
			*foundp = true;
			WT_ERR(__wt_scr_alloc(session, 512, &tmp));
		}

		WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)k.len, k.str));
		uri = tmp->data;
		if (v.len != 0)
			WT_ERR_MSG(session, EINVAL,
			    "%s: invalid backup target: URIs may need quoting",
			    uri);

		/*
		 * Handle log targets. We do not need to go through the schema
		 * worker, just call the function to append them. Set log_only
		 * only if it is our only URI target.
		 */
		if (WT_PREFIX_MATCH(uri, "log:")) {
			/*
			 * Log archive cannot mix with incremental backup, don't
			 * let that happen.
			 */
			if (FLD_ISSET(
			    S2C(session)->log_flags, WT_CONN_LOG_ARCHIVE))
				WT_ERR_MSG(session, EINVAL,
				    "incremental backup not possible when "
				    "automatic log archival configured");
			*log_only = !target_list;
			WT_ERR(__backup_log_append(
			    session, session->bkp_cursor, false));
		} else {
			*log_only = false;
			WT_ERR(__wt_schema_worker(session,
			    uri, NULL, __backup_list_uri_append, cfg, 0));
		}
	}
	WT_ERR_NOTFOUND_OK(ret);

err:	__wt_scr_free(session, &tmp);
	return (ret);
}
示例#14
0
/*
 * __wt_lsm_checkpoint_worker --
 *	A worker thread for an LSM tree, responsible for flushing new chunks to
 *	disk.
 */
void *
__wt_lsm_checkpoint_worker(void *arg)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_TREE *lsm_tree;
	WT_LSM_WORKER_COOKIE cookie;
	WT_SESSION_IMPL *session;
	WT_TXN_ISOLATION saved_isolation;
	u_int i, j;
	int locked;

	lsm_tree = arg;
	session = lsm_tree->ckpt_session;

	WT_CLEAR(cookie);

	while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
		if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
			WT_WITH_SCHEMA_LOCK(session, ret =
			    __wt_lsm_tree_switch(session, lsm_tree));
			WT_ERR(ret);
		}

		WT_ERR(__lsm_copy_chunks(session, lsm_tree, &cookie, 0));

		/* Write checkpoints in all completed files. */
		for (i = 0, j = 0; i < cookie.nchunks - 1; i++) {
			if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING))
				goto err;

			if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
				break;

			chunk = cookie.chunk_array[i];

			/* Stop if a running transaction needs the chunk. */
			__wt_txn_update_oldest(session);
			if (!__wt_txn_visible_all(session, chunk->txnid_max))
				break;

			/*
			 * If the chunk is already checkpointed, make sure it
			 * is also evicted.  Either way, there is no point
			 * trying to checkpoint it again.
			 */
			if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK)) {
				if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_EVICTED))
					continue;

				if ((ret = __lsm_discard_handle(
				    session, chunk->uri, NULL)) == 0)
					F_SET_ATOMIC(
					    chunk, WT_LSM_CHUNK_EVICTED);
				else if (ret == EBUSY)
					ret = 0;
				else
					WT_ERR_MSG(session, ret,
					    "discard handle");
				continue;
			}

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker flushing %u", i);

			/*
			 * Flush the file before checkpointing: this is the
			 * expensive part in terms of I/O: do it without
			 * holding the schema lock.
			 *
			 * Use the special eviction isolation level to avoid
			 * interfering with an application checkpoint: we have
			 * already checked that all of the updates in this
			 * chunk are globally visible.
			 *
			 * !!! We can wait here for checkpoints and fsyncs to
			 * complete, which can be a long time.
			 *
			 * Don't keep waiting for the lock if application
			 * threads are waiting for a switch.  Don't skip
			 * flushing the leaves either: that just means we'll
			 * hold the schema lock for (much) longer, which blocks
			 * the world.
			 */
			WT_ERR(__wt_session_get_btree(
			    session, chunk->uri, NULL, NULL, 0));
			for (locked = 0;
			    !locked && ret == 0 &&
			    !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);) {
				if ((ret = __wt_spin_trylock(session,
				    &S2C(session)->checkpoint_lock)) == 0)
					locked = 1;
				else if (ret == EBUSY) {
					__wt_yield();
					ret = 0;
				}
			}
			if (locked) {
				saved_isolation = session->txn.isolation;
				session->txn.isolation = TXN_ISO_EVICTION;
				ret = __wt_bt_cache_op(
				    session, NULL, WT_SYNC_WRITE_LEAVES);
				session->txn.isolation = saved_isolation;
				__wt_spin_unlock(
				    session, &S2C(session)->checkpoint_lock);
			}
			WT_TRET(__wt_session_release_btree(session));
			WT_ERR(ret);

			if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
				break;

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker checkpointing %u", i);

			WT_WITH_SCHEMA_LOCK(session,
			    ret = __wt_schema_worker(session, chunk->uri,
			    __wt_checkpoint, NULL, NULL, 0));

			if (ret != 0) {
				__wt_err(session, ret, "LSM checkpoint");
				break;
			}

			WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
			/*
			 * Clear the "cache resident" flag so the primary can
			 * be evicted and eventually closed.  Only do this once
			 * the checkpoint has succeeded: otherwise, accessing
			 * the leaf page during the checkpoint can trigger
			 * forced eviction.
			 */
			WT_ERR(__wt_session_get_btree(
			    session, chunk->uri, NULL, NULL, 0));
			__wt_btree_evictable(session, 1);
			WT_ERR(__wt_session_release_btree(session));

			++j;
			WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1));
			F_SET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK);
			ret = __wt_lsm_meta_write(session, lsm_tree);
			++lsm_tree->dsk_gen;

			/* Update the throttle time. */
			__wt_lsm_tree_throttle(session, lsm_tree);
			WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree));

			/* Make sure we aren't pinning a transaction ID. */
			__wt_txn_release_snapshot(session);

			if (ret != 0) {
				__wt_err(session, ret,
				    "LSM checkpoint metadata write");
				break;
			}

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker checkpointed %u", i);
		}
		__lsm_unpin_chunks(session, &cookie);
		if (j == 0 && F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) &&
		    !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
			WT_ERR_TIMEDOUT_OK(__wt_cond_wait(
			    session, lsm_tree->work_cond, 100000));
	}
err:	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);
	/*
	 * The thread will only exit with failure if we run out of memory or
	 * there is some other system driven failure. We can't keep going
	 * after such a failure - ensure WiredTiger shuts down.
	 */
	if (ret != 0 && ret != WT_NOTFOUND)
		WT_PANIC_ERR(session, ret,
		    "Shutting down LSM checkpoint utility thread");
	return (NULL);
}
示例#15
0
/*
 * __wt_lsm_checkpoint_chunk --
 *	Flush a single LSM chunk to disk.
 */
int
__wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
    WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
	WT_DECL_RET;
	WT_TXN_ISOLATION saved_isolation;

	/*
	 * If the chunk is already checkpointed, make sure it is also evicted.
	 * Either way, there is no point trying to checkpoint it again.
	 */
	if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
	    !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) &&
	    !chunk->evicted) {
		if ((ret = __lsm_discard_handle(
		    session, chunk->uri, NULL)) == 0)
			chunk->evicted = 1;
		else if (ret == EBUSY)
			ret = 0;
		else
			WT_RET_MSG(session, ret, "discard handle");
	}
	if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
		WT_RET(__wt_verbose(session, WT_VERB_LSM,
		    "LSM worker %s already on disk",
		    chunk->uri));
		return (0);
	}

	/* Stop if a running transaction needs the chunk. */
	__wt_txn_update_oldest(session);
	if (chunk->switch_txn == WT_TXN_NONE ||
	    !__wt_txn_visible_all(session, chunk->switch_txn)) {
		WT_RET(__wt_verbose(session, WT_VERB_LSM,
		    "LSM worker %s: running transaction, return",
		    chunk->uri));
		return (0);
	}

	WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s",
	    chunk->uri));

	/*
	 * Flush the file before checkpointing: this is the expensive part in
	 * terms of I/O.
	 *
	 * Use the special eviction isolation level to avoid interfering with
	 * an application checkpoint: we have already checked that all of the
	 * updates in this chunk are globally visible.
	 *
	 * !!! We can wait here for checkpoints and fsyncs to complete, which
	 * can be a long time.
	 */
	if ((ret = __wt_session_get_btree(
	    session, chunk->uri, NULL, NULL, 0)) == 0) {
		saved_isolation = session->txn.isolation;
		session->txn.isolation = TXN_ISO_EVICTION;
		ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES);
		session->txn.isolation = saved_isolation;
		WT_TRET(__wt_session_release_btree(session));
	}
	WT_RET(ret);

	WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s",
	    chunk->uri));

	WT_WITH_SCHEMA_LOCK(session,
	    ret = __wt_schema_worker(session, chunk->uri,
	    __wt_checkpoint, NULL, NULL, 0));

	if (ret != 0)
		WT_RET_MSG(session, ret, "LSM checkpoint");

	/* Now the file is written, get the chunk size. */
	WT_RET(__wt_lsm_tree_set_chunk_size(session, chunk));

	/* Update the flush timestamp to help track ongoing progress. */
	WT_RET(__wt_epoch(session, &lsm_tree->last_flush_ts));

	/* Lock the tree, mark the chunk as on disk and update the metadata. */
	WT_RET(__wt_lsm_tree_writelock(session, lsm_tree));
	F_SET(chunk, WT_LSM_CHUNK_ONDISK);
	ret = __wt_lsm_meta_write(session, lsm_tree);
	++lsm_tree->dsk_gen;

	/* Update the throttle time. */
	__wt_lsm_tree_throttle(session, lsm_tree, 1);
	WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));

	if (ret != 0)
		WT_RET_MSG(session, ret, "LSM metadata write");

	/*
	 * Clear the no-eviction flag so the primary can be evicted and
	 * eventually closed.  Only do this once the checkpoint has succeeded:
	 * otherwise, accessing the leaf page during the checkpoint can trigger
	 * forced eviction.
	 */
	WT_RET(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0));
	__wt_btree_evictable(session, 1);
	WT_RET(__wt_session_release_btree(session));

	/* Make sure we aren't pinning a transaction ID. */
	__wt_txn_release_snapshot(session);

	WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed %s",
	    chunk->uri));

	/* Schedule a bloom filter create for our newly flushed chunk. */
	if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF))
		WT_RET(__wt_lsm_manager_push_entry(
		    session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
	else
		WT_RET(__wt_lsm_manager_push_entry(
		    session, WT_LSM_WORK_MERGE, 0, lsm_tree));
	return (0);
}
示例#16
0
/*
 * __wt_lsm_checkpoint_chunk --
 *	Flush a single LSM chunk to disk.
 */
int
__wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
    WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
	WT_DECL_RET;
	WT_TXN_ISOLATION saved_isolation;
	bool flush_set;

	flush_set = false;

	/*
	 * If the chunk is already checkpointed, make sure it is also evicted.
	 * Either way, there is no point trying to checkpoint it again.
	 */
	if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
	    !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) &&
	    !chunk->evicted) {
		WT_WITH_HANDLE_LIST_LOCK(session,
		    ret = __lsm_discard_handle(session, chunk->uri, NULL));
		if (ret == 0)
			chunk->evicted = 1;
		else if (ret == EBUSY)
			ret = 0;
		else
			WT_RET_MSG(session, ret, "discard handle");
	}
	if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
		WT_RET(__wt_verbose(session, WT_VERB_LSM,
		    "LSM worker %s already on disk",
		    chunk->uri));
		return (0);
	}

	/* Stop if a running transaction needs the chunk. */
	__wt_txn_update_oldest(session, true);
	if (chunk->switch_txn == WT_TXN_NONE ||
	    !__wt_txn_visible_all(session, chunk->switch_txn)) {
		WT_RET(__wt_verbose(session, WT_VERB_LSM,
		    "LSM worker %s: running transaction, return",
		    chunk->uri));
		return (0);
	}

	if (!__wt_atomic_cas8(&chunk->flushing, 0, 1))
		return (0);
	flush_set = true;

	WT_ERR(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s",
	    chunk->uri));

	/*
	 * Flush the file before checkpointing: this is the expensive part in
	 * terms of I/O.
	 *
	 * !!!
	 * We can wait here for checkpoints and fsyncs to complete, which can
	 * take a long time.
	 */
	if ((ret = __wt_session_get_btree(
	    session, chunk->uri, NULL, NULL, 0)) == 0) {
		/*
		 * Set read-uncommitted: we have already checked that all of the
		 * updates in this chunk are globally visible, use the cheapest
		 * possible check in reconciliation.
		 */
		saved_isolation = session->txn.isolation;
		session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
		ret = __wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES);
		session->txn.isolation = saved_isolation;
		WT_TRET(__wt_session_release_btree(session));
	}
	WT_ERR(ret);

	WT_ERR(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s",
	    chunk->uri));

	/*
	 * Turn on metadata tracking to ensure the checkpoint gets the
	 * necessary handle locks.
	 *
	 * Ensure that we don't race with a running checkpoint: the checkpoint
	 * lock protects against us racing with an application checkpoint in
	 * this chunk.  Don't wait for it, though: checkpoints can take a long
	 * time, and our checkpoint operation should be very quick.
	 */
	WT_ERR(__wt_meta_track_on(session));
	WT_WITH_CHECKPOINT_LOCK(session, ret,
	    WT_WITH_SCHEMA_LOCK(session, ret,
		ret = __wt_schema_worker(
		session, chunk->uri, __wt_checkpoint, NULL, NULL, 0)));
	WT_TRET(__wt_meta_track_off(session, false, ret != 0));
	if (ret != 0)
		WT_ERR_MSG(session, ret, "LSM checkpoint");

	/* Now the file is written, get the chunk size. */
	WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));

	/* Update the flush timestamp to help track ongoing progress. */
	WT_ERR(__wt_epoch(session, &lsm_tree->last_flush_ts));
	++lsm_tree->chunks_flushed;

	/* Lock the tree, mark the chunk as on disk and update the metadata. */
	WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));
	F_SET(chunk, WT_LSM_CHUNK_ONDISK);
	ret = __wt_lsm_meta_write(session, lsm_tree);
	++lsm_tree->dsk_gen;

	/* Update the throttle time. */
	__wt_lsm_tree_throttle(session, lsm_tree, true);
	WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
	if (ret != 0)
		WT_ERR_MSG(session, ret, "LSM metadata write");

	WT_PUBLISH(chunk->flushing, 0);
	flush_set = false;

	/*
	 * Clear the no-eviction flag so the primary can be evicted and
	 * eventually closed.  Only do this once the checkpoint has succeeded:
	 * otherwise, accessing the leaf page during the checkpoint can trigger
	 * forced eviction.
	 */
	WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0));
	__wt_btree_evictable(session, true);
	WT_ERR(__wt_session_release_btree(session));

	/* Make sure we aren't pinning a transaction ID. */
	__wt_txn_release_snapshot(session);

	WT_ERR(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed %s",
	    chunk->uri));

	/* Schedule a bloom filter create for our newly flushed chunk. */
	if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF))
		WT_ERR(__wt_lsm_manager_push_entry(
		    session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
	else
		WT_ERR(__wt_lsm_manager_push_entry(
		    session, WT_LSM_WORK_MERGE, 0, lsm_tree));

err:	if (flush_set)
		WT_PUBLISH(chunk->flushing, 0);

	return (ret);
}
示例#17
0
/*
 * __wt_schema_worker --
 *	Get Btree handles for the object and cycle through calls to an
 * underlying worker function with each handle.
 */
int
__wt_schema_worker(WT_SESSION_IMPL *session,
   const char *uri,
   int (*file_func)(WT_SESSION_IMPL *, const char *[]),
   int (*name_func)(WT_SESSION_IMPL *, const char *),
   const char *cfg[], uint32_t open_flags)
{
	WT_COLGROUP *colgroup;
	WT_DATA_SOURCE *dsrc;
	WT_DECL_RET;
	WT_INDEX *idx;
	WT_SESSION *wt_session;
	WT_TABLE *table;
	const char *tablename;
	u_int i;

	table = NULL;
	tablename = uri;

	if (name_func != NULL)
		WT_ERR(name_func(session, uri));

	/* Get the btree handle(s) and call the underlying function. */
	if (WT_PREFIX_MATCH(uri, "file:")) {
		if (file_func != NULL) {
			WT_ERR(__wt_session_get_btree_ckpt(
			    session, uri, cfg, open_flags));
			ret = file_func(session, cfg);
			WT_TRET(__wt_session_release_btree(session));
		}
	} else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
		WT_ERR(__wt_schema_get_colgroup(session, uri, NULL, &colgroup));
		WT_ERR(__wt_schema_worker(session, colgroup->source,
		    file_func, name_func, cfg, open_flags));
	} else if (WT_PREFIX_SKIP(tablename, "index:")) {
		idx = NULL;
		WT_ERR(__wt_schema_get_index(session, uri, NULL, &idx));
		WT_ERR(__wt_schema_worker(session, idx->source,
		    file_func, name_func, cfg, open_flags));
	} else if (WT_PREFIX_MATCH(uri, "lsm:")) {
		WT_ERR(__wt_lsm_tree_worker(
		    session, uri, file_func, name_func, cfg, open_flags));
	} else if (WT_PREFIX_SKIP(tablename, "table:")) {
		WT_ERR(__wt_schema_get_table(session,
		    tablename, strlen(tablename), 0, &table));
		WT_ASSERT(session, session->dhandle == NULL);

		/*
		 * We could make a recursive call for each colgroup or index
		 * URI, but since we have already opened the table, we can take
		 * a short cut and skip straight to the sources.  If we have a
		 * name function, it needs to know about the intermediate URIs.
		 */
		for (i = 0; i < WT_COLGROUPS(table); i++) {
			colgroup = table->cgroups[i];
			if (name_func != NULL)
				WT_ERR(name_func(session, colgroup->name));
			WT_ERR(__wt_schema_worker(session, colgroup->source,
			    file_func, name_func, cfg, open_flags));
		}

		WT_ERR(__wt_schema_open_indices(session, table));
		for (i = 0; i < table->nindices; i++) {
			idx = table->indices[i];
			if (name_func != NULL)
				WT_ERR(name_func(session, idx->name));
			WT_ERR(__wt_schema_worker(session, idx->source,
			    file_func, name_func, cfg, open_flags));
		}
	} else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) {
		wt_session = (WT_SESSION *)session;
		if (file_func == __wt_compact && dsrc->compact != NULL)
			WT_ERR(dsrc->compact(
			    dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
		else if (file_func == __wt_salvage && dsrc->salvage != NULL)
			WT_ERR(dsrc->salvage(
			   dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
		else if (file_func == __wt_verify && dsrc->verify != NULL)
			WT_ERR(dsrc->verify(
			   dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg));
		else
			WT_ERR(__wt_object_unsupported(session, uri));
	} else
		WT_ERR(__wt_bad_object_type(session, uri));

err:	if (table != NULL)
		__wt_schema_release_table(session, table);
	return (ret);
}