Beispiel #1
0
/*
 * __meta_track_apply --
 *	Carry out the post-commit work for one metadata tracking record and
 *	then clear the record. Returns the result of the operation performed
 *	for the record (the initial WT_DECL_RET value when nothing ran).
 */
static int
__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_DECL_RET;

	switch (trk->op) {
	case WT_ST_CHECKPOINT:
		/* Resolve the checkpoint through the tree's block manager. */
		btree = trk->dhandle->handle;
		bm = btree->bm;
		WT_WITH_DHANDLE(session, trk->dhandle,
		    ret = bm->checkpoint_resolve(bm, session));
		break;
	case WT_ST_DROP_COMMIT:
		/* Ask the block manager to drop the object named by trk->a. */
		ret = __wt_block_manager_drop(session, trk->a, false);
		if (ret != 0)
			__wt_err(session, ret,
			    "metadata remove dropped file %s", trk->a);
		break;
	case WT_ST_LOCK:
		/* Release the tracked handle. */
		WT_WITH_DHANDLE(session, trk->dhandle,
		    ret = __wt_session_release_btree(session));
		break;
	case WT_ST_EMPTY:
	case WT_ST_FILEOP:
	case WT_ST_REMOVE:
	case WT_ST_SET:
		/* Nothing to do for these record types when applying. */
		break;
	}

	/* The record is cleared whether or not the operation succeeded. */
	__meta_track_clear(session, trk);
	return (ret);
}
Beispiel #2
0
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	Only the outermost call (the one that drops the nesting count to zero)
 *	does any work: it hands every tracked record, newest first, to
 *	__meta_track_apply and then, when a sync is wanted and possible,
 *	flushes the metadata.
 *
 *	need_sync: non-zero if the metadata should be flushed after applying.
 *	unroll: non-zero to undo the tracked operations rather than apply them.
 *
 *	Returns zero or the error accumulated while processing the records or
 *	flushing the metadata.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, return now and avoid unnecessary
	 * metadata checkpoints.  For example, this happens if attempting to
	 * create a data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		return (0);

	/*
	 * Walk the records from newest to oldest.  The pointer is decremented
	 * inside the loop so it never moves below the first record: forming a
	 * pointer before the start of an array is undefined behavior, even if
	 * it is only compared and never dereferenced.
	 */
	while (trk > trk_orig) {
		--trk;
		WT_TRET(__meta_track_apply(session, trk, unroll));
	}

	/*
	 * Unroll operations don't need to flush the metadata.
	 *
	 * Also, if we don't have the metadata handle (e.g, we're in the
	 * process of creating the metadata), we can't sync it.
	 */
	if (unroll || ret != 0 || !need_sync || session->meta_dhandle == NULL)
		return (ret);

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
		/*
		 * Nothing is done here when either log sync flag is already
		 * set on the connection.
		 */
		if (!FLD_ISSET(S2C(session)->txn_logsync,
		    WT_LOG_DSYNC | WT_LOG_FSYNC))
			WT_WITH_DHANDLE(session, session->meta_dhandle,
			    ret = __wt_txn_checkpoint_log(session,
			    0, WT_TXN_LOG_CKPT_SYNC, NULL));
	} else {
		/* Not logging: checkpoint the metadata, then sync it. */
		WT_WITH_DHANDLE(session, session->meta_dhandle,
		    ret = __wt_checkpoint(session, NULL));
		WT_RET(ret);
		WT_WITH_DHANDLE(session, session->meta_dhandle,
		    ret = __wt_checkpoint_sync(session, NULL));
	}

	return (ret);
}
Beispiel #3
0
/*
 * __compact_worker --
 *	Drive compaction of the session's operation handles, interleaving
 *	compaction passes with checkpoints.
 */
static int
__compact_worker(WT_SESSION_IMPL *session)
{
	WT_DECL_RET;
	u_int i, pass;
	bool progress;

	/*
	 * Clear every handle's skip flag up front: nothing sets or resets it
	 * when compaction finishes, so it is simplest to do it once, here.
	 */
	for (i = 0; i < session->op_handle_next; ++i)
		session->op_handle[i]->compact_skip = false;

	/*
	 * Start with a checkpoint (see this file's leading comment for
	 * details).
	 */
	WT_ERR(__compact_checkpoint(session));

	/*
	 * Each pass compacts 10% of a file, and because the file shrinks as
	 * we go that is not 10% of the original size each time.  A hundred
	 * passes is clearly more than needed; stop early once a pass makes
	 * no progress.
	 */
	for (pass = 0; pass < 100; ++pass) {
		progress = false;

		/* Visit each file being compacted. */
		for (i = 0; i < session->op_handle_next; ++i) {
			/* Ignore objects already known to be finished. */
			if (session->op_handle[i]->compact_skip)
				continue;

			session->compact_state = WT_COMPACT_RUNNING;
			WT_WITH_DHANDLE(session,
			    session->op_handle[i], ret = __wt_compact(session));
			WT_ERR(ret);

			/*
			 * A file that did no work is skipped from now on; any
			 * file that did work earns another pass.
			 */
			if (session->compact_state != WT_COMPACT_SUCCESS)
				session->op_handle[i]->compact_skip = true;
			else
				progress = true;
		}
		if (!progress)
			break;

		/*
		 * Two checkpoints follow every productive pass (see this
		 * file's leading comment for details).
		 */
		WT_ERR(__compact_checkpoint(session));
		WT_ERR(__compact_checkpoint(session));
	}

err:	session->compact_state = WT_COMPACT_NONE;

	return (ret);
}
Beispiel #4
0
/*
 * __meta_track_unroll --
 *	Reverse the change described by one metadata tracking record, then
 *	clear the record. Returns the result of the undo operation.
 */
static int
__meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
	WT_DECL_RET;
	size_t prefix_len;

	/* File operations store URIs; skip the prefix to get the file name. */
	prefix_len = strlen("file:");

	switch (trk->op) {
	case WT_ST_EMPTY:		/* Unused slot */
	case WT_ST_CHECKPOINT:		/* Checkpoint: nothing to undo */
	case WT_ST_DROP_COMMIT:		/* Drop: nothing to undo */
		break;
	case WT_ST_LOCK:		/* Handle lock */
		/* A handle created by this operation is marked for discard. */
		if (trk->created)
			F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
		WT_WITH_DHANDLE(session, trk->dhandle,
		    ret = __wt_session_release_btree(session));
		break;
	case WT_ST_FILEOP:		/* File operation */
		/*
		 * Renames set both a and b, creates leave a NULL and removes
		 * leave b NULL.
		 */
		if (trk->a == NULL) {
			/* Undo a create by removing the new file. */
			ret = __wt_fs_remove(
			    session, trk->b + prefix_len, false);
			if (ret != 0)
				__wt_err(session, ret,
				    "metadata unroll create %s", trk->b);
		} else if (trk->b != NULL) {
			/* Undo a rename by renaming b back to a. */
			ret = __wt_fs_rename(session,
			    trk->b + prefix_len, trk->a + prefix_len, true);
			if (ret != 0)
				__wt_err(session, ret,
				    "metadata unroll rename %s to %s",
				    trk->b, trk->a);
		}

		/*
		 * Removes can't be undone yet: that would need some kind of
		 * temporary rename, with the remove done in roll forward.
		 */
		break;
	case WT_ST_REMOVE:		/* Remove trk.a */
		ret = __wt_metadata_remove(session, trk->a);
		if (ret != 0)
			__wt_err(session, ret,
			    "metadata unroll remove: %s", trk->a);
		break;
	case WT_ST_SET:			/* Set trk.a to trk.b */
		ret = __wt_metadata_update(session, trk->a, trk->b);
		if (ret != 0)
			__wt_err(session, ret,
			    "metadata unroll update %s to %s", trk->a, trk->b);
		break;
	}

	/* The record is cleared whether or not the undo succeeded. */
	__meta_track_clear(session, trk);
	return (ret);
}
Beispiel #5
0
/*
 * __wt_schema_release_table --
 *	Release a table handle.
 *
 *	Calls __wt_session_release_dhandle with the table's embedded handle
 *	(table->iface) passed to WT_WITH_DHANDLE, and returns that call's
 *	result. The table pointer is passed by value, so the caller's copy
 *	is left unchanged and must not be released again.
 */
int
__wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE *table)
{
	WT_DECL_RET;

	WT_WITH_DHANDLE(session, &table->iface,
	    ret = __wt_session_release_dhandle(session));

	return (ret);
}
Beispiel #6
0
/*
 * __wt_schema_release_table --
 *	Release the table handle referenced by *tablep and clear the caller's
 *	pointer. A NULL *tablep is a no-op that returns zero.
 */
int
__wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep)
{
	WT_DECL_RET;
	WT_TABLE *table;

	table = *tablep;
	if (table == NULL)
		return (0);

	/* Clear the caller's reference before the handle is released. */
	*tablep = NULL;
	WT_WITH_DHANDLE(session, &table->iface,
	    ret = __wt_session_release_dhandle(session));

	return (ret);
}
Beispiel #7
0
/*
 * __backup_cleanup_handles --
 *	Release every btree handle held by the backup cursor and free the
 *	associated file names and the list itself. Kept apart from
 *	__backup_stop because it can be called without the schema lock held.
 */
static int
__backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
	WT_CURSOR_BACKUP_ENTRY *entry;
	WT_DECL_RET;

	/* Nothing to clean up if no list was ever built. */
	if (cb->list == NULL)
		return (0);

	/* The list is terminated by an entry with a NULL name. */
	entry = cb->list;
	while (entry->name != NULL) {
		/* Release the handle, if any, then free the file name. */
		if (entry->handle != NULL)
			WT_WITH_DHANDLE(session, entry->handle,
			    WT_TRET(__wt_session_release_btree(session)));
		__wt_free(session, entry->name);
		++entry;
	}

	/* Finally, free the list itself. */
	__wt_free(session, cb->list);
	return (ret);
}
Beispiel #8
0
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	Only the outermost call (the one that drops the nesting count to zero)
 *	does any work.  When unrolling, the tracked records are undone newest
 *	first.  Otherwise the metadata is flushed (if requested and possible)
 *	and the records are applied oldest first.  If the flush fails, the
 *	records are undone instead, so tracked handles are still released and
 *	every record is cleared before the error is returned.
 *
 *	need_sync: true if the metadata should be flushed before applying.
 *	unroll: true to undo the tracked operations rather than apply them.
 *
 *	Returns zero or the error accumulated while flushing the metadata or
 *	processing the records.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	WT_SESSION_IMPL *ckpt_session;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, return now and avoid unnecessary
	 * metadata checkpoints.  For example, this happens if attempting to
	 * create a data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		return (0);

	/* Unroll operations don't need to flush the metadata. */
	if (unroll)
		goto undo;

	/*
	 * If we don't have the metadata cursor (e.g, we're in the process of
	 * creating the metadata), we can't sync it.
	 */
	if (!need_sync || session->meta_cursor == NULL ||
	    F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto done;

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
		WT_WITH_DHANDLE(session,
		    WT_SESSION_META_DHANDLE(session),
		    ret = __wt_txn_checkpoint_log(
		    session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
	} else {
		WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
		ckpt_session = S2C(session)->meta_ckpt_session;
		/*
		 * If this operation is part of a running transaction, that
		 * should be included in the checkpoint.
		 */
		ckpt_session->txn.id = session->txn.id;
		F_SET(ckpt_session, WT_SESSION_LOCKED_METADATA);
		WT_WITH_METADATA_LOCK(session, ret,
		    WT_WITH_DHANDLE(ckpt_session,
		    WT_SESSION_META_DHANDLE(session),
		    ret = __wt_checkpoint(ckpt_session, NULL)));
		F_CLR(ckpt_session, WT_SESSION_LOCKED_METADATA);
		ckpt_session->txn.id = WT_TXN_NONE;

		/* Only sync the checkpoint if it was written. */
		if (ret == 0)
			WT_WITH_DHANDLE(session,
			    WT_SESSION_META_DHANDLE(session),
			    ret = __wt_checkpoint_sync(session, NULL));
	}

	/*
	 * A failed flush must not return with the tracked records still
	 * pending: that would leave tracked handles locked and the records
	 * uncleared.  Undo the operations instead.
	 */
	if (ret != 0)
		goto undo;

done:	/* Apply any tracked operations post-commit. */
	for (; trk_orig < trk; trk_orig++)
		WT_TRET(__meta_track_apply(session, trk_orig));
	return (ret);

undo:	/*
	 * Undo the tracked operations, newest first.  The pointer is
	 * decremented inside the loop so it never moves below the first
	 * record (forming such a pointer is undefined behavior).
	 */
	while (trk > trk_orig) {
		--trk;
		WT_TRET(__meta_track_unroll(session, trk));
	}
	return (ret);
}
/*
 * __sweep --
 *	Close unused dhandles on the connection dhandle list.
 *
 *	Walks the connection's handle list once. Handles that are still
 *	referenced by a session, or whose time of death is too recent, are
 *	left alone. Busy (EBUSY) results are not treated as errors; any other
 *	error ends the sweep and is returned.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *dhandle_next;
	WT_DECL_RET;
	time_t now;

	conn = S2C(session);

	/*
	 * Sessions cache handles unless the session itself is closed, at which
	 * time the handle reference counts are immediately decremented.  Don't
	 * discard handles that have been open recently.
	 */
	WT_RET(__wt_seconds(session, &now));

	dhandle = SLIST_FIRST(&conn->dhlh);
	for (; dhandle != NULL; dhandle = dhandle_next) {
		/*
		 * Fetch the next list entry before doing any work: the current
		 * handle may be discarded below.
		 */
		dhandle_next = SLIST_NEXT(dhandle, l);
		if (dhandle->session_ref != 0 ||
		    now - dhandle->timeofdeath <= WT_DHANDLE_SWEEP_WAIT)
			continue;

		/*
		 * We have a candidate for closing; if it's open, acquire an
		 * exclusive lock on the handle and close it (the lock blocks
		 * threads from opening the handle).  We might be blocking an
		 * open for a fairly long time (over disk I/O), but the handle
		 * has been quiescent for awhile.
		 *
		 * The close can fail if an update cannot be written (updates in
		 * a no-longer-referenced file might not yet be globally visible
		 * if sessions have disjoint sets of files open).  If the handle
		 * is busy, skip it, we'll retry the close the next time, after
		 * the transaction state has progressed.
		 */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			/*
			 * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we
			 * want opens to block on us rather than returning an
			 * EBUSY error to the application.
			 */
			ret = __wt_try_writelock(session, dhandle->rwlock);
			if (ret == EBUSY) {
				/* Someone else holds the lock: try next time. */
				ret = 0;
				continue;
			}
			WT_RET(ret);

			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session));
			if (ret == EBUSY)
				ret = 0;

			/* Always drop the lock before checking for errors. */
			WT_TRET(__wt_rwunlock(session, dhandle->rwlock));
			WT_RET(ret);
		}

		/*
		 * Attempt to discard the handle (the called function checks the
		 * handle-open flag after acquiring appropriate locks, which is
		 * why we don't do any special handling of EBUSY returns above,
		 * that path never cleared the handle-open flag).
		 */
		ret = __wt_conn_dhandle_discard_single(session, dhandle, 0);
		if (ret == EBUSY)
			ret = 0;
		WT_RET(ret);
	}
	return (0);
}
Beispiel #10
0
/*
 * __meta_track_apply --
 *	Process one metadata tracking record, either completing it (when not
 *	unrolling) or undoing it (when unrolling), then reset the record and
 *	free its strings.
 */
static int
__meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_DECL_RET;
	size_t prefix_len;
	int tret;

	/* File operations store URIs; skip the prefix to get the file name. */
	prefix_len = strlen("file:");

	/*
	 * Handle unlocks and checkpoint resolution are the only work done when
	 * not unrolling; every other record type is simply discarded.
	 */
	if (!unroll && trk->op != WT_ST_CHECKPOINT && trk->op != WT_ST_LOCK)
		goto done;

	switch (trk->op) {
	case WT_ST_EMPTY:	/* Unused slot */
		break;
	case WT_ST_CHECKPOINT:	/* Checkpoint */
		/* Checkpoints are only resolved on the apply path. */
		if (unroll)
			break;
		btree = trk->dhandle->handle;
		bm = btree->bm;
		WT_WITH_DHANDLE(session, trk->dhandle,
		    WT_TRET(bm->checkpoint_resolve(bm, session)));
		break;
	case WT_ST_LOCK:	/* Handle lock */
		/* When unrolling, discard a handle this operation created. */
		if (unroll && trk->created)
			F_SET(trk->dhandle, WT_DHANDLE_DISCARD);
		WT_WITH_DHANDLE(session, trk->dhandle,
		    WT_TRET(__wt_session_release_btree(session)));
		break;
	case WT_ST_FILEOP:	/* File operation */
		/*
		 * Renames set both a and b, creates leave a NULL and removes
		 * leave b NULL.
		 */
		if (trk->a == NULL) {
			/* Undo a create by removing the new file. */
			tret = __wt_remove(session, trk->b + prefix_len);
			if (tret != 0) {
				__wt_err(session, tret,
				    "metadata unroll create %s",
				    trk->b);
				WT_TRET(tret);
			}
		} else if (trk->b != NULL) {
			/* Undo a rename by renaming b back to a. */
			tret = __wt_rename(session,
			    trk->b + prefix_len, trk->a + prefix_len);
			if (tret != 0) {
				__wt_err(session, tret,
				    "metadata unroll rename %s to %s",
				    trk->b, trk->a);
				WT_TRET(tret);
			}
		}
		/*
		 * Removes can't be undone yet: that would need some kind of
		 * temporary rename, with the remove done in roll forward.
		 */
		break;
	case WT_ST_REMOVE:	/* Remove trk.a */
		tret = __wt_metadata_remove(session, trk->a);
		if (tret != 0) {
			__wt_err(session, tret,
			    "metadata unroll remove: %s",
			    trk->a);
			WT_TRET(tret);
		}
		break;
	case WT_ST_SET:		/* Set trk.a to trk.b */
		tret = __wt_metadata_update(session, trk->a, trk->b);
		if (tret != 0) {
			__wt_err(session, tret,
			    "metadata unroll update %s to %s",
			    trk->a, trk->b);
			WT_TRET(tret);
		}
		break;
	WT_ILLEGAL_VALUE(session);
	}

done:	/* Reset the slot and free the strings it held. */
	trk->op = WT_ST_EMPTY;
	__wt_free(session, trk->a);
	__wt_free(session, trk->b);
	trk->dhandle = NULL;

	return (ret);
}
Beispiel #11
0
/*
 * __wt_session_compact --
 *	WT_SESSION.compact method.
 *
 *	wt_session: the public session handle, cast to WT_SESSION_IMPL.
 *	uri: the object to compact.
 *	config: the method's configuration string; "timeout" is read from it.
 *
 *	URIs without a built-in prefix are handed to the matching data source's
 *	compact method (or rejected). For built-in types, the handles to
 *	compact are collected by a schema worker pass, then LSM objects and
 *	files are compacted as their counts require. All collected handles are
 *	released before returning, on success or failure.
 */
int
__wt_session_compact(
    WT_SESSION *wt_session, const char *uri, const char *config)
{
	WT_COMPACT_STATE compact;
	WT_CONFIG_ITEM cval;
	WT_DATA_SOURCE *dsrc;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	u_int i;
	bool ignore_cache_size_set;

	/* Tracks whether this call set the flag, so only it clears the flag. */
	ignore_cache_size_set = false;

	session = (WT_SESSION_IMPL *)wt_session;
	/* NOTE(review): cfg, used below, appears to come from this macro. */
	SESSION_API_CALL(session, compact, config, cfg);

	/*
	 * The compaction thread should not block when the cache is full: it is
	 * holding locks blocking checkpoints and once the cache is full, it can
	 * spend a long time doing eviction.
	 */
	if (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE)) {
		ignore_cache_size_set = true;
		F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
	}

	/* In-memory ignores compaction operations. */
	if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto err;

	/*
	 * Non-LSM object compaction requires checkpoints, which are impossible
	 * in transactional contexts. Disallow in all contexts (there's no
	 * reason for LSM to allow this, possible or not), and check now so the
	 * error message isn't confusing.
	 */
	WT_ERR(__wt_txn_context_check(session, false));

	/* Disallow objects in the WiredTiger name space. */
	WT_ERR(__wt_str_name_check(session, uri));

	/*
	 * Anything that isn't one of the built-in object types is passed to
	 * its data source, if one is registered for the URI.
	 */
	if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
	    !WT_PREFIX_MATCH(uri, "file:") &&
	    !WT_PREFIX_MATCH(uri, "index:") &&
	    !WT_PREFIX_MATCH(uri, "lsm:") &&
	    !WT_PREFIX_MATCH(uri, "table:")) {
		if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
			ret = dsrc->compact == NULL ?
			    __wt_object_unsupported(session, uri) :
			    dsrc->compact(
			    dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg);
		else
			ret = __wt_bad_object_type(session, uri);
		goto err;
	}

	/*
	 * Setup the session handle's compaction state structure. The state
	 * lives on this function's stack, so the session's pointer to it is
	 * cleared again at the err label before returning.
	 */
	memset(&compact, 0, sizeof(WT_COMPACT_STATE));
	session->compact = &compact;

	/* Compaction can be time-limited. */
	WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
	session->compact->max_time = (uint64_t)cval.val;
	__wt_epoch(session, &session->compact->begin);

	/* Find the types of data sources being compacted. */
	WT_WITH_SCHEMA_LOCK(session,
	    ret = __wt_schema_worker(session, uri,
	    __compact_handle_append, __compact_uri_analyze, cfg, 0));
	WT_ERR(ret);

	/* Compact whichever kinds of object the analysis pass counted. */
	if (session->compact->lsm_count != 0)
		WT_ERR(__wt_schema_worker(
		    session, uri, NULL, __wt_lsm_compact, cfg, 0));
	if (session->compact->file_count != 0)
		WT_ERR(__compact_worker(session));

err:	session->compact = NULL;

	/* End compaction on, then release, every handle that was collected. */
	for (i = 0; i < session->op_handle_next; ++i) {
		WT_WITH_DHANDLE(session, session->op_handle[i],
		    WT_TRET(__compact_end(session)));
		WT_WITH_DHANDLE(session, session->op_handle[i],
		    WT_TRET(__wt_session_release_dhandle(session)));
	}

	__wt_free(session, session->op_handle);
	session->op_handle_allocated = session->op_handle_next = 0;

	/*
	 * Release common session resources (for example, checkpoint may acquire
	 * significant reconciliation structures/memory).
	 */
	WT_TRET(__wt_session_release_resources(session));

	if (ignore_cache_size_set)
		F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);

	/* Count the call as a failure or a success in the statistics. */
	if (ret != 0)
		WT_STAT_CONN_INCR(session, session_table_compact_fail);
	else
		WT_STAT_CONN_INCR(session, session_table_compact_success);
	API_END_RET_NOTFOUND_MAP(session, ret);
}
Beispiel #12
0
/*
 * __compact_worker --
 *	Drive compaction of the session's operation handles, interleaving
 *	compaction passes with checkpoints and tolerating busy handles.
 */
static int
__compact_worker(WT_SESSION_IMPL *session)
{
	WT_DECL_RET;
	u_int i, pass;
	bool more_work;

	/*
	 * Clear every handle's skip flag up front: nothing sets or resets it
	 * when compaction finishes, so it is simplest to do it once, here.
	 */
	for (i = 0; i < session->op_handle_next; ++i)
		session->op_handle[i]->compact_skip = false;

	/*
	 * Start with a checkpoint (see this file's leading comment for
	 * details).
	 */
	WT_ERR(__compact_checkpoint(session));

	/*
	 * Each pass compacts 10% of a file, and because the file shrinks as
	 * we go that is not 10% of the original size each time.  A hundred
	 * passes is clearly more than needed; stop early once a pass makes
	 * no progress.
	 */
	for (pass = 0; pass < 100; ++pass) {
		more_work = false;

		/* Visit each file being compacted. */
		for (i = 0; i < session->op_handle_next; ++i) {
			/* Ignore objects already known to be finished. */
			if (session->op_handle[i]->compact_skip)
				continue;

			session->compact_state = WT_COMPACT_RUNNING;
			WT_WITH_DHANDLE(session,
			    session->op_handle[i], ret = __wt_compact(session));

			if (ret == 0) {
				/*
				 * Success: a file that did no work is skipped
				 * from now on, one that did work earns another
				 * pass.
				 */
				if (session->
				    compact_state != WT_COMPACT_SUCCESS)
					session->
					    op_handle[i]->compact_skip = true;
				else
					more_work = true;
			} else if (ret == EBUSY) {
				/*
				 * Busy because a checkpoint was running: move
				 * on to the next handle. We may keep racing
				 * with checkpoint on each handle, but that's
				 * fine: once all handles have been visited we
				 * block until a checkpoint completes.
				 *
				 * Give up if eviction is the problem.
				 */
				if (__wt_cache_stuck(session)) {
					WT_ERR_MSG(session, EBUSY,
					    "compaction halted by eviction "
					    "pressure");
				}
				ret = 0;
				more_work = true;
			} else {
				/* Any other failure ends compaction. */
				WT_ERR(ret);
			}
		}
		if (!more_work)
			break;

		/*
		 * Two checkpoints follow every productive pass (see this
		 * file's leading comment for details).
		 */
		WT_ERR(__compact_checkpoint(session));
		WT_ERR(__compact_checkpoint(session));
	}

err:	session->compact_state = WT_COMPACT_NONE;

	return (ret);
}
Beispiel #13
0
/*
 * __sweep --
 *	Close unused dhandles on the connection dhandle list.
 *
 *	Walks the connection's handle list once. The metadata handle, handles
 *	in use by a session and handles whose time of death is too recent are
 *	left alone. A handle with no time of death recorded is stamped with
 *	the current time and revisited by a later sweep. Remaining candidates
 *	are write-locked, closed if open and, when no session references
 *	remain, discarded.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *dhandle_next;
	WT_DECL_RET;
	time_t now;
	int locked;

	conn = S2C(session);

	/* Don't discard handles that have been open recently. */
	WT_RET(__wt_seconds(session, &now));

	WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
	dhandle = SLIST_FIRST(&conn->dhlh);
	for (; dhandle != NULL; dhandle = dhandle_next) {
		/*
		 * Fetch the next list entry before doing any work: the current
		 * handle may be discarded below.
		 */
		dhandle_next = SLIST_NEXT(dhandle, l);
		if (WT_IS_METADATA(dhandle))
			continue;
		if (dhandle->session_inuse != 0 ||
		    now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
			continue;
		/* First sighting of an idle handle: start its clock. */
		if (dhandle->timeofdeath == 0) {
			dhandle->timeofdeath = now;
			WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
			continue;
		}

		/*
		 * We have a candidate for closing; if it's open, acquire an
		 * exclusive lock on the handle and close it. We might be
		 * blocking opens for a long time (over disk I/O), but the
		 * handle was quiescent for awhile.
		 *
		 * The close can fail if an update cannot be written (updates
		 * in a no-longer-referenced file might not yet be globally
		 * visible if sessions have disjoint sets of files open).  If
		 * the handle is busy, skip it, we'll retry the close the next
		 * time, after the transaction state has progressed.
		 *
		 * We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
		 * opens to block on us rather than returning an EBUSY error to
		 * the application.
		 */
		if ((ret =
		    __wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
			continue;
		WT_RET(ret);
		/* Remember we hold the lock so the unlock path releases it. */
		locked = 1;

		/* If the handle is open, try to close it. */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session, 0));
			if (ret != 0)
				goto unlock;

			/* We closed the btree handle, bump the statistic. */
			WT_STAT_FAST_CONN_INCR(session, dh_conn_handles);
		}

		/*
		 * If there are no longer any references to the handle in any
		 * sessions, attempt to discard it.  The called function
		 * re-checks that the handle is not in use, which is why we
		 * don't do any special handling of EBUSY returns above.
		 */
		if (dhandle->session_inuse == 0 && dhandle->session_ref == 0) {
			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_dhandle_discard_single(session, 0));
			if (ret != 0)
				goto unlock;

			/* If the handle was discarded, it isn't locked. */
			locked = 0;
		} else
			WT_STAT_FAST_CONN_INCR(session, dh_conn_ref);

unlock:		if (locked)
			WT_TRET(__wt_writeunlock(session, dhandle->rwlock));

		/*
		 * NOTE(review): presumably returns on any error other than
		 * EBUSY; confirm against the macro definition.
		 */
		WT_RET_BUSY_OK(ret);
	}
	return (0);
}
Beispiel #14
0
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 *
 *	Only the outermost call (the one that drops the nesting count to zero)
 *	does any work.  Unless unrolling, the metadata is flushed when a sync
 *	is requested and possible.  The tracked records are then applied,
 *	oldest first, or, when unrolling or after a failed flush, undone,
 *	newest first.
 *
 *	need_sync: true if the metadata should be flushed before applying.
 *	unroll: true to undo the tracked operations rather than apply them.
 *
 *	Returns the error that forced the unroll, if any, and zero otherwise.
 *	A failure while applying or undoing the records themselves is reported
 *	through WT_PANIC_RET.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	WT_SESSION_IMPL *ckpt_session;
	int saved_ret;
	bool did_drop;

	saved_ret = 0;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, skip unnecessary metadata
	 * checkpoints.  For example, this happens if attempting to create a
	 * data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		goto err;

	/* Unrolling doesn't require syncing the metadata. */
	if (unroll)
		goto err;

	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
#ifdef WT_ENABLE_SCHEMA_TXN
		WT_ERR(__wt_txn_commit(session, NULL));
		__wt_errx(session, "TRACK: Commit internal schema txn");
#endif
	}

	/*
	 * If we don't have the metadata cursor (e.g, we're in the process of
	 * creating the metadata), we can't sync it.
	 */
	if (!need_sync || session->meta_cursor == NULL ||
	    F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto err;

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
		WT_WITH_DHANDLE(session,
		    WT_SESSION_META_DHANDLE(session),
		    ret = __wt_txn_checkpoint_log(
		    session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
	else {
		WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
		ckpt_session = S2C(session)->meta_ckpt_session;
		/*
		 * If this operation is part of a running transaction, that
		 * should be included in the checkpoint.
		 */
		ckpt_session->txn.id = session->txn.id;
		WT_ASSERT(session,
		    !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
		WT_WITH_DHANDLE(ckpt_session, WT_SESSION_META_DHANDLE(session),
		    WT_WITH_METADATA_LOCK(ckpt_session,
			ret = __wt_checkpoint(ckpt_session, NULL)));
		ckpt_session->txn.id = WT_TXN_NONE;
		if (ret == 0)
			WT_WITH_DHANDLE(session,
			    WT_SESSION_META_DHANDLE(session),
			    ret = __wt_checkpoint_sync(session, NULL));
	}

err:	/*
	 * Undo any tracked operations on failure.
	 * Apply any tracked operations post-commit.
	 */
	did_drop = false;
	if (unroll || ret != 0) {
		/*
		 * Remember the error that got us here and start over with a
		 * clean ret, so a failure of the unroll itself can be told
		 * apart below.
		 */
		saved_ret = ret;
		ret = 0;

		/*
		 * Undo newest first.  The pointer is decremented inside the
		 * loop so it never moves below the first record: forming such
		 * a pointer is undefined behavior, and this path is reached
		 * even when no records were logged.
		 */
		while (trk > trk_orig) {
			--trk;
			did_drop = did_drop || trk->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_unroll(session, trk));
		}
	} else
		for (; trk_orig < trk; trk_orig++) {
			did_drop = did_drop ||
			    trk_orig->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_apply(session, trk_orig));
		}

	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
		/*
		 * We should have committed above unless we're unrolling, there
		 * was an error or the operation was a noop.
		 */
		WT_ASSERT(session, unroll || saved_ret != 0 ||
		    session->txn.mod_count == 0);
#ifdef WT_ENABLE_SCHEMA_TXN
		__wt_err(session, saved_ret,
		    "TRACK: Abort internal schema txn");
		WT_TRET(__wt_txn_rollback(session, NULL));
#endif
	}

	/*
	 * Wake up the sweep thread: particularly for the in-memory
	 * storage engine, we want to reclaim space immediately.
	 */
	if (did_drop && S2C(session)->sweep_cond != NULL)
		__wt_cond_signal(session, S2C(session)->sweep_cond);

	/* Failing to apply or undo a tracked operation is unrecoverable. */
	if (ret != 0)
		WT_PANIC_RET(session, ret,
		    "failed to apply or unroll all tracked operations");
	return (saved_ret);
}
Beispiel #15
0
/*
 * __drop_table --
 *	WT_SESSION::drop for a table.
 *
 *	Drops the table's column groups and indices, then removes the table's
 *	own metadata entry.  The caller must hold the table write lock.
 *
 *	The local table pointer is cleared after every release so the cleanup
 *	at the err label can never release the same table twice.
 */
static int
__drop_table(
    WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
	WT_COLGROUP *colgroup;
	WT_DECL_RET;
	WT_INDEX *idx;
	WT_TABLE *table;
	u_int i;
	const char *name;
	bool tracked;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));

	name = uri;
	WT_PREFIX_SKIP_REQUIRED(session, name, "table:");

	table = NULL;
	tracked = false;

	/*
	 * Open the table so we can drop its column groups and indexes.
	 *
	 * Ideally we would keep the table locked exclusive across the drop,
	 * but for now we rely on the global table lock to prevent the table
	 * being reopened while it is being dropped.  One issue is that the
	 * WT_WITHOUT_LOCKS macro can drop and reacquire the global table lock,
	 * avoiding deadlocks while waiting for LSM operation to quiesce.
	 *
	 * Temporarily getting the table exclusively serves the purpose
	 * of ensuring that cursors on the table that are already open
	 * must at least be closed before this call proceeds.
	 */
	WT_ERR(__wt_schema_get_table_uri(session, uri, true,
	    WT_DHANDLE_EXCLUSIVE, &table));

	/*
	 * The release takes the pointer by value, so forget the table here,
	 * before checking the result: otherwise a failure of the release or
	 * of the re-open below would release it a second time at err.
	 */
	ret = __wt_schema_release_table(session, table);
	table = NULL;
	WT_ERR(ret);
	WT_ERR(__wt_schema_get_table_uri(session, uri, true, 0, &table));

	/* Drop the column groups. */
	for (i = 0; i < WT_COLGROUPS(table); i++) {
		if ((colgroup = table->cgroups[i]) == NULL)
			continue;
		/*
		 * Drop the column group before updating the metadata to avoid
		 * the metadata for the table becoming inconsistent if we can't
		 * get exclusive access.
		 */
		WT_ERR(__wt_schema_drop(session, colgroup->source, cfg));
		WT_ERR(__wt_metadata_remove(session, colgroup->name));
	}

	/* Drop the indices. */
	WT_ERR(__wt_schema_open_indices(session, table));
	for (i = 0; i < table->nindices; i++) {
		if ((idx = table->indices[i]) == NULL)
			continue;
		/*
		 * Drop the index before updating the metadata to avoid
		 * the metadata for the table becoming inconsistent if we can't
		 * get exclusive access.
		 */
		WT_ERR(__wt_schema_drop(session, idx->source, cfg));
		WT_ERR(__wt_metadata_remove(session, idx->name));
	}

	/*
	 * Make sure the table data handle is closed.  As above, forget the
	 * released table so a failed re-open can't lead to a second release.
	 */
	WT_TRET(__wt_schema_release_table(session, table));
	table = NULL;
	WT_ERR(__wt_schema_get_table_uri(
	    session, uri, true, WT_DHANDLE_EXCLUSIVE, &table));
	F_SET(&table->iface, WT_DHANDLE_DISCARD);
	if (WT_META_TRACKING(session)) {
		/*
		 * Hand the handle to metadata tracking; once tracked, it is
		 * not released at the err label below.
		 */
		WT_WITH_DHANDLE(session, &table->iface,
		    ret = __wt_meta_track_handle_lock(session, false));
		WT_ERR(ret);
		tracked = true;
	}

	/* Remove the metadata entry (ignore missing items). */
	WT_ERR(__wt_metadata_remove(session, uri));

err:	if (table != NULL && !tracked)
		WT_TRET(__wt_schema_release_table(session, table));
	return (ret);
}