Exemplo n.º 1
0
/*
 * __ckpt_server_start --
 *	Start the checkpoint server thread.
 */
static int
__ckpt_server_start(WT_CONNECTION_IMPL *conn)
{
	WT_SESSION_IMPL *session;

	/* Nothing to do if the server is already running. */
	if (conn->ckpt_session != NULL)
		return (0);

	F_SET(conn, WT_CONN_SERVER_CHECKPOINT);
	/* The checkpoint server gets its own session. */
	WT_RET(__wt_open_internal_session(
	    conn, "checkpoint-server", 1, 1, &conn->ckpt_session));
	session = conn->ckpt_session;

	/*
	 * Checkpoint does enough I/O it may be called upon to perform slow
	 * operations for the block manager.
	 */
	F_SET(session, WT_SESSION_CAN_WAIT);

	WT_RET(
	    __wt_cond_alloc(session, "checkpoint server", 0, &conn->ckpt_cond));

	/*
	 * Start the thread.
	 */
	WT_RET(__wt_thread_create(
	    session, &conn->ckpt_tid, __ckpt_server, session));
	conn->ckpt_tid_set = 1;

	return (0);
}
Exemplo n.º 2
0
/*
 * __wt_las_create --
 *	Initialize the database's lookaside store.
 */
int
__wt_las_create(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	uint32_t session_flags;
	const char *drop_cfg[] = {
	    WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL };

	conn = S2C(session);

	/*
	 * Done at startup: we cannot do it on demand because we require the
	 * schema lock to create and drop the table, and it may not always be
	 * available.
	 *
	 * Discard any previous incarnation of the table.
	 */
	WT_RET(__wt_session_drop(session, WT_LAS_URI, drop_cfg));

	/* Re-create the table. */
	WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT));

	/*
	 * Open a shared internal session used to access the lookaside table.
	 * This session should never be tapped for eviction.
	 */
	session_flags = WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION;
	WT_RET(__wt_open_internal_session(
	    conn, "lookaside table", true, session_flags, &conn->las_session));

	/* Flag that the lookaside table has been created. */
	F_SET(conn, WT_CONN_LAS_OPEN);

	return (0);
}
Exemplo n.º 3
0
/*
 * __wt_sweep_create --
 *	Start the handle sweep thread.
 */
int
__wt_sweep_create(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* Set first, the thread might run before we finish up. */
	F_SET(conn, WT_CONN_SERVER_SWEEP);

	WT_RET(__wt_open_internal_session(
	    conn, "sweep-server", 1, 1, &conn->sweep_session));
	session = conn->sweep_session;

	/*
	 * Handle sweep does enough I/O it may be called upon to perform slow
	 * operations for the block manager.
	 */
	F_SET(session, WT_SESSION_CAN_WAIT);

	WT_RET(__wt_cond_alloc(
	    session, "handle sweep server", 0, &conn->sweep_cond));

	WT_RET(__wt_thread_create(
	    session, &conn->sweep_tid, __sweep_server, session));
	conn->sweep_tid_set = 1;

	return (0);
}
Exemplo n.º 4
0
/*
 * __ckpt_server_start --
 *	Start the checkpoint server thread.
 */
static int
__ckpt_server_start(WT_CONNECTION_IMPL *conn)
{
	WT_SESSION_IMPL *session;

	session = conn->default_session;

	/* Nothing to do if the server is already running. */
	if (conn->ckpt_session != NULL)
		return (0);

	F_SET(conn, WT_CONN_SERVER_CHECKPOINT);
	/* The checkpoint server gets its own session. */
	WT_RET(__wt_open_internal_session(
	    conn, "checkpoint-server", 1, 1, &conn->ckpt_session));

	WT_RET(
	    __wt_cond_alloc(session, "checkpoint server", 0, &conn->ckpt_cond));

	/*
	 * Start the thread.
	 */
	WT_RET(__wt_thread_create(
	    session, &conn->ckpt_tid, __ckpt_server, conn->ckpt_session));
	conn->ckpt_tid_set = 1;

	return (0);
}
Exemplo n.º 5
0
/*
 * __wt_connection_open --
 *	Open a connection.
 */
int
__wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
{
	WT_SESSION_IMPL *session;

	/* Default session. */
	session = conn->default_session;
	WT_ASSERT(session, session->iface.connection == &conn->iface);

	/*
	 * Tell internal server threads to run: this must be set before opening
	 * any sessions.
	 */
	F_SET(conn, WT_CONN_SERVER_RUN | WT_CONN_LOG_SERVER_RUN);

	/* WT_SESSION_IMPL array. */
	WT_RET(__wt_calloc(session,
	    conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions));
	WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->sessions);

	/*
	 * Open the default session.  We open this before starting service
	 * threads because those may allocate and use session resources that
	 * need to get cleaned up on close.
	 */
	WT_RET(__wt_open_internal_session(
	    conn, "connection", false, 0, &session));

	/*
	 * The connection's default session is originally a static structure,
	 * swap that out for a more fully-functional session.  It's necessary
	 * to have this step: the session allocation code uses the connection's
	 * session, and if we pass a reference to the default session as the
	 * place to store the allocated session, things get confused and error
	 * handling can be corrupted.  So, we allocate into a stack variable
	 * and then assign it on success.
	 */
	conn->default_session = session;

	/*
	 * Publish: there must be a barrier to ensure the connection structure
	 * fields are set before other threads read from the pointer.
	 */
	WT_WRITE_BARRIER();

	/* Create the cache. */
	WT_RET(__wt_cache_create(session, cfg));

	/* Initialize transaction support. */
	WT_RET(__wt_txn_global_init(session, cfg));

	return (0);
}
Exemplo n.º 6
0
/*
 * __wt_meta_track_init --
 *	Initialize metadata tracking.
 */
int
__wt_meta_track_init(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
		WT_RET(__wt_open_internal_session(conn,
		    "metadata-ckpt", false, WT_SESSION_NO_DATA_HANDLES,
		    &conn->meta_ckpt_session));

		/*
		 * Sessions default to read-committed isolation, we rely on
		 * that for the correctness of metadata checkpoints.
		 */
		WT_ASSERT(session, conn->meta_ckpt_session->txn.isolation ==
		    WT_ISO_READ_COMMITTED);
	}

	return (0);
}
Exemplo n.º 7
0
/*
 * __logmgr_force_ckpt --
 *	Force a checkpoint out, waiting for the checkpoint LSN in the log
 *	is up to the given log number.
 */
static int
__logmgr_force_ckpt(WT_SESSION_IMPL *session, uint32_t lognum)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	WT_SESSION_IMPL *tmp_session;
	int yield;

	conn = S2C(session);
	log = conn->log;
	yield = 0;
	WT_RET(__wt_open_internal_session(conn,
	    "compatibility-reconfig", true, 0, &tmp_session));
	while (log->ckpt_lsn.l.file < lognum) {
		/*
		 * Force a checkpoint to be written in the new log file and
		 * force the archiving of all previous log files.  We do the
		 * checkpoint in the loop because the checkpoint LSN in the
		 * log record could still reflect the previous log file in
		 * cases such as the write LSN has not yet advanced into the
		 * new log file due to another group of threads still in
		 * progress with their slot copies or writes.
		 */
		WT_RET(tmp_session->iface.checkpoint(
		    &tmp_session->iface, "force=1"));
		WT_RET(WT_SESSION_CHECK_PANIC(tmp_session));
		/*
		 * Only sleep in the rare case that we had to come through
		 * this loop more than once.
		 */
		if (yield++) {
			WT_STAT_CONN_INCR(session, log_force_ckpt_sleep);
			__wt_sleep(0, WT_THOUSAND);
		}
	}
	WT_RET(tmp_session->iface.close(&tmp_session->iface, NULL));
	return (0);
}
Exemplo n.º 8
0
/*
 * __wt_las_create --
 *	Initialize the database's lookaside store.
 */
int
__wt_las_create(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	const char *drop_cfg[] = {
	    WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL };

	conn = S2C(session);

	/*
	 * Done at startup: we cannot do it on demand because we require the
	 * schema lock to create and drop the file, and it may not always be
	 * available.
	 *
	 * Open an internal session, used for the shared lookaside cursor.
	 *
	 * Sessions associated with a lookaside cursor should never be tapped
	 * for eviction.
	 */
	WT_RET(__wt_open_internal_session(
	    conn, "lookaside table", 1, 1, &conn->las_session));
	session = conn->las_session;
	F_SET(session, WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION);

	/* Discard any previous incarnation of the file. */
	WT_RET(__wt_session_drop(session, WT_LAS_URI, drop_cfg));

	/* Re-create the file. */
	WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT));

	/* Open the shared cursor. */
	WT_WITHOUT_DHANDLE(session,
	    ret = __las_cursor_create(session, &conn->las_cursor));

	return (ret);
}
Exemplo n.º 9
0
/*
 * __wt_logmgr_open --
 *	Start the log service threads.
 */
int
__wt_logmgr_open(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* If no log thread services are configured, we're done. */ 
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
		return (0);

	/*
	 * Start the log close thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	WT_RET(__wt_open_internal_session(
	    conn, "log-close-server", 0, 0, &conn->log_file_session));
	WT_RET(__wt_cond_alloc(conn->log_file_session,
	    "log close server", 0, &conn->log_file_cond));

	/*
	 * Start the log file close thread.
	 */
	WT_RET(__wt_thread_create(conn->log_file_session,
	    &conn->log_file_tid, __log_file_server, conn->log_file_session));
	conn->log_file_tid_set = 1;

	/*
	 * Start the log write LSN thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	WT_RET(__wt_open_internal_session(
	    conn, "log-wrlsn-server", 0, 0, &conn->log_wrlsn_session));
	WT_RET(__wt_cond_alloc(conn->log_wrlsn_session,
	    "log write lsn server", 0, &conn->log_wrlsn_cond));
	WT_RET(__wt_thread_create(conn->log_wrlsn_session,
	    &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session));
	conn->log_wrlsn_tid_set = 1;

	/* If no log thread services are configured, we're done. */ 
	if (!FLD_ISSET(conn->log_flags,
	    (WT_CONN_LOG_ARCHIVE | WT_CONN_LOG_PREALLOC)))
		return (0);

	/*
	 * If a log server thread exists, the user may have reconfigured
	 * archiving or pre-allocation.  Signal the thread.  Otherwise the
	 * user wants archiving and/or allocation and we need to start up
	 * the thread.
	 */
	if (conn->log_session != NULL) {
		WT_ASSERT(session, conn->log_cond != NULL);
		WT_ASSERT(session, conn->log_tid_set != 0);
		WT_RET(__wt_cond_signal(session, conn->log_cond));
	} else {
		/* The log server gets its own session. */
		WT_RET(__wt_open_internal_session(
		    conn, "log-server", 0, 0, &conn->log_session));
		WT_RET(__wt_cond_alloc(conn->log_session,
		    "log server", 0, &conn->log_cond));

		/*
		 * Start the thread.
		 */
		WT_RET(__wt_thread_create(conn->log_session,
		    &conn->log_tid, __log_server, conn->log_session));
		conn->log_tid_set = 1;
	}

	return (0);
}
Exemplo n.º 10
0
/*
 * __wt_txn_recover --
 *	Run recovery.
 */
int
__wt_txn_recover(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *metac;
	WT_DECL_RET;
	WT_RECOVERY r;
	struct WT_RECOVERY_FILE *metafile;
	char *config;
	bool eviction_started, needs_rec, was_backup;

	conn = S2C(session);
	WT_CLEAR(r);
	WT_INIT_LSN(&r.ckpt_lsn);
	eviction_started = false;
	was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);

	/* We need a real session for recovery. */
	WT_RET(__wt_open_internal_session(conn, "txn-recover",
	    false, WT_SESSION_NO_LOGGING, &session));
	r.session = session;

	F_SET(conn, WT_CONN_RECOVERING);
	WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
	WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
	WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac));
	metafile = &r.files[WT_METAFILE_ID];
	metafile->c = metac;

	/*
	 * If no log was found (including if logging is disabled), or if the
	 * last checkpoint was done with logging disabled, recovery should not
	 * run.  Scan the metadata to figure out the largest file ID.
	 */
	if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_EXISTED) ||
	    WT_IS_MAX_LSN(&metafile->ckpt_lsn)) {
		WT_ERR(__recovery_file_scan(&r));
		conn->next_file_id = r.max_fileid;
		goto done;
	}

	/*
	 * First, do a pass through the log to recover the metadata, and
	 * establish the last checkpoint LSN.  Skip this when opening a hot
	 * backup: we already have the correct metadata in that case.
	 */
	if (!was_backup) {
		r.metadata_only = true;
		/*
		 * If this is a read-only connection, check if the checkpoint
		 * LSN in the metadata file is up to date, indicating a clean
		 * shutdown.
		 */
		if (F_ISSET(conn, WT_CONN_READONLY)) {
			WT_ERR(__wt_log_needs_recovery(
			    session, &metafile->ckpt_lsn, &needs_rec));
			if (needs_rec)
				WT_ERR_MSG(session, WT_RUN_RECOVERY,
				    "Read-only database needs recovery");
		}
		if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
			WT_ERR(__wt_log_scan(session,
			    NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r));
		else {
			/*
			 * Start at the last checkpoint LSN referenced in the
			 * metadata.  If we see the end of a checkpoint while
			 * scanning, we will change the full scan to start from
			 * there.
			 */
			r.ckpt_lsn = metafile->ckpt_lsn;
			ret = __wt_log_scan(session,
			    &metafile->ckpt_lsn, 0, __txn_log_recover, &r);
			if (ret == ENOENT)
				ret = 0;
			WT_ERR(ret);
		}
	}

	/* Scan the metadata to find the live files and their IDs. */
	WT_ERR(__recovery_file_scan(&r));

	/*
	 * We no longer need the metadata cursor: close it to avoid pinning any
	 * resources that could block eviction during recovery.
	 */
	r.files[0].c = NULL;
	WT_ERR(metac->close(metac));

	/*
	 * Now, recover all the files apart from the metadata.
	 * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
	 */
	r.metadata_only = false;
	WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
	    "Main recovery loop: starting at %" PRIu32 "/%" PRIu32,
	    r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset));
	WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
	/*
	 * Check if the database was shut down cleanly.  If not
	 * return an error if the user does not want automatic
	 * recovery.
	 */
	if (needs_rec &&
	    (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) ||
	     F_ISSET(conn, WT_CONN_READONLY))) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_ERR_MSG(session, WT_RUN_RECOVERY,
			    "Read-only database needs recovery");
		WT_ERR(WT_RUN_RECOVERY);
	}

	if (F_ISSET(conn, WT_CONN_READONLY))
		goto done;

	/*
	 * Recovery can touch more data than fits in cache, so it relies on
	 * regular eviction to manage paging.  Start eviction threads for
	 * recovery without LAS cursors.
	 */
	WT_ERR(__wt_evict_create(session));
	eviction_started = true;

	/*
	 * Always run recovery even if it was a clean shutdown only if
	 * this is not a read-only connection.
	 * We can consider skipping it in the future.
	 */
	if (WT_IS_INIT_LSN(&r.ckpt_lsn))
		WT_ERR(__wt_log_scan(session, NULL,
		    WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r));
	else {
		ret = __wt_log_scan(session, &r.ckpt_lsn,
		    WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
		if (ret == ENOENT)
			ret = 0;
		WT_ERR(ret);
	}

	conn->next_file_id = r.max_fileid;

	/*
	 * If recovery ran successfully forcibly log a checkpoint so the next
	 * open is fast and keep the metadata up to date with the checkpoint
	 * LSN and archiving.
	 */
	WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));

done:	FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);
err:	WT_TRET(__recovery_free(&r));
	__wt_free(session, config);

	if (ret != 0)
		__wt_err(session, ret, "Recovery failed");

	/*
	 * Destroy the eviction threads that were started in support of
	 * recovery.  They will be restarted once the lookaside table is
	 * created.
	 */
	if (eviction_started)
		WT_TRET(__wt_evict_destroy(session));

	WT_TRET(session->iface.close(&session->iface, NULL));
	F_CLR(conn, WT_CONN_RECOVERING);

	return (ret);
}
Exemplo n.º 11
0
/*
 * __wt_txn_recover --
 *	Run recovery.
 */
int
__wt_txn_recover(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *metac;
	WT_DECL_RET;
	WT_RECOVERY r;
	WT_RECOVERY_FILE *metafile;
	char *config;
	bool do_checkpoint, eviction_started, needs_rec, was_backup;

	conn = S2C(session);
	WT_CLEAR(r);
	WT_INIT_LSN(&r.ckpt_lsn);
	config = NULL;
	do_checkpoint = true;
	eviction_started = false;
	was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);

	/* We need a real session for recovery. */
	WT_RET(__wt_open_internal_session(conn, "txn-recover",
	    false, WT_SESSION_NO_LOGGING, &session));
	r.session = session;
	WT_MAX_LSN(&r.max_ckpt_lsn);
	WT_MAX_LSN(&r.max_rec_lsn);
	conn->txn_global.recovery_timestamp =
	    conn->txn_global.meta_ckpt_timestamp = 0;

	F_SET(conn, WT_CONN_RECOVERING);
	WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
	WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
	WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac));
	metafile = &r.files[WT_METAFILE_ID];
	metafile->c = metac;

	/*
	 * If no log was found (including if logging is disabled), or if the
	 * last checkpoint was done with logging disabled, recovery should not
	 * run.  Scan the metadata to figure out the largest file ID.
	 */
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) ||
	    WT_IS_MAX_LSN(&metafile->ckpt_lsn)) {
		/*
		 * Detect if we're going from logging disabled to enabled.
		 * We need to know this to verify LSNs and start at the correct
		 * log file later.  If someone ran with logging, then disabled
		 * it and removed all the log files and then turned logging back
		 * on, we have to start logging in the log file number that is
		 * larger than any checkpoint LSN we have from the earlier time.
		 */
		WT_ERR(__recovery_file_scan(&r));
		/*
		 * The array can be re-allocated in recovery_file_scan.  Reset
		 * our pointer after scanning all the files.
		 */
		metafile = &r.files[WT_METAFILE_ID];
		conn->next_file_id = r.max_fileid;

		if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
		    WT_IS_MAX_LSN(&metafile->ckpt_lsn) &&
		    !WT_IS_MAX_LSN(&r.max_ckpt_lsn))
			WT_ERR(__wt_log_reset(session, r.max_ckpt_lsn.l.file));
		else
			do_checkpoint = false;
		goto done;
	}

	/*
	 * First, do a pass through the log to recover the metadata, and
	 * establish the last checkpoint LSN.  Skip this when opening a hot
	 * backup: we already have the correct metadata in that case.
	 *
	 * If we're running with salvage and we hit an error, we ignore it
	 * and continue. In salvage we want to recover whatever part of the
	 * data we can from the last checkpoint up until whatever problem we
	 * detect in the log file. In salvage, we ignore errors from scanning
	 * the log so recovery can continue. Other errors remain errors.
	 */
	if (!was_backup) {
		r.metadata_only = true;
		/*
		 * If this is a read-only connection, check if the checkpoint
		 * LSN in the metadata file is up to date, indicating a clean
		 * shutdown.
		 */
		if (F_ISSET(conn, WT_CONN_READONLY)) {
			WT_ERR(__wt_log_needs_recovery(
			    session, &metafile->ckpt_lsn, &needs_rec));
			if (needs_rec)
				WT_ERR_MSG(session, WT_RUN_RECOVERY,
				    "Read-only database needs recovery");
		}
		if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
			ret = __wt_log_scan(session,
			    NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r);
		else {
			/*
			 * Start at the last checkpoint LSN referenced in the
			 * metadata.  If we see the end of a checkpoint while
			 * scanning, we will change the full scan to start from
			 * there.
			 */
			r.ckpt_lsn = metafile->ckpt_lsn;
			ret = __wt_log_scan(session,
			    &metafile->ckpt_lsn, 0, __txn_log_recover, &r);
		}
		if (F_ISSET(conn, WT_CONN_SALVAGE))
			ret = 0;
		/*
		 * If log scan couldn't find a file we expected to be around,
		 * this indicates a corruption of some sort.
		 */
		if (ret == ENOENT) {
			F_SET(conn, WT_CONN_DATA_CORRUPTION);
			ret = WT_ERROR;
		}

		WT_ERR(ret);
	}

	/* Scan the metadata to find the live files and their IDs. */
	WT_ERR(__recovery_file_scan(&r));
	/*
	 * Clear this out.  We no longer need it and it could have been
	 * re-allocated when scanning the files.
	 */
	WT_NOT_READ(metafile, NULL);

	/*
	 * We no longer need the metadata cursor: close it to avoid pinning any
	 * resources that could block eviction during recovery.
	 */
	r.files[0].c = NULL;
	WT_ERR(metac->close(metac));

	/*
	 * Now, recover all the files apart from the metadata.
	 * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
	 */
	r.metadata_only = false;
	__wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS,
	    "Main recovery loop: starting at %" PRIu32 "/%" PRIu32
	    " to %" PRIu32 "/%" PRIu32, r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset,
	    r.max_rec_lsn.l.file, r.max_rec_lsn.l.offset);
	WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
	/*
	 * Check if the database was shut down cleanly.  If not
	 * return an error if the user does not want automatic
	 * recovery.
	 */
	if (needs_rec &&
	    (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) ||
	     F_ISSET(conn, WT_CONN_READONLY))) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_ERR_MSG(session, WT_RUN_RECOVERY,
			    "Read-only database needs recovery");
		WT_ERR_MSG(session, WT_RUN_RECOVERY, "Database needs recovery");
	}

	if (F_ISSET(conn, WT_CONN_READONLY)) {
		do_checkpoint = false;
		goto done;
	}

	/*
	 * Recovery can touch more data than fits in cache, so it relies on
	 * regular eviction to manage paging.  Start eviction threads for
	 * recovery without LAS cursors.
	 */
	WT_ERR(__wt_evict_create(session));
	eviction_started = true;

	/*
	 * Always run recovery even if it was a clean shutdown only if
	 * this is not a read-only connection.
	 * We can consider skipping it in the future.
	 */
	if (needs_rec)
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);
	if (WT_IS_INIT_LSN(&r.ckpt_lsn))
		ret = __wt_log_scan(session, NULL,
		    WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r);
	else
		ret = __wt_log_scan(session, &r.ckpt_lsn,
		    WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
	if (F_ISSET(conn, WT_CONN_SALVAGE))
		ret = 0;
	WT_ERR(ret);

	conn->next_file_id = r.max_fileid;

done:	WT_ERR(__recovery_set_checkpoint_timestamp(&r));
	if (do_checkpoint)
		/*
		 * Forcibly log a checkpoint so the next open is fast and keep
		 * the metadata up to date with the checkpoint LSN and
		 * archiving.
		 */
		WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));

	/*
	 * If we're downgrading and have newer log files, force an archive,
	 * no matter what the archive setting is.
	 */
	if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE))
		WT_ERR(__wt_log_truncate_files(session, NULL, true));
	FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);

err:	WT_TRET(__recovery_free(&r));
	__wt_free(session, config);
	FLD_CLR(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);

	if (ret != 0) {
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_FAILED);
		__wt_err(session, ret, "Recovery failed");
	}

	/*
	 * Destroy the eviction threads that were started in support of
	 * recovery.  They will be restarted once the lookaside table is
	 * created.
	 */
	if (eviction_started)
		WT_TRET(__wt_evict_destroy(session));

	WT_TRET(session->iface.close(&session->iface, NULL));
	F_CLR(conn, WT_CONN_RECOVERING);

	return (ret);
}
Exemplo n.º 12
0
/*
 * __thread_group_resize --
 *	Resize an array of utility threads already holding the lock.
 */
static int
__thread_group_resize(
    WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
    uint32_t new_min, uint32_t new_max, uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_THREAD *thread;
	size_t alloc;
	uint32_t i, session_flags;

	conn = S2C(session);
	session_flags = 0;

	WT_ASSERT(session,
	    group->current_threads <= group->alloc &&
	    __wt_rwlock_islocked(session, group->lock));

	if (new_min == group->min && new_max == group->max)
		return (0);

	/*
	 * Coll shrink to reduce the number of thread structures and running
	 * threads if required by the change in group size.
	 */
	WT_RET(__thread_group_shrink(session, group, new_max));

	/*
	 * Only reallocate the thread array if it is the largest ever, since
	 * our realloc doesn't support shrinking the allocated size.
	 */
	if (group->alloc < new_max) {
		alloc = group->alloc * sizeof(*group->threads);
		WT_RET(__wt_realloc(session, &alloc,
		    new_max * sizeof(*group->threads), &group->threads));
		group->alloc = new_max;
	}

	/*
	 * Initialize the structures based on the previous group size, not
	 * the previous allocated size.
	 */
	for (i = group->max; i < new_max; i++) {
		WT_ERR(__wt_calloc_one(session, &thread));
		/*
		 * Threads get their own session and lookaside table cursor
		 * if the lookaside table is open. Note that threads are
		 * started during recovery, before the lookaside table is
		 * created.
		 */
		if (LF_ISSET(WT_THREAD_CAN_WAIT))
			session_flags = WT_SESSION_CAN_WAIT;
		if (F_ISSET(conn, WT_CONN_LAS_OPEN))
			FLD_SET(session_flags, WT_SESSION_LOOKASIDE_CURSOR);
		WT_ERR(__wt_open_internal_session(conn, group->name,
		    false, session_flags, &thread->session));
		if (LF_ISSET(WT_THREAD_PANIC_FAIL))
			F_SET(thread, WT_THREAD_PANIC_FAIL);
		thread->id = i;
		thread->run_func = group->run_func;
		WT_ASSERT(session, group->threads[i] == NULL);
		group->threads[i] = thread;
	}

	if (group->current_threads < new_min)
		WT_ERR(__thread_group_grow(session, group, new_min));

err:	/*
	 * Update the thread group information even on failure to improve our
	 * chances of cleaning up properly.
	 */
	group->max = new_max;
	group->min = new_min;

	/*
	 * An error resizing a thread array is fatal, it should only happen
	 * in an out of memory situation.
	 */
	if (ret != 0) {
		WT_TRET(__wt_thread_group_destroy(session, group));
		WT_PANIC_RET(session, ret, "Error while resizing thread group");
	}
	return (ret);
}
Exemplo n.º 13
0
/*
 * __wt_logmgr_open --
 *	Start the log service threads.
 */
int
__wt_logmgr_open(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	uint32_t session_flags;

	conn = S2C(session);

	/* If no log thread services are configured, we're done. */
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
		return (0);

	F_SET(conn, WT_CONN_SERVER_LOG);

	/*
	 * Start the log close thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	session_flags = WT_SESSION_NO_DATA_HANDLES;
	WT_RET(__wt_open_internal_session(conn,
	    "log-close-server", false, session_flags, &conn->log_file_session));
	WT_RET(__wt_cond_alloc(
	    conn->log_file_session, "log close server", &conn->log_file_cond));

	/*
	 * Start the log file close thread.
	 */
	WT_RET(__wt_thread_create(conn->log_file_session,
	    &conn->log_file_tid, __log_file_server, conn->log_file_session));
	conn->log_file_tid_set = true;

	/*
	 * Start the log write LSN thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server",
	    false, session_flags, &conn->log_wrlsn_session));
	WT_RET(__wt_cond_auto_alloc(conn->log_wrlsn_session,
	    "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond));
	WT_RET(__wt_thread_create(conn->log_wrlsn_session,
	    &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session));
	conn->log_wrlsn_tid_set = true;

	/*
	 * If a log server thread exists, the user may have reconfigured
	 * archiving or pre-allocation.  Signal the thread.  Otherwise the
	 * user wants archiving and/or allocation and we need to start up
	 * the thread.
	 */
	if (conn->log_session != NULL) {
		WT_ASSERT(session, conn->log_cond != NULL);
		WT_ASSERT(session, conn->log_tid_set == true);
		__wt_cond_signal(session, conn->log_cond);
	} else {
		/* The log server gets its own session. */
		WT_RET(__wt_open_internal_session(conn,
		    "log-server", false, session_flags, &conn->log_session));
		WT_RET(__wt_cond_auto_alloc(conn->log_session,
		    "log server", 50000, WT_MILLION, &conn->log_cond));

		/*
		 * Start the thread.
		 */
		WT_RET(__wt_thread_create(conn->log_session,
		    &conn->log_tid, __log_server, conn->log_session));
		conn->log_tid_set = true;
	}

	return (0);
}
Exemplo n.º 14
0
/*
 * __wt_cache_create --
 *	Create the underlying cache.
 */
int
__wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CACHE *cache;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	int i;

	conn = S2C(session);

	WT_ASSERT(session, conn->cache == NULL);

	WT_RET(__wt_calloc_one(session, &conn->cache));

	cache = conn->cache;

	/* Use a common routine for run-time configuration options. */
	WT_RET(__wt_cache_config(session, false, cfg));

	/*
	 * The lowest possible page read-generation has a special meaning, it
	 * marks a page for forcible eviction; don't let it happen by accident.
	 */
	cache->read_gen = WT_READGEN_START_VALUE;

	/*
	 * The target size must be lower than the trigger size or we will never
	 * get any work done.
	 */
	if (cache->eviction_target >= cache->eviction_trigger)
		WT_ERR_MSG(session, EINVAL,
		    "eviction target must be lower than the eviction trigger");

	WT_ERR(__wt_cond_auto_alloc(session, "cache eviction server",
	    false, 10000, WT_MILLION, &cache->evict_cond));
	WT_ERR(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
	WT_ERR(__wt_spin_init(session,
	    &cache->evict_queue_lock, "cache eviction queue"));
	WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
	if ((ret = __wt_open_internal_session(conn, "evict pass",
	    false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
		WT_ERR_MSG(NULL, ret,
		    "Failed to create session for eviction walks");

	/* Allocate the LRU eviction queue. */
	cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
	for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
		WT_ERR(__wt_calloc_def(session,
		    cache->evict_slots, &cache->evict_queues[i].evict_queue));
		WT_ERR(__wt_spin_init(session,
		    &cache->evict_queues[i].evict_lock, "cache eviction"));
	}

	/* Ensure there are always non-NULL queues. */
	cache->evict_current_queue = cache->evict_fill_queue =
	    &cache->evict_queues[0];
	cache->evict_other_queue = &cache->evict_queues[1];
	cache->evict_urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];

	/*
	 * We get/set some values in the cache statistics (rather than have
	 * two copies), configure them.
	 */
	__wt_cache_stats_update(session);
	return (0);

err:	WT_RET(__wt_cache_destroy(session));
	return (ret);
}