Пример #1
0
/*
 * __logmgr_sync_cfg --
 *	Translate the transaction_sync configuration into the connection's
 *	txn_logsync flags.
 *
 *	Returns 0 on success, or the error from a failed configuration lookup.
 */
static int
__logmgr_sync_cfg(WT_SESSION_IMPL *session, const char **cfg)
{
	WT_CONFIG_ITEM sync_item;
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* transaction_sync.enabled toggles the WT_LOG_SYNC_ENABLED flag. */
	WT_RET(__wt_config_gets(
	    session, cfg, "transaction_sync.enabled", &sync_item));
	if (sync_item.val == 0)
		FLD_CLR(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
	else
		FLD_SET(conn->txn_logsync, WT_LOG_SYNC_ENABLED);

	/*
	 * transaction_sync.method selects the method flags.  Discard whatever
	 * method flags were set previously, then set the ones matching the
	 * configured method; a method that matches none of the known names
	 * leaves all of them clear.
	 */
	WT_RET(__wt_config_gets(
	    session, cfg, "transaction_sync.method", &sync_item));
	FLD_CLR(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH | WT_LOG_FSYNC);
	if (WT_STRING_MATCH("none", sync_item.str, sync_item.len))
		FLD_SET(conn->txn_logsync, WT_LOG_FLUSH);
	else if (WT_STRING_MATCH("fsync", sync_item.str, sync_item.len))
		FLD_SET(conn->txn_logsync, WT_LOG_FSYNC);
	else if (WT_STRING_MATCH("dsync", sync_item.str, sync_item.len))
		FLD_SET(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH);

	return (0);
}
Пример #2
0
/*
 * __logmgr_config --
 *	Read the "log.*" configuration and apply it to the connection.
 *
 *	On return *runp is 1 if log.enabled was set and 0 otherwise.  The log
 *	compressor and log path are configured in either case; the remaining
 *	options are only read when logging is enabled.
 */
static int
__logmgr_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
{
	WT_CONFIG_ITEM item;
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* Tell the caller whether logging was requested. */
	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &item));
	*runp = item.val == 0 ? 0 : 1;

	/*
	 * The compressor and the log path are needed even when logging is
	 * off, in case we are going to print a log.
	 */
	conn->log_compressor = NULL;
	WT_RET(__wt_config_gets_none(session, cfg, "log.compressor", &item));
	WT_RET(__wt_compressor_config(session, &item, &conn->log_compressor));

	WT_RET(__wt_config_gets(session, cfg, "log.path", &item));
	WT_RET(__wt_strndup(session, item.str, item.len, &conn->log_path));

	/* Nothing else applies unless logging is enabled. */
	if (!*runp)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &item));
	if (item.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	WT_RET(__wt_config_gets(session, cfg, "log.file_max", &item));
	conn->log_file_max = (wt_off_t)item.val;
	WT_STAT_FAST_CONN_SET(session, log_max_filesize, conn->log_file_max);

	/*
	 * With pre-allocation configured, start the pre-allocation count at
	 * one; it is adapted later as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &item));
	if (item.val != 0) {
		conn->log_prealloc = 1;
		FLD_SET(conn->log_flags, WT_CONN_LOG_PREALLOC);
	}

	/* log.recover is optional; only the value "error" sets a flag. */
	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &item));
	if (item.len != 0 && WT_STRING_MATCH("error", item.str, item.len))
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);

	/* Finish with the transaction_sync settings. */
	return (__logmgr_sync_cfg(session, cfg));
}
Пример #3
0
/*
 * __wt_logmgr_create --
 *	Set up the log subsystem; this is done before recovery runs.
 *
 *	Parses the log configuration and, if logging is enabled, allocates and
 *	initializes the connection's WT_LOG structure, opens the log and
 *	initializes the log slots.  Returns 0 on success (including when
 *	logging is not configured) or the first error encountered.
 */
int
__wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	bool log_enabled;

	conn = S2C(session);

	/* Parse the configuration; without logging there is nothing to do. */
	WT_RET(__logmgr_config(session, cfg, &log_enabled, false));
	if (!log_enabled)
		return (0);

	FLD_SET(conn->log_flags, WT_CONN_LOG_ENABLED);

	/* Logging is on: allocate the WT_LOG structure and its locks. */
	WT_RET(__wt_calloc_one(session, &conn->log));
	log = conn->log;
	WT_RET(__wt_spin_init(session, &log->log_lock, "log"));
	WT_RET(__wt_spin_init(session, &log->log_slot_lock, "log slot"));
	WT_RET(__wt_spin_init(session, &log->log_sync_lock, "log sync"));
	WT_RET(__wt_spin_init(
	    session, &log->log_writelsn_lock, "log write LSN"));
	WT_RET(__wt_rwlock_alloc(
	    session, &log->log_archive_lock, "log archive lock"));

	/*
	 * The allocation size is the log alignment, unless direct I/O is
	 * configured for log files, in which case it is the larger of the
	 * buffer alignment and the log alignment.
	 */
	if (!FLD_ISSET(conn->direct_io, WT_FILE_TYPE_LOG))
		log->allocsize = WT_LOG_ALIGN;
	else
		log->allocsize =
		    WT_MAX((uint32_t)conn->buffer_alignment, WT_LOG_ALIGN);

	WT_INIT_LSN(&log->alloc_lsn);
	WT_INIT_LSN(&log->ckpt_lsn);
	WT_INIT_LSN(&log->first_lsn);
	WT_INIT_LSN(&log->sync_lsn);
	/*
	 * Only file numbers are used for directory sync, so this LSN needs
	 * to be initialized to zero.
	 */
	WT_ZERO_LSN(&log->sync_dir_lsn);
	WT_INIT_LSN(&log->trunc_lsn);
	WT_INIT_LSN(&log->write_lsn);
	WT_INIT_LSN(&log->write_start_lsn);
	log->fileid = 0;

	/* Condition variables, then open the log file and set up the slots. */
	WT_RET(__wt_cond_alloc(
	    session, "log sync", false, &log->log_sync_cond));
	WT_RET(__wt_cond_alloc(
	    session, "log write", false, &log->log_write_cond));
	WT_RET(__wt_log_open(session));

	return (__wt_log_slot_init(session));
}
Пример #4
0
/*
 * __conn_verbose_config --
 *	Set verbose configuration.
 *
 *	Looks up the "verbose" configuration entry and, for every known
 *	verbose category, sets the category's flag in conn->verbose when the
 *	category is present with a non-zero value and clears it otherwise.  A
 *	missing "verbose" entry is not an error and leaves the flags untouched.
 *
 *	Returns 0 on success, or the first lookup error other than WT_NOTFOUND.
 */
static int
__conn_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{
	/*
	 * The name/flag table is read-only.  The cursor used to walk it is an
	 * ordinary automatic variable: it must not share the table's static
	 * storage class, otherwise concurrent callers would step on each
	 * other's iteration.
	 */
	static const struct verbose_type {
		const char *name;
		uint32_t flag;
	} verbtypes[] = {
		{ "block",	WT_VERB_block },
		{ "shared_cache",WT_VERB_shared_cache },
		{ "ckpt",	WT_VERB_ckpt },
		{ "evict",	WT_VERB_evict },
		{ "evictserver",WT_VERB_evictserver },
		{ "fileops",	WT_VERB_fileops },
		{ "hazard",	WT_VERB_hazard },
		{ "lsm",	WT_VERB_lsm },
		{ "mutex",	WT_VERB_mutex },
		{ "read",	WT_VERB_read },
		{ "reconcile",	WT_VERB_reconcile },
		{ "salvage",	WT_VERB_salvage },
		{ "verify",	WT_VERB_verify },
		{ "write",	WT_VERB_write },
		{ NULL, 0 }
	};
	WT_CONFIG_ITEM cval, sval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	const struct verbose_type *ft;

	conn = S2C(session);

	/* No "verbose" entry at all: nothing to configure. */
	if ((ret = __wt_config_gets(session, cfg, "verbose", &cval)) != 0)
		return (ret == WT_NOTFOUND ? 0 : ret);

	for (ft = verbtypes; ft->name != NULL; ft++) {
		/*
		 * A category that is absent, or present with a zero value, has
		 * its flag cleared.
		 */
		if ((ret = __wt_config_subgets(
		    session, &cval, ft->name, &sval)) == 0 && sval.val != 0)
			FLD_SET(conn->verbose, ft->flag);
		else
			FLD_CLR(conn->verbose, ft->flag);

		/* Any failure other than "not found" ends the walk. */
		WT_RET_NOTFOUND_OK(ret);
	}
	return (0);
}
Пример #5
0
/*
 * __wt_txn_recover --
 *	Run recovery.
 *
 *	Opens an internal "txn-recover" session, optionally replays the log
 *	for the metadata, scans the metadata for live files, then replays the
 *	log for all other files and forces a checkpoint.  If no log existed, or
 *	the metadata checkpoint LSN is the maximum LSN, only the metadata scan
 *	is done.  Read-only connections never replay the main log: they either
 *	fail with WT_RUN_RECOVERY or stop after the checks.
 *
 *	Returns 0 on success; on failure "Recovery failed" is reported and the
 *	error is returned.  The internal session is always closed and
 *	WT_CONN_RECOVERING is always cleared before returning.
 */
int
__wt_txn_recover(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *metac;
	WT_DECL_RET;
	WT_RECOVERY r;
	struct WT_RECOVERY_FILE *metafile;
	char *config;
	bool eviction_started, needs_rec, was_backup;

	conn = S2C(session);
	WT_CLEAR(r);
	WT_INIT_LSN(&r.ckpt_lsn);
	/*
	 * The error path frees config unconditionally, so give it a defined
	 * value here rather than relying on the metadata search to store one
	 * when it fails.
	 */
	config = NULL;
	eviction_started = false;
	was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);

	/*
	 * We need a real session for recovery.  From here on "session" is the
	 * internal session, which is closed before returning.
	 */
	WT_RET(__wt_open_internal_session(conn, "txn-recover",
	    false, WT_SESSION_NO_LOGGING, &session));
	r.session = session;

	F_SET(conn, WT_CONN_RECOVERING);
	WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
	WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
	WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac));
	metafile = &r.files[WT_METAFILE_ID];
	metafile->c = metac;

	/*
	 * If no log was found (including if logging is disabled), or if the
	 * last checkpoint was done with logging disabled, recovery should not
	 * run.  Scan the metadata to figure out the largest file ID.
	 */
	if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_EXISTED) ||
	    WT_IS_MAX_LSN(&metafile->ckpt_lsn)) {
		WT_ERR(__recovery_file_scan(&r));
		conn->next_file_id = r.max_fileid;
		goto done;
	}

	/*
	 * First, do a pass through the log to recover the metadata, and
	 * establish the last checkpoint LSN.  Skip this when opening a hot
	 * backup: we already have the correct metadata in that case.
	 */
	if (!was_backup) {
		r.metadata_only = true;
		/*
		 * If this is a read-only connection, check if the checkpoint
		 * LSN in the metadata file is up to date, indicating a clean
		 * shutdown.
		 */
		if (F_ISSET(conn, WT_CONN_READONLY)) {
			WT_ERR(__wt_log_needs_recovery(
			    session, &metafile->ckpt_lsn, &needs_rec));
			if (needs_rec)
				WT_ERR_MSG(session, WT_RUN_RECOVERY,
				    "Read-only database needs recovery");
		}
		if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
			WT_ERR(__wt_log_scan(session,
			    NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r));
		else {
			/*
			 * Start at the last checkpoint LSN referenced in the
			 * metadata.  If we see the end of a checkpoint while
			 * scanning, we will change the full scan to start from
			 * there.
			 */
			r.ckpt_lsn = metafile->ckpt_lsn;
			ret = __wt_log_scan(session,
			    &metafile->ckpt_lsn, 0, __txn_log_recover, &r);
			/* ENOENT from the scan is tolerated here. */
			if (ret == ENOENT)
				ret = 0;
			WT_ERR(ret);
		}
	}

	/* Scan the metadata to find the live files and their IDs. */
	WT_ERR(__recovery_file_scan(&r));

	/*
	 * We no longer need the metadata cursor: close it to avoid pinning any
	 * resources that could block eviction during recovery.
	 */
	r.files[0].c = NULL;
	WT_ERR(metac->close(metac));

	/*
	 * Now, recover all the files apart from the metadata.
	 * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
	 */
	r.metadata_only = false;
	WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
	    "Main recovery loop: starting at %" PRIu32 "/%" PRIu32,
	    r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset));
	WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
	/*
	 * Check if the database was shut down cleanly.  If not
	 * return an error if the user does not want automatic
	 * recovery.
	 */
	if (needs_rec &&
	    (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) ||
	     F_ISSET(conn, WT_CONN_READONLY))) {
		/* Read-only connections get an explanatory message. */
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_ERR_MSG(session, WT_RUN_RECOVERY,
			    "Read-only database needs recovery");
		WT_ERR(WT_RUN_RECOVERY);
	}

	/* A read-only connection stops here: no replay, no checkpoint. */
	if (F_ISSET(conn, WT_CONN_READONLY))
		goto done;

	/*
	 * Recovery can touch more data than fits in cache, so it relies on
	 * regular eviction to manage paging.  Start eviction threads for
	 * recovery without LAS cursors.
	 */
	WT_ERR(__wt_evict_create(session));
	eviction_started = true;

	/*
	 * Always run recovery even if it was a clean shutdown only if
	 * this is not a read-only connection.
	 * We can consider skipping it in the future.
	 */
	if (WT_IS_INIT_LSN(&r.ckpt_lsn))
		WT_ERR(__wt_log_scan(session, NULL,
		    WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r));
	else {
		ret = __wt_log_scan(session, &r.ckpt_lsn,
		    WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
		/* As above, ENOENT from the scan is tolerated. */
		if (ret == ENOENT)
			ret = 0;
		WT_ERR(ret);
	}

	conn->next_file_id = r.max_fileid;

	/*
	 * If recovery ran successfully forcibly log a checkpoint so the next
	 * open is fast and keep the metadata up to date with the checkpoint
	 * LSN and archiving.
	 */
	WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));

done:	FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);
err:	WT_TRET(__recovery_free(&r));
	__wt_free(session, config);

	if (ret != 0)
		__wt_err(session, ret, "Recovery failed");

	/*
	 * Destroy the eviction threads that were started in support of
	 * recovery.  They will be restarted once the lookaside table is
	 * created.
	 */
	if (eviction_started)
		WT_TRET(__wt_evict_destroy(session));

	/* Close the internal recovery session opened at the top. */
	WT_TRET(session->iface.close(&session->iface, NULL));
	F_CLR(conn, WT_CONN_RECOVERING);

	return (ret);
}
Пример #6
0
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 *
 *	If the tree is already open, or already has metadata, returns EEXIST
 *	when "exclusive" is set and 0 otherwise.  Otherwise builds a temporary
 *	handle from the configuration, writes its metadata, discards the
 *	temporary handle, then opens and releases the real tree.
 *
 *	Returns 0 on success, EINVAL for an invalid configuration, or the
 *	first error from a lower-level call.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] = API_CONF_DEFAULTS(session, create, config);
	const char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 *
	 * The result must be stored in ret: the check that follows would
	 * otherwise test the stale value from the lookup above and let a real
	 * metadata read failure go unnoticed.
	 */
	if ((ret = __wt_metadata_read(session, uri, &tmpconfig)) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	/*
	 * From here on the handle is allocated: every failure must go through
	 * the error label so the handle is discarded rather than leaked.
	 */
	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_newest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST);
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	/* Newest/oldest Bloom filters make no sense with filters off. */
	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST) ||
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_config", &cval));
	/* For a structured value, drop the first and last characters. */
	if (cval.type == ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_chunk_size", &cval));
	lsm_tree->chunk_size = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_threads", &cval));
	lsm_tree->merge_threads = (uint32_t)cval.val;
	/* Sanity check that api_data.py is in sync with lsm.h */
	WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS);

	/* The per-file configuration is the caller's plus raw formats. */
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s,key_format=u,value_format=u", config));
	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	/* The error label is only reachable by jumping to it. */
	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
Пример #7
0
/*
 * __logmgr_config --
 *	Read the "log.*" configuration and apply it to the connection, either
 *	at creation (reconfig false) or during a reconfigure (reconfig true).
 *
 *	*runp is set to whether logging is enabled.  Returns EINVAL if a
 *	reconfigure tries to change the enabled state, if logging is combined
 *	with in_memory, or if zero-filling is requested on a read-only
 *	connection; otherwise 0 or the first lookup/setup error.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM item;
	WT_CONNECTION_IMPL *conn;
	bool log_on, was_on;

	conn = S2C(session);

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &item));
	log_on = item.val != 0;

	/*
	 * A reconfigure may not change whether logging is enabled: turning
	 * it on when it is off, or off when it is on, is an error.
	 */
	if (reconfig) {
		was_on = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) != 0;
		if (log_on != was_on)
			return (EINVAL);
	}

	/* Logging is incompatible with in-memory. */
	if (log_on) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &item));
		if (item.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = log_on;

	/*
	 * The compressor and log path are set up on creation only, and even
	 * if logging is disabled in case we are going to print a log.  Once
	 * set they cannot be changed.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &item));
		WT_RET(__wt_compressor_config(
		    session, &item, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &item));
		WT_RET(__wt_strndup(
		    session, item.str, item.len, &conn->log_path));
	}

	/* Nothing else applies unless logging is enabled. */
	if (!log_on)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &item));
	if (item.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	/*
	 * The maximum file size is only read on creation; an attempt to
	 * change it later is ignored.  The amount of memory allocated to the
	 * log slots may be based on the log file size at creation and we
	 * don't want to re-allocate that memory while running.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &item));
		conn->log_file_max = (wt_off_t)item.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * With pre-allocation configured, start the pre-allocation count at
	 * one; it is adapted later as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &item));
	if (item.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Reconfiguring log.recover at runtime is meaningless: it only
	 * matters on create, before recovery runs.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &item));
	if (item.len != 0 && WT_STRING_MATCH("error", item.str, item.len))
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);

	/* Zero-filling log files is rejected on a read-only connection. */
	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &item));
	if (item.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));

	/* Signal the log condition variable, if there is one. */
	if (conn->log_cond == NULL)
		return (0);
	return (__wt_cond_auto_signal(session, conn->log_cond));
}
Пример #8
0
/*
 * wiredtiger_open --
 *	Main library entry point: open a new connection to a WiredTiger
 *	database.
 *
 *	home is handed to the database-home setup, event_handler is installed
 *	on the initial session, and config is checked and then layered over
 *	the default configuration.  On success *wt_connp is set to the new
 *	connection's interface and 0 is returned; on failure *wt_connp stays
 *	NULL, the connection is destroyed (if it still exists) and the error
 *	is returned.
 */
int
wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
    const char *config, WT_CONNECTION **wt_connp)
{
	static WT_CONNECTION stdc = {
		__conn_close,
		__conn_reconfigure,
		__conn_get_home,
		__conn_is_new,
		__conn_open_session,
		__conn_load_extension,
		__conn_add_data_source,
		__conn_add_collator,
		__conn_add_compressor,
		__conn_add_extractor
	};
	/*
	 * The name/flag table is read-only.  The cursor used to walk it is an
	 * ordinary automatic variable: it must not share the table's static
	 * storage class, otherwise concurrent opens would step on each
	 * other's iteration.
	 */
	static const struct directio_type {
		const char *name;
		uint32_t flag;
	} directio_types[] = {
		{ "data",	WT_DIRECTIO_DATA },
		{ "log",	WT_DIRECTIO_LOG },
		{ NULL, 0 }
	};
	WT_CONFIG subconfig;
	WT_CONFIG_ITEM cval, skey, sval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_ITEM *cbuf, expath, exconfig;
	WT_SESSION_IMPL *session;
	const struct directio_type *ft;
	const char *cfg[] =
	    { __wt_confdfl_wiredtiger_open, config, NULL, NULL, NULL };
	int exist;

	*wt_connp = NULL;
	session = NULL;
	cbuf = NULL;
	WT_CLEAR(expath);
	WT_CLEAR(exconfig);

	WT_RET(__wt_library_init());

	WT_RET(__wt_calloc_def(NULL, 1, &conn));
	conn->iface = stdc;

	/*
	 * Immediately link the structure into the connection structure list:
	 * the only thing ever looked at on that list is the database name,
	 * and a NULL value is fine.
	 */
	__wt_spin_lock(NULL, &__wt_process.spinlock);
	TAILQ_INSERT_TAIL(&__wt_process.connqh, conn, q);
	__wt_spin_unlock(NULL, &__wt_process.spinlock);

	/* Until a real session is opened, use the connection's dummy one. */
	session = conn->default_session = &conn->dummy_session;
	session->iface.connection = &conn->iface;
	session->name = "wiredtiger_open";
	__wt_event_handler_set(session, event_handler);

	/* Remaining basic initialization of the connection structure. */
	WT_ERR(__wt_connection_init(conn));

	/* Check the configuration strings. */
	WT_ERR(__wt_config_check(
	    session, __wt_confchk_wiredtiger_open, config, 0));

	/* Get the database home. */
	WT_ERR(__conn_home(session, home, cfg));

	/* Make sure no other thread of control already owns this database. */
	WT_ERR(__conn_single(session, cfg));

	/* Read the database-home configuration file. */
	WT_ERR(__conn_config_file(session, cfg, &cbuf));

	/* Read the environment variable configuration. */
	WT_ERR(__conn_config_env(session, cfg));

	WT_ERR(__wt_config_gets(session, cfg, "hazard_max", &cval));
	conn->hazard_max = (uint32_t)cval.val;
	/* Room is added for the internal sessions on top of session_max. */
	WT_ERR(__wt_config_gets(session, cfg, "session_max", &cval));
	conn->session_size = (uint32_t)cval.val + WT_NUM_INTERNAL_SESSIONS;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge", &cval));
	if (cval.val)
		F_SET(conn, WT_CONN_LSM_MERGE);
	WT_ERR(__wt_config_gets(session, cfg, "sync", &cval));
	if (cval.val)
		F_SET(conn, WT_CONN_SYNC);
	WT_ERR(__wt_config_gets(session, cfg, "transactional", &cval));
	if (cval.val)
		F_SET(conn, WT_CONN_TRANSACTIONAL);

	/* Configure verbose flags. */
	WT_ERR(__conn_verbose_config(session, cfg));

	WT_ERR(__wt_conn_cache_pool_config(session, cfg));

	/* Open the log file handle if logging is configured. */
	WT_ERR(__wt_config_gets(session, cfg, "logging", &cval));
	if (cval.val != 0)
		WT_ERR(__wt_open(
		   session, WT_LOG_FILENAME, 1, 0, 0, &conn->log_fh));

	/*
	 * Configure direct I/O and buffer alignment.  A value of -1 selects
	 * the default alignment.
	 */
	WT_ERR(__wt_config_gets(session, cfg, "buffer_alignment", &cval));
	if (cval.val == -1)
		conn->buffer_alignment = WT_BUFFER_ALIGNMENT_DEFAULT;
	else
		conn->buffer_alignment = (size_t)cval.val;
#ifndef HAVE_POSIX_MEMALIGN
	if (conn->buffer_alignment != 0)
		WT_ERR_MSG(session, EINVAL,
		    "buffer_alignment requires posix_memalign");
#endif

	/*
	 * Configuration: direct_io, mmap, statistics.
	 */
	WT_ERR(__wt_config_gets(session, cfg, "direct_io", &cval));
	for (ft = directio_types; ft->name != NULL; ft++) {
		/* A file type that isn't listed is simply left unset. */
		ret = __wt_config_subgets(session, &cval, ft->name, &sval);
		if (ret == 0) {
			if (sval.val)
				FLD_SET(conn->direct_io, ft->flag);
		} else if (ret != WT_NOTFOUND)
			goto err;
	}
	WT_ERR(__wt_config_gets(session, cfg, "mmap", &cval));
	conn->mmap = cval.val == 0 ? 0 : 1;
	WT_ERR(__wt_config_gets(session, cfg, "statistics", &cval));
	conn->statistics = cval.val == 0 ? 0 : 1;

	/*
	 * Load any extensions referenced in the config.  Each key is the
	 * extension path; a non-empty value is passed on as "entry=<value>".
	 */
	WT_ERR(__wt_config_gets(session, cfg, "extensions", &cval));
	WT_ERR(__wt_config_subinit(session, &subconfig, &cval));
	while ((ret = __wt_config_next(&subconfig, &skey, &sval)) == 0) {
		WT_ERR(__wt_buf_fmt(
		    session, &expath, "%.*s", (int)skey.len, skey.str));
		if (sval.len > 0)
			WT_ERR(__wt_buf_fmt(session, &exconfig,
			    "entry=%.*s\n", (int)sval.len, sval.str));
		WT_ERR(conn->iface.load_extension(&conn->iface,
		    expath.data, (sval.len > 0) ? exconfig.data : NULL));
	}
	WT_ERR_NOTFOUND_OK(ret);

	/*
	 * Open the connection; if that fails, the connection handle has been
	 * destroyed by the time the open function returns.
	 *
	 * NOTE(review): on that failure "session" still points at the
	 * destroyed connection's dummy session and is passed to the buffer
	 * frees at the error label; confirm those calls never dereference it.
	 */
	if ((ret = __wt_connection_open(conn, cfg)) != 0) {
		conn = NULL;
		WT_ERR(ret);
	}

	/* Open the default session. */
	WT_ERR(__wt_open_session(conn, 1, NULL, NULL, &conn->default_session));
	session = conn->default_session;

	/*
	 * Check on the turtle and metadata files, creating them if necessary
	 * (which avoids application threads racing to create the metadata file
	 * later).
	 */
	WT_ERR(__wt_meta_turtle_init(session, &exist));
	if (!exist) {
		/*
		 * We're single-threaded, but acquire the schema lock
		 * regardless: the lower level code checks that it is
		 * appropriately synchronized.
		 */
		WT_WITH_SCHEMA_LOCK(session,
		    ret = __wt_schema_create(session, WT_METADATA_URI, NULL));
		WT_ERR(ret);
	}
	WT_ERR(__wt_metadata_open(session));

	/* If there's a hot-backup file, load it. */
	WT_ERR(__wt_metadata_load_backup(session));

	/*
	 * XXX LSM initialization.
	 * This is structured so that it could be moved to an extension.
	 */
	WT_ERR(__wt_lsm_init(&conn->iface, NULL));

	/* The interface must be first so the two pointers are the same. */
	STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0);
	*wt_connp = &conn->iface;

	/*
	 * Destroying the connection on error will destroy our session handle,
	 * cleanup using the session handle first, then discard the connection.
	 */
err:	if (cbuf != NULL)
		__wt_buf_free(session, cbuf);
	__wt_buf_free(session, &expath);
	__wt_buf_free(session, &exconfig);

	if (ret != 0 && conn != NULL)
		WT_TRET(__wt_connection_destroy(conn));

	/* Let the server threads proceed. */
	if (ret == 0)
		conn->connection_initialized = 1;

	return (ret);
}
Пример #9
0
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 *
 *	If the tree is already open, or already has metadata, returns EEXIST
 *	when "exclusive" is set and 0 otherwise.  Otherwise builds a temporary
 *	handle from the configuration, writes its metadata, discards the
 *	temporary handle, then opens and releases the real tree.
 *
 *	Returns 0 on success, EINVAL for an invalid configuration, or the
 *	first error from a lower-level call.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] =
	    { WT_CONFIG_BASE(session, session_create), config, NULL };
	char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 *
	 * The result must be stored in ret: the check that follows would
	 * otherwise test the stale value from the lookup above and let a real
	 * metadata search failure go unnoticed.
	 */
	if ((ret = __wt_metadata_search(session, uri, &tmpconfig)) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	/*
	 * From here on the handle is allocated: every failure goes through
	 * the error label so the handle is discarded.
	 */
	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "collator", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->collator_name));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
	if (cval.val)
		F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
	else
		F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	/* An oldest-chunk Bloom filter makes no sense with filters off. */
	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
	/* For a structured value, drop the first and last characters. */
	if (cval.type == WT_CONFIG_ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
	lsm_tree->chunk_max = (uint64_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
	lsm_tree->chunk_size = (uint64_t)cval.val;
	if (lsm_tree->chunk_size > lsm_tree->chunk_max)
		WT_ERR_MSG(session, EINVAL,
		    "Chunk size (chunk_size) must be smaller than or equal to "
		    "the maximum chunk size (chunk_max)");
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_min", &cval));
	lsm_tree->merge_min = (uint32_t)cval.val;
	if (lsm_tree->merge_min > lsm_tree->merge_max)
		WT_ERR_MSG(session, EINVAL,
		    "LSM merge_min must be less than or equal to merge_max");

	/*
	 * Set up the config for each chunk.
	 *
	 * Make the memory_page_max double the maximum chunk size, so
	 * application threads don't immediately try to force evict the chunk
	 * when the worker thread clears the NO_EVICTION flag.
	 */
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s,key_format=u,value_format=u,memory_page_max=%" PRIu64,
	    config, 2 * lsm_tree->chunk_max));
	WT_ERR(__wt_strndup(
	    session, buf->data, buf->size, &lsm_tree->file_config));

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	/* The error label is only reachable by jumping to it. */
	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
Пример #10
0
/*
 * __wt_txn_recover --
 *	Run recovery.
 */
int
__wt_txn_recover(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *metac;
	WT_DECL_RET;
	WT_RECOVERY r;
	WT_RECOVERY_FILE *metafile;
	char *config;
	bool do_checkpoint, eviction_started, needs_rec, was_backup;

	conn = S2C(session);
	WT_CLEAR(r);
	WT_INIT_LSN(&r.ckpt_lsn);
	config = NULL;
	do_checkpoint = true;
	eviction_started = false;
	was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);

	/* We need a real session for recovery. */
	WT_RET(__wt_open_internal_session(conn, "txn-recover",
	    false, WT_SESSION_NO_LOGGING, &session));
	r.session = session;
	WT_MAX_LSN(&r.max_ckpt_lsn);
	WT_MAX_LSN(&r.max_rec_lsn);
	conn->txn_global.recovery_timestamp =
	    conn->txn_global.meta_ckpt_timestamp = 0;

	F_SET(conn, WT_CONN_RECOVERING);
	WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
	WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
	WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac));
	metafile = &r.files[WT_METAFILE_ID];
	metafile->c = metac;

	/*
	 * If no log was found (including if logging is disabled), or if the
	 * last checkpoint was done with logging disabled, recovery should not
	 * run.  Scan the metadata to figure out the largest file ID.
	 */
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) ||
	    WT_IS_MAX_LSN(&metafile->ckpt_lsn)) {
		/*
		 * Detect if we're going from logging disabled to enabled.
		 * We need to know this to verify LSNs and start at the correct
		 * log file later.  If someone ran with logging, then disabled
		 * it and removed all the log files and then turned logging back
		 * on, we have to start logging in the log file number that is
		 * larger than any checkpoint LSN we have from the earlier time.
		 */
		WT_ERR(__recovery_file_scan(&r));
		/*
		 * The array can be re-allocated in recovery_file_scan.  Reset
		 * our pointer after scanning all the files.
		 */
		metafile = &r.files[WT_METAFILE_ID];
		conn->next_file_id = r.max_fileid;

		if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
		    WT_IS_MAX_LSN(&metafile->ckpt_lsn) &&
		    !WT_IS_MAX_LSN(&r.max_ckpt_lsn))
			WT_ERR(__wt_log_reset(session, r.max_ckpt_lsn.l.file));
		else
			do_checkpoint = false;
		goto done;
	}

	/*
	 * First, do a pass through the log to recover the metadata, and
	 * establish the last checkpoint LSN.  Skip this when opening a hot
	 * backup: we already have the correct metadata in that case.
	 *
	 * If we're running with salvage and we hit an error, we ignore it
	 * and continue. In salvage we want to recover whatever part of the
	 * data we can from the last checkpoint up until whatever problem we
	 * detect in the log file. In salvage, we ignore errors from scanning
	 * the log so recovery can continue. Other errors remain errors.
	 */
	if (!was_backup) {
		r.metadata_only = true;
		/*
		 * If this is a read-only connection, check if the checkpoint
		 * LSN in the metadata file is up to date, indicating a clean
		 * shutdown.
		 */
		if (F_ISSET(conn, WT_CONN_READONLY)) {
			WT_ERR(__wt_log_needs_recovery(
			    session, &metafile->ckpt_lsn, &needs_rec));
			if (needs_rec)
				WT_ERR_MSG(session, WT_RUN_RECOVERY,
				    "Read-only database needs recovery");
		}
		if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
			ret = __wt_log_scan(session,
			    NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r);
		else {
			/*
			 * Start at the last checkpoint LSN referenced in the
			 * metadata.  If we see the end of a checkpoint while
			 * scanning, we will change the full scan to start from
			 * there.
			 */
			r.ckpt_lsn = metafile->ckpt_lsn;
			ret = __wt_log_scan(session,
			    &metafile->ckpt_lsn, 0, __txn_log_recover, &r);
		}
		if (F_ISSET(conn, WT_CONN_SALVAGE))
			ret = 0;
		/*
		 * If log scan couldn't find a file we expected to be around,
		 * this indicates a corruption of some sort.
		 */
		if (ret == ENOENT) {
			F_SET(conn, WT_CONN_DATA_CORRUPTION);
			ret = WT_ERROR;
		}

		WT_ERR(ret);
	}

	/* Scan the metadata to find the live files and their IDs. */
	WT_ERR(__recovery_file_scan(&r));
	/*
	 * Clear this out.  We no longer need it and it could have been
	 * re-allocated when scanning the files.
	 */
	WT_NOT_READ(metafile, NULL);

	/*
	 * We no longer need the metadata cursor: close it to avoid pinning any
	 * resources that could block eviction during recovery.
	 */
	r.files[0].c = NULL;
	WT_ERR(metac->close(metac));

	/*
	 * Now, recover all the files apart from the metadata.
	 * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
	 */
	r.metadata_only = false;
	__wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS,
	    "Main recovery loop: starting at %" PRIu32 "/%" PRIu32
	    " to %" PRIu32 "/%" PRIu32, r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset,
	    r.max_rec_lsn.l.file, r.max_rec_lsn.l.offset);
	WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
	/*
	 * Check if the database was shut down cleanly.  If not
	 * return an error if the user does not want automatic
	 * recovery.
	 */
	if (needs_rec &&
	    (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) ||
	     F_ISSET(conn, WT_CONN_READONLY))) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_ERR_MSG(session, WT_RUN_RECOVERY,
			    "Read-only database needs recovery");
		WT_ERR_MSG(session, WT_RUN_RECOVERY, "Database needs recovery");
	}

	if (F_ISSET(conn, WT_CONN_READONLY)) {
		do_checkpoint = false;
		goto done;
	}

	/*
	 * Recovery can touch more data than fits in cache, so it relies on
	 * regular eviction to manage paging.  Start eviction threads for
	 * recovery without LAS cursors.
	 */
	WT_ERR(__wt_evict_create(session));
	eviction_started = true;

	/*
	 * Always run recovery even if it was a clean shutdown only if
	 * this is not a read-only connection.
	 * We can consider skipping it in the future.
	 */
	if (needs_rec)
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);
	if (WT_IS_INIT_LSN(&r.ckpt_lsn))
		ret = __wt_log_scan(session, NULL,
		    WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r);
	else
		ret = __wt_log_scan(session, &r.ckpt_lsn,
		    WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
	if (F_ISSET(conn, WT_CONN_SALVAGE))
		ret = 0;
	WT_ERR(ret);

	conn->next_file_id = r.max_fileid;

done:	WT_ERR(__recovery_set_checkpoint_timestamp(&r));
	if (do_checkpoint)
		/*
		 * Forcibly log a checkpoint so the next open is fast and keep
		 * the metadata up to date with the checkpoint LSN and
		 * archiving.
		 */
		WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));

	/*
	 * If we're downgrading and have newer log files, force an archive,
	 * no matter what the archive setting is.
	 */
	if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE))
		WT_ERR(__wt_log_truncate_files(session, NULL, true));
	FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);

err:	WT_TRET(__recovery_free(&r));
	__wt_free(session, config);
	FLD_CLR(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY);

	if (ret != 0) {
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_FAILED);
		__wt_err(session, ret, "Recovery failed");
	}

	/*
	 * Destroy the eviction threads that were started in support of
	 * recovery.  They will be restarted once the lookaside table is
	 * created.
	 */
	if (eviction_started)
		WT_TRET(__wt_evict_destroy(session));

	WT_TRET(session->iface.close(&session->iface, NULL));
	F_CLR(conn, WT_CONN_RECOVERING);

	return (ret);
}
Пример #11
0
/*
 * __logmgr_config --
 *	Read the "log" configuration group and apply it to the connection.
 *	On success *runp is set to whether logging is enabled; reconfig is
 *	true when called for a reconfiguration rather than at creation.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM item;
	WT_CONNECTION_IMPL *conn;
	bool log_on, was_on;

	conn = S2C(session);

	/*
	 * Reconfiguration background: strings the connection reconfigure
	 * method doesn't allow (for example "log=(enabled)" or
	 * "statistics_log=(path=XXX)") are expected to be rejected by the
	 * standard configuration checks before we get here.  The base values
	 * during reconfiguration are the currently configured values, so
	 * repeated reconfiguration doesn't revert to defaults, and processing
	 * a currently set value should not change it.
	 *
	 * Reconfiguring the log server does not stop and restart it.  Rather
	 * than re-evaluate settings that cannot change and depend on that
	 * evaluation being a no-op, those settings are skipped entirely when
	 * reconfiguring.  Error reporting for them is deliberately minimal
	 * because those paths should never be taken.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &item));
	log_on = item.val != 0;

	/*
	 * A reconfiguration may neither switch logging on nor switch it off:
	 * the requested state has to equal the existing one.  As noted, this
	 * should never happen.
	 */
	if (reconfig) {
		was_on =
		    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) != 0;
		if (log_on != was_on)
			return (EINVAL);
	}

	/* An in-memory configuration cannot be combined with logging. */
	if (log_on) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &item));
		if (item.val != 0) {
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
		}
	}

	*runp = log_on;

	/*
	 * The compressor and log path are configured at creation only, and
	 * they are set up even when logging is off in case a log is going to
	 * be printed.  Once set they cannot be changed.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &item));
		WT_RET(__wt_compressor_config(
		    session, &item, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &item));
		WT_RET(__wt_strndup(
		    session, item.str, item.len, &conn->log_path));
	}

	/* Nothing further applies when logging is disabled. */
	if (!log_on)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &item));
	if (item.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	/*
	 * The maximum log file size is fixed at creation: memory allocated
	 * to the log slots may be based on it and we don't want to
	 * re-allocate that memory while running.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &item));
		conn->log_file_max = (wt_off_t)item.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * With pre-allocation configured, start from a small count; it is
	 * adapted later as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &item));
	if (item.val != 0)
		conn->log_prealloc = 1;

	/*
	 * The recover setting only matters on create, before recovery runs,
	 * so it is not examined when reconfiguring.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets_def(
		    session, cfg, "log.recover", 0, &item));
		if (WT_STRING_MATCH("error", item.str, item.len))
			FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
	}

	/* Zero-filling log files is rejected on a read-only connection. */
	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &item));
	if (item.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY)) {
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		}
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));

	/* Signal the log condition variable, if one has been created. */
	if (conn->log_cond != NULL)
		WT_RET(__wt_cond_auto_signal(session, conn->log_cond));
	return (0);
}
Пример #12
0
/*
 * __thread_group_resize --
 *	Resize an array of utility threads already holding the lock.
 *
 *	new_min and new_max become the group's new bounds.  flags is tested
 *	for WT_THREAD_CAN_WAIT (the thread's session is opened with
 *	WT_SESSION_CAN_WAIT) and WT_THREAD_PANIC_FAIL (copied onto each new
 *	thread structure).
 *
 *	Returns 0 on success.  A failure after the thread array has been
 *	sized destroys the group and panics.
 */
static int
__thread_group_resize(
    WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
    uint32_t new_min, uint32_t new_max, uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_THREAD *thread;
	size_t alloc;
	uint32_t i, session_flags;

	conn = S2C(session);
	session_flags = 0;

	WT_ASSERT(session,
	    group->current_threads <= group->alloc &&
	    __wt_rwlock_islocked(session, group->lock));

	/* Nothing to do if the bounds are unchanged. */
	if (new_min == group->min && new_max == group->max)
		return (0);

	/*
	 * Call shrink to reduce the number of thread structures and running
	 * threads if required by the change in group size.
	 */
	WT_RET(__thread_group_shrink(session, group, new_max));

	/*
	 * Only reallocate the thread array if it is the largest ever, since
	 * our realloc doesn't support shrinking the allocated size.
	 */
	if (group->alloc < new_max) {
		alloc = group->alloc * sizeof(*group->threads);
		WT_RET(__wt_realloc(session, &alloc,
		    new_max * sizeof(*group->threads), &group->threads));
		group->alloc = new_max;
	}

	/*
	 * Initialize the structures based on the previous group size, not
	 * the previous allocated size.
	 */
	for (i = group->max; i < new_max; i++) {
		WT_ERR(__wt_calloc_one(session, &thread));
		/*
		 * Threads get their own session and lookaside table cursor
		 * if the lookaside table is open. Note that threads are
		 * started during recovery, before the lookaside table is
		 * created.
		 */
		if (LF_ISSET(WT_THREAD_CAN_WAIT))
			session_flags = WT_SESSION_CAN_WAIT;
		if (F_ISSET(conn, WT_CONN_LAS_OPEN))
			FLD_SET(session_flags, WT_SESSION_LOOKASIDE_CURSOR);
		/*
		 * The new structure isn't in the group's array yet, so the
		 * group teardown on the error path can't reach it: free it
		 * here if its session can't be opened, otherwise it leaks.
		 */
		if ((ret = __wt_open_internal_session(conn, group->name,
		    false, session_flags, &thread->session)) != 0) {
			__wt_free(session, thread);
			goto err;
		}
		if (LF_ISSET(WT_THREAD_PANIC_FAIL))
			F_SET(thread, WT_THREAD_PANIC_FAIL);
		thread->id = i;
		thread->run_func = group->run_func;
		WT_ASSERT(session, group->threads[i] == NULL);
		group->threads[i] = thread;
	}

	/* Start additional threads if we're below the new minimum. */
	if (group->current_threads < new_min)
		WT_ERR(__thread_group_grow(session, group, new_min));

err:	/*
	 * Update the thread group information even on failure to improve our
	 * chances of cleaning up properly.
	 */
	group->max = new_max;
	group->min = new_min;

	/*
	 * An error resizing a thread array is fatal, it should only happen
	 * in an out of memory situation.
	 */
	if (ret != 0) {
		WT_TRET(__wt_thread_group_destroy(session, group));
		WT_PANIC_RET(session, ret, "Error while resizing thread group");
	}
	return (ret);
}
Пример #13
0
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 *
 *	uri names the tree and config is the caller's configuration string,
 *	layered over the session_create defaults.  If the tree already exists
 *	(either as an open handle or in the metadata), EEXIST is returned
 *	when exclusive is non-zero and 0 otherwise.
 *
 *	Returns 0 on success, EINVAL for an invalid configuration, or the
 *	error from an underlying call.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] =
	    { WT_CONFIG_BASE(session, session_create), config, NULL };
	const char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 *
	 * Capture the search result in ret so the not-found check that
	 * follows tests this call, rather than re-testing the stale value
	 * left by the handle lookup and ignoring real metadata errors.
	 */
	if ((ret = __wt_metadata_search(session, uri, &tmpconfig)) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/* Reject column-store key formats before allocating anything. */
	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	/* From here on, failures must discard the partial handle. */
	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	/* Copy the key/value formats and collator name into the handle. */
	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "collator", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->collator_name));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
	if (cval.val)
		F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
	else
		F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);

	/* Bloom filter policy: off or merged, optionally plus oldest. */
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	/*
	 * For a structure-typed value, skip the first character and drop two
	 * from the length so only the contents between the enclosing
	 * delimiters are copied.
	 */
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
	if (cval.type == WT_CONFIG_ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
	lsm_tree->chunk_max = (uint64_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
	lsm_tree->chunk_size = (uint64_t)cval.val;
	if (lsm_tree->chunk_size > lsm_tree->chunk_max)
		WT_ERR_MSG(session, EINVAL,
		    "Chunk size (chunk_size) must be smaller than or equal to "
		    "the maximum chunk size (chunk_max)");
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	lsm_tree->merge_min = lsm_tree->merge_max / 2;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_threads", &cval));
	lsm_tree->merge_threads = (uint32_t)cval.val;
	/* Sanity check that api_data.py is in sync with lsm.h */
	WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS);

	/*
	 * Set up the config for each chunk.  If possible, avoid high latencies
	 * from fsync by flushing the cache every 8MB (will be overridden by
	 * any application setting).
	 */
	tmpconfig = "";
#ifdef HAVE_SYNC_FILE_RANGE
	if (!S2C(session)->direct_io)
		tmpconfig = "os_cache_dirty_max=8MB,";
#endif
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s%s,key_format=u,value_format=u", tmpconfig, config));
	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	/* The error label is only reachable by jumping to it. */
	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
Пример #14
0
/*
 * __wt_txn_recover --
 *	Run recovery.
 *
 *	Opens a dedicated session with logging disabled, replays the log in
 *	two passes (metadata only, then everything else), and finishes with a
 *	forced checkpoint.  If no log existed, or the metadata's checkpoint
 *	LSN is the maximum LSN, only the metadata is scanned to establish the
 *	connection's next file ID.
 *
 *	Returns 0 on success, WT_RUN_RECOVERY if recovery is needed but the
 *	connection is configured to return an error in that case, or the
 *	error from an underlying call.
 */
int
__wt_txn_recover(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *metac;
	WT_DECL_RET;
	WT_RECOVERY r;
	struct WT_RECOVERY_FILE *metafile;
	char *config;
	int needs_rec, was_backup;

	conn = S2C(session);
	WT_CLEAR(r);
	WT_INIT_LSN(&r.ckpt_lsn);
	was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP) ? 1 : 0;

	/*
	 * The error path frees config unconditionally, so it needs a defined
	 * value even when the metadata search below fails before setting it.
	 */
	config = NULL;

	/* We need a real session for recovery. */
	WT_RET(__wt_open_session(conn, NULL, NULL, 1, &session));
	F_SET(session, WT_SESSION_NO_LOGGING);
	r.session = session;

	/* Register the metadata file first and attach a cursor on it. */
	WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
	WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config));
	WT_ERR(__wt_metadata_cursor(session, NULL, &metac));
	metafile = &r.files[WT_METAFILE_ID];
	metafile->c = metac;

	/*
	 * If no log was found (including if logging is disabled), or if the
	 * last checkpoint was done with logging disabled, recovery should not
	 * run.  Scan the metadata to figure out the largest file ID.
	 */
	if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_EXISTED) ||
	    WT_IS_MAX_LSN(&metafile->ckpt_lsn)) {
		WT_ERR(__recovery_file_scan(&r));
		conn->next_file_id = r.max_fileid;
		goto done;
	}

	/*
	 * First, do a pass through the log to recover the metadata, and
	 * establish the last checkpoint LSN.  Skip this when opening a hot
	 * backup: we already have the correct metadata in that case.
	 */
	if (!was_backup) {
		r.metadata_only = 1;
		if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
			WT_ERR(__wt_log_scan(session,
			    NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r));
		else {
			/*
			 * Start at the last checkpoint LSN referenced in the
			 * metadata.  If we see the end of a checkpoint while
			 * scanning, we will change the full scan to start from
			 * there.
			 */
			r.ckpt_lsn = metafile->ckpt_lsn;
			ret = __wt_log_scan(session,
			    &metafile->ckpt_lsn, 0, __txn_log_recover, &r);
			/* ENOENT from this scan is not treated as an error. */
			if (ret == ENOENT)
				ret = 0;
			WT_ERR(ret);
		}
	}

	/* Scan the metadata to find the live files and their IDs. */
	WT_ERR(__recovery_file_scan(&r));

	/*
	 * We no longer need the metadata cursor: close it to avoid pinning any
	 * resources that could block eviction during recovery.
	 */
	r.files[0].c = NULL;
	WT_ERR(metac->close(metac));

	/*
	 * Now, recover all the files apart from the metadata.
	 * Pass WT_LOGSCAN_RECOVER so that old logs get truncated.
	 */
	r.metadata_only = 0;
	WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
	    "Main recovery loop: starting at %u/%" PRIuMAX,
	    r.ckpt_lsn.file, (uintmax_t)r.ckpt_lsn.offset));
	WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
	/*
	 * Check if the database was shut down cleanly.  If not
	 * return an error if the user does not want automatic
	 * recovery.
	 */
	if (needs_rec && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR))
		WT_ERR(WT_RUN_RECOVERY);
	/*
	 * Always run recovery even if it was a clean shutdown.
	 * We can consider skipping it in the future.
	 */
	if (WT_IS_INIT_LSN(&r.ckpt_lsn))
		WT_ERR(__wt_log_scan(session, NULL,
		    WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
		    __txn_log_recover, &r));
	else {
		ret = __wt_log_scan(session, &r.ckpt_lsn,
		    WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
		/* ENOENT from this scan is not treated as an error. */
		if (ret == ENOENT)
			ret = 0;
		WT_ERR(ret);
	}

	conn->next_file_id = r.max_fileid;

	/*
	 * If recovery ran successfully forcibly log a checkpoint so the next
	 * open is fast and keep the metadata up to date with the checkpoint
	 * LSN and archiving.
	 */
	WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));

done:	FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE);

	/* Common cleanup for success and failure; keeps the first error. */
err:	WT_TRET(__recovery_free(&r));
	__wt_free(session, config);
	WT_TRET(session->iface.close(&session->iface, NULL));

	return (ret);
}