Beispiel #1
0
/*
 * __logmgr_sync_cfg --
 *	Interpret the transaction_sync config.
 */
static int
__logmgr_sync_cfg(WT_SESSION_IMPL *session, const char **cfg)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	WT_RET(
	    __wt_config_gets(session, cfg, "transaction_sync.enabled", &cval));
	if (cval.val)
		FLD_SET(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
	else
		FLD_CLR(conn->txn_logsync, WT_LOG_SYNC_ENABLED);

	WT_RET(
	    __wt_config_gets(session, cfg, "transaction_sync.method", &cval));
	FLD_CLR(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH | WT_LOG_FSYNC);
	if (WT_STRING_MATCH("dsync", cval.str, cval.len))
		FLD_SET(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH);
	else if (WT_STRING_MATCH("fsync", cval.str, cval.len))
		FLD_SET(conn->txn_logsync, WT_LOG_FSYNC);
	else if (WT_STRING_MATCH("none", cval.str, cval.len))
		FLD_SET(conn->txn_logsync, WT_LOG_FLUSH);
	return (0);
}
Beispiel #2
0
/*
 * __wt_txn_begin --
 *	Begin a transaction.
 */
int
__wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *txn_state;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = &txn_global->states[session->id];

	WT_ASSERT(session, txn_state->id == WT_TXN_NONE);

	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
	if (cval.len == 0)
		txn->isolation = session->isolation;
	else
		txn->isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    TXN_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
		    TXN_ISO_READ_COMMITTED : TXN_ISO_READ_UNCOMMITTED;

	/*
	 * Allocate a transaction ID.
	 *
	 * We use an atomic compare and swap to ensure that we get a
	 * unique ID that is published before the global counter is
	 * updated.
	 *
	 * If two threads race to allocate an ID, only the latest ID
	 * will proceed.  The winning thread can be sure its snapshot
	 * contains all of the earlier active IDs.  Threads that race
	 * and get an earlier ID may not appear in the snapshot, but
	 * they will loop and allocate a new ID before proceeding to
	 * make any updates.
	 *
	 * This potentially wastes transaction IDs when threads race to
	 * begin transactions: that is the price we pay to keep this
	 * path latch free.
	 */
	do {
		txn_state->id = txn->id = txn_global->current;
	} while (!WT_ATOMIC_CAS(txn_global->current, txn->id, txn->id + 1));

	/*
	 * If we have used 64-bits of transaction IDs, there is nothing
	 * more we can do.
	 */
	if (txn->id == WT_TXN_ABORTED)
		WT_RET_MSG(session, ENOMEM, "Out of transaction IDs");

	F_SET(txn, TXN_RUNNING);
	if (txn->isolation == TXN_ISO_SNAPSHOT)
		__wt_txn_refresh(session, WT_TXN_NONE, 1);
	return (0);
}
Beispiel #3
0
/*
 * __session_reconfigure --
 *	WT_SESSION->reconfigure method.
 */
static int
__session_reconfigure(WT_SESSION *wt_session, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = (WT_SESSION_IMPL *)wt_session;
	SESSION_API_CALL(session, reconfigure, config, cfg);

	if (F_ISSET(&session->txn, TXN_RUNNING))
		WT_ERR_MSG(session, EINVAL, "transaction in progress");

	WT_TRET(__session_reset_cursors(session));

	WT_ERR(__wt_config_gets_defno(session, cfg, "isolation", &cval));
	if (cval.len != 0) {
		if (!F_ISSET(S2C(session), WT_CONN_TRANSACTIONAL))
			WT_ERR_MSG(session, EINVAL,
			    "Database not configured for transactions");

		session->isolation = session->txn.isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    TXN_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ?
		    TXN_ISO_READ_UNCOMMITTED : TXN_ISO_READ_COMMITTED;
	}

err:	API_END_NOTFOUND_MAP(session, ret);
}
Beispiel #4
0
/*
 * __wt_txn_config --
 *	Configure a transaction.
 */
int
__wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_TXN *txn;

	txn = &session->txn;

	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
	if (cval.len != 0)
		txn->isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    WT_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
		    WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED;

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 *
	 * We want to distinguish between inheriting implicitly and explicitly.
	 */
	F_CLR(txn, WT_TXN_SYNC_SET);
	WT_RET(__wt_config_gets_def(
	    session, cfg, "sync", (int)UINT_MAX, &cval));
	if (cval.val == 0 || cval.val == 1)
		/*
		 * This is an explicit setting of sync.  Set the flag so
		 * that we know not to overwrite it in commit_transaction.
		 */
		F_SET(txn, WT_TXN_SYNC_SET);

	/*
	 * If sync is turned off explicitly, clear the transaction's sync field.
	 */
	if (cval.val == 0)
		txn->txn_logsync = 0;

	WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &cval));
	if (cval.len > 0)
		/*
		 * The layering here isn't ideal - the named snapshot get
		 * function does both validation and setup. Otherwise we'd
		 * need to walk the list of named snapshots twice during
		 * transaction open.
		 */
		WT_RET(__wt_txn_named_snapshot_get(session, &cval));

	return (0);
}
Beispiel #5
0
/*
 * __wt_huffman_confchk --
 *	Verify Huffman configuration.
 */
int
__wt_huffman_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v)
{
	if (v->len == 0)
		return (0);

	/* Standard Huffman encodings, no work to be done. */
	if (WT_STRING_MATCH("english", v->str, v->len))
		return (0);
	if (WT_STRING_MATCH("none", v->str, v->len))
		return (0);

	return (__huffman_confchk_file(session, v, NULL, NULL));
}
Beispiel #6
0
/*
 * __wt_schema_index_source --
 *	Get the URI of the data source for an index.
 */
int
__wt_schema_index_source(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	size_t len;
	const char *prefix, *suffix, *tablename;

	tablename = table->name + strlen("table:");
	if ((ret = __wt_config_getones(session, config, "type", &cval)) == 0 &&
	    !WT_STRING_MATCH("file", cval.str, cval.len)) {
		prefix = cval.str;
		len = cval.len;
		suffix = "_idx";
	} else {
		prefix = "file";
		len = strlen(prefix);
		suffix = ".wti";
	}
	WT_RET_NOTFOUND_OK(ret);

	WT_RET(__wt_buf_fmt(session, buf, "%.*s:%s_%s%s",
	    (int)len, prefix, tablename, idxname, suffix));

	return (0);
}
Beispiel #7
0
/*
 * __curmetadata_search_near --
 *	WT_CURSOR->search_near method for the metadata cursor type.
 */
static int
__curmetadata_search_near(WT_CURSOR *cursor, int *exact)
{
	WT_CURSOR *file_cursor;
	WT_CURSOR_METADATA *mdc;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	mdc = (WT_CURSOR_METADATA *)cursor;
	file_cursor = mdc->file_cursor;
	CURSOR_API_CALL(cursor, session,
	    search_near, ((WT_CURSOR_BTREE *)file_cursor)->btree);

	WT_MD_CURSOR_NEEDKEY(cursor);

	if (WT_STRING_MATCH(
	    (char *)cursor->key.data, "metadata:", cursor->key.size - 1)) {
		WT_ERR(__curmetadata_metadata_search(session, cursor));
		*exact = 1;
	} else {
		WT_ERR(file_cursor->search_near(file_cursor, exact));
		WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor);
	}

err:	if (ret != 0) {
		F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
		F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
	}
	API_END(session, ret);
	return (ret);
}
Beispiel #8
0
/*
 * __wt_config_getones_none --
 *	Get the value for a given string key from a single config string.
 * Treat "none" as empty.
 */
int
__wt_config_getones_none(WT_SESSION_IMPL *session,
    const char *config, const char *key, WT_CONFIG_ITEM *value)
{
	WT_RET(__wt_config_getones(session, config, key, value));
	if (WT_STRING_MATCH("none", value->str, value->len))
		value->len = 0;
	return (0);
}
Beispiel #9
0
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
                  WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
    WT_CONFIG_ITEM cval;
    WT_DECL_RET;
    uint32_t flags;
    bool bitmap, bulk;

    bitmap = bulk = false;
    flags = 0;

    WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
    if (cval.type == WT_CONFIG_ITEM_BOOL ||
            (cval.type == WT_CONFIG_ITEM_NUM &&
             (cval.val == 0 || cval.val == 1))) {
        bitmap = false;
        bulk = cval.val != 0;
    } else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
        bitmap = bulk = true;
    else
        WT_RET_MSG(session, EINVAL,
                   "Value for 'bulk' must be a boolean or 'bitmap'");

    /* Bulk handles require exclusive access. */
    if (bulk)
        LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

    /* Get the handle and lock it while the cursor is using it. */
    if (WT_PREFIX_MATCH(uri, "file:")) {
        /*
         * If we are opening a bulk cursor, get the handle while
         * holding the checkpoint lock.  This prevents a bulk cursor
         * open failing with EBUSY due to a database-wide checkpoint.
         */
        if (bulk)
            __wt_spin_lock(
                session, &S2C(session)->checkpoint_lock);
        ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags);
        if (bulk)
            __wt_spin_unlock(
                session, &S2C(session)->checkpoint_lock);
        WT_RET(ret);
    } else
        WT_RET(__wt_bad_object_type(session, uri));

    WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));

    /* Increment the data-source's in-use counter. */
    __wt_cursor_dhandle_incr_use(session);
    return (0);

err:	/* If the cursor could not be opened, release the handle. */
    WT_TRET(__wt_session_release_btree(session));
    return (ret);
}
Beispiel #10
0
/*
 * __wt_txn_reconfigure --
 *	WT_SESSION::reconfigure for transactions.
 */
int
__wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	WT_TXN *txn;

	txn = &session->txn;

	ret = __wt_config_getones(session, config, "isolation", &cval);
	if (ret == 0 && cval.len != 0) {
		session->isolation = txn->isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    WT_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-uncommitted", cval.str, cval.len) ?
		    WT_ISO_READ_UNCOMMITTED : WT_ISO_READ_COMMITTED;
	}
	WT_RET_NOTFOUND_OK(ret);

	return (0);
}
Beispiel #11
0
/*
 * __logmgr_config --
 *	Parse and setup the logging server options.
 */
static int
__logmgr_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/*
	 * The logging configuration is off by default.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	*runp = cval.val != 0;

	/*
	 * Setup a log path, compression and encryption even if logging is
	 * disabled in case we are going to print a log.
	 */
	conn->log_compressor = NULL;
	WT_RET(__wt_config_gets_none(session, cfg, "log.compressor", &cval));
	WT_RET(__wt_compressor_config(session, &cval, &conn->log_compressor));

	WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
	WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->log_path));

	/* We are done if logging isn't enabled. */
	if (*runp == 0)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
	conn->log_file_max = (wt_off_t)cval.val;
	WT_STAT_FAST_CONN_SET(session, log_max_filesize, conn->log_file_max);

	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	/*
	 * If pre-allocation is configured, set the initial number to one.
	 * We'll adapt as load dictates.
	 */
	if (cval.val != 0) {
		FLD_SET(conn->log_flags, WT_CONN_LOG_PREALLOC);
		conn->log_prealloc = 1;
	}
	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
	if (cval.len != 0  && WT_STRING_MATCH("error", cval.str, cval.len))
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);

	WT_RET(__logmgr_sync_cfg(session, cfg));
	return (0);
}
Beispiel #12
0
/*
 * __ckpt_server_config --
 *	Parse and setup the checkpoint server options.
 */
static int
__ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	char *p;

	conn = S2C(session);

	/*
	 * The checkpoint configuration requires a wait time and/or a log
	 * size -- if one is not set, we're not running at all.
	 * Checkpoints based on log size also require logging be enabled.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
	conn->ckpt_usecs = (long)cval.val * 1000000;
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval));
	conn->ckpt_logsize = (wt_off_t)cval.val;
	__wt_log_written_reset(session);
	if ((conn->ckpt_usecs == 0 && conn->ckpt_logsize == 0) ||
	    (conn->ckpt_logsize && conn->ckpt_usecs == 0 &&
	     !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
		*startp = 0;
		return (0);
	}
	*startp = 1;

	/*
	 * The application can specify a checkpoint name, which we ignore if
	 * it's our default.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval));
	if (cval.len != 0 &&
	    !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
		WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));

		WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp));
		WT_ERR(__wt_buf_fmt(
		    session, tmp, "name=%.*s", (int)cval.len, cval.str));
		WT_ERR(__wt_strdup(session, tmp->data, &p));

		__wt_free(session, conn->ckpt_config);
		conn->ckpt_config = p;
	}

err:	__wt_scr_free(session, &tmp);
	return (ret);
}
Beispiel #13
0
/*
 * __ckpt_named --
 *	Return the information associated with a file's named checkpoint.
 */
static int
__ckpt_named(WT_SESSION_IMPL *session,
    const char *checkpoint, const char *config, WT_CKPT *ckpt)
{
	WT_CONFIG ckptconf;
	WT_CONFIG_ITEM k, v;

	WT_RET(__wt_config_getones(session, config, "checkpoint", &v));
	WT_RET(__wt_config_subinit(session, &ckptconf, &v));

	/*
	 * Take the first match: there should never be more than a single
	 * checkpoint of any name.
	 */
	while (__wt_config_next(&ckptconf, &k, &v) == 0)
		if (WT_STRING_MATCH(checkpoint, k.str, k.len))
			return (__ckpt_load(session, &k, &v, ckpt));

	return (WT_NOTFOUND);
}
Beispiel #14
0
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	int bitmap, bulk;
	uint32_t flags;

	flags = 0;

	WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &cval));
	if (cval.type == ITEM_NUM && (cval.val == 0 || cval.val == 1)) {
		bitmap = 0;
		bulk = (cval.val != 0);
	} else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
		bitmap = bulk = 1;
	else
		WT_RET_MSG(session, EINVAL,
		    "Value for 'bulk' must be a boolean or 'bitmap'");

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_BTREE_EXCLUSIVE);

	/* TODO: handle projections. */

	/* Get the handle and lock it while the cursor is using it. */
	if (WT_PREFIX_MATCH(uri, "file:"))
		WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, flags));
	else
		WT_RET(__wt_bad_object_type(session, uri));

	WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
	return (0);

err:	/* If the cursor could not be opened, release the handle. */
	WT_TRET(__wt_session_release_btree(session));
	return (ret);
}
Beispiel #15
0
/*
 * __ckpt_named_addr --
 *	Return the cookie associated with a file's named checkpoint.
 */
static int
__ckpt_named_addr(WT_SESSION_IMPL *session,
    const char *checkpoint, const char *config, WT_ITEM *addr)
{
	WT_CONFIG ckptconf;
	WT_CONFIG_ITEM a, k, v;

	WT_RET(__wt_config_getones(session, config, "checkpoint", &v));
	WT_RET(__wt_config_subinit(session, &ckptconf, &v));

	/*
	 * Take the first match: there should never be more than a single
	 * checkpoint of any name.
	 */
	while (__wt_config_next(&ckptconf, &k, &v) == 0)
		if (WT_STRING_MATCH(checkpoint, k.str, k.len)) {
			WT_RET(__wt_config_subgets(session, &v, "addr", &a));
			if (a.len != 0)
				WT_RET(__wt_nhex_to_raw(
				    session, a.str, a.len, addr));
			return (0);
		}
	return (WT_NOTFOUND);
}
Beispiel #16
0
/*
 * __ckpt_server_config --
 *	Parse and setup the checkpoint server options.
 */
static int
__ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;

	conn = S2C(session);

	/*
	 * The checkpoint configuration requires a wait time -- if it's not set,
	 * we're not running at all.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
	if (cval.val == 0) {
		*startp = 0;
		return (0);
	}
	conn->ckpt_usecs = (long)cval.val * 1000000;
	*startp = 1;

	WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval));

	if (!WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
		WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp));
		strcpy((char *)tmp->data, "name=");
		strncat((char *)tmp->data, cval.str, cval.len);
		ret = __wt_strndup(session,
		    tmp->data, strlen("name=") + cval.len, &conn->ckpt_config);
		__wt_scr_free(&tmp);
		WT_RET(ret);
	}

	return (0);
}
Beispiel #17
0
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] =
	    { WT_CONFIG_BASE(session, session_create), config, NULL };
	const char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 */
	if (__wt_metadata_search(session, uri, &tmpconfig) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "collator", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->collator_name));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
	if (cval.val)
		F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
	else
		F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
	if (cval.type == WT_CONFIG_ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
	lsm_tree->chunk_max = (uint64_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
	lsm_tree->chunk_size = (uint64_t)cval.val;
	if (lsm_tree->chunk_size > lsm_tree->chunk_max)
		WT_ERR_MSG(session, EINVAL,
		    "Chunk size (chunk_size) must be smaller than or equal to "
		    "the maximum chunk size (chunk_max)");
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	lsm_tree->merge_min = lsm_tree->merge_max / 2;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_threads", &cval));
	lsm_tree->merge_threads = (uint32_t)cval.val;
	/* Sanity check that api_data.py is in sync with lsm.h */
	WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS);

	/*
	 * Set up the config for each chunk.  If possible, avoid high latencies
	 * from fsync by flushing the cache every 8MB (will be overridden by
	 * any application setting).
	 */
	tmpconfig = "";
#ifdef HAVE_SYNC_FILE_RANGE
	if (!S2C(session)->direct_io)
		tmpconfig = "os_cache_dirty_max=8MB,";
#endif
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s%s,key_format=u,value_format=u", tmpconfig, config));
	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
Beispiel #18
0
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] = API_CONF_DEFAULTS(session, create, config);
	const char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 */
	if (__wt_metadata_read(session, uri, &tmpconfig) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	WT_RET(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_newest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST);
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST) ||
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_config", &cval));
	if (cval.type == ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(session, cval.str, cval.len,
	    &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_chunk_size", &cval));
	lsm_tree->chunk_size = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_threads", &cval));
	lsm_tree->merge_threads = (uint32_t)cval.val;
	/* Sanity check that api_data.py is in sync with lsm.h */
	WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS);

	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s,key_format=u,value_format=u", config));
	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
Beispiel #19
0
/*
 * config_check --
 *	Check the keys in an application-supplied config string match what is
 * specified in an array of check strings.
 */
static int
config_check(WT_SESSION_IMPL *session,
    const WT_CONFIG_CHECK *checks, const char *config, size_t config_len)
{
	WT_CONFIG parser, cparser, sparser;
	WT_CONFIG_ITEM k, v, ck, cv, dummy;
	WT_DECL_RET;
	int i;
	bool badtype, found;

	/*
	 * The config_len parameter is optional, and allows passing in strings
	 * that are not nul-terminated.
	 */
	if (config_len == 0)
		WT_RET(__wt_config_init(session, &parser, config));
	else
		WT_RET(__wt_config_initn(session, &parser, config, config_len));
	while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
		if (k.type != WT_CONFIG_ITEM_STRING &&
		    k.type != WT_CONFIG_ITEM_ID)
			WT_RET_MSG(session, EINVAL,
			    "Invalid configuration key found: '%.*s'",
			    (int)k.len, k.str);

		/* Search for a matching entry. */
		for (i = 0; checks[i].name != NULL; i++)
			if (WT_STRING_MATCH(checks[i].name, k.str, k.len))
				break;
		if (checks[i].name == NULL)
			WT_RET_MSG(session, EINVAL,
			    "unknown configuration key: '%.*s'",
			    (int)k.len, k.str);

		if (strcmp(checks[i].type, "boolean") == 0) {
			badtype = v.type != WT_CONFIG_ITEM_BOOL &&
			    (v.type != WT_CONFIG_ITEM_NUM ||
			    (v.val != 0 && v.val != 1));
		} else if (strcmp(checks[i].type, "category") == 0) {
			/* Deal with categories of the form: XXX=(XXX=blah). */
			ret = config_check(session,
			    checks[i].subconfigs,
			    k.str + strlen(checks[i].name) + 1, v.len);
			if (ret != EINVAL)
				badtype = false;
			else
				badtype = true;
		} else if (strcmp(checks[i].type, "format") == 0) {
			badtype = false;
		} else if (strcmp(checks[i].type, "int") == 0) {
			badtype = v.type != WT_CONFIG_ITEM_NUM;
		} else if (strcmp(checks[i].type, "list") == 0) {
			badtype = v.len > 0 && v.type != WT_CONFIG_ITEM_STRUCT;
		} else if (strcmp(checks[i].type, "string") == 0) {
			badtype = false;
		} else
			WT_RET_MSG(session, EINVAL,
			    "unknown configuration type: '%s'",
			    checks[i].type);

		if (badtype)
			WT_RET_MSG(session, EINVAL,
			    "Invalid value for key '%.*s': expected a %s",
			    (int)k.len, k.str, checks[i].type);

		if (checks[i].checkf != NULL)
			WT_RET(checks[i].checkf(session, &v));

		if (checks[i].checks == NULL)
			continue;

		/* Setup an iterator for the check string. */
		WT_RET(__wt_config_init(session, &cparser, checks[i].checks));
		while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
			if (WT_STRING_MATCH("min", ck.str, ck.len)) {
				if (v.val < cv.val)
					WT_RET_MSG(session, EINVAL,
					    "Value too small for key '%.*s' "
					    "the minimum is %.*s",
					    (int)k.len, k.str,
					    (int)cv.len, cv.str);
			} else if (WT_STRING_MATCH("max", ck.str, ck.len)) {
				if (v.val > cv.val)
					WT_RET_MSG(session, EINVAL,
					    "Value too large for key '%.*s' "
					    "the maximum is %.*s",
					    (int)k.len, k.str,
					    (int)cv.len, cv.str);
			} else if (WT_STRING_MATCH("choices", ck.str, ck.len)) {
				if (v.len == 0)
					WT_RET_MSG(session, EINVAL,
					    "Key '%.*s' requires a value",
					    (int)k.len, k.str);
				if (v.type == WT_CONFIG_ITEM_STRUCT) {
					/*
					 * Handle the 'verbose' case of a list
					 * containing restricted choices.
					 */
					WT_RET(__wt_config_subinit(session,
					    &sparser, &v));
					found = true;
					while (found &&
					    (ret = __wt_config_next(&sparser,
					    &v, &dummy)) == 0) {
						ret = __wt_config_subgetraw(
						    session, &cv, &v, &dummy);
						found = ret == 0;
					}
				} else  {
					ret = __wt_config_subgetraw(session,
					    &cv, &v, &dummy);
					found = ret == 0;
				}

				if (ret != 0 && ret != WT_NOTFOUND)
					return (ret);
				if (!found)
					WT_RET_MSG(session, EINVAL,
					    "Value '%.*s' not a "
					    "permitted choice for key '%.*s'",
					    (int)v.len, v.str,
					    (int)k.len, k.str);
			} else
				WT_RET_MSG(session, EINVAL,
				    "unexpected configuration description "
				    "keyword %.*s", (int)ck.len, ck.str);
		}
	}

	if (ret == WT_NOTFOUND)
		ret = 0;

	return (ret);
}
Beispiel #20
0
/*
 * __logmgr_config --
 *	Parse and setup the logging server options.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	bool enabled;

	conn = S2C(session);

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	enabled = cval.val != 0;

	/*
	 * If we're reconfiguring, enabled must match the already
	 * existing setting.
	 *
	 * If it is off and the user it turning it on, or it is on
	 * and the user is turning it off, return an error.
	 */
	if (reconfig &&
	    ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
	    (!enabled && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))))
		return (EINVAL);

	/* Logging is incompatible with in-memory */
	if (enabled) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = enabled;

	/*
	 * Setup a log path and compression even if logging is disabled in case
	 * we are going to print a log.  Only do this on creation.  Once a
	 * compressor or log path are set they cannot be changed.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &cval));
		WT_RET(__wt_compressor_config(
		    session, &cval, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
		WT_RET(__wt_strndup(
		    session, cval.str, cval.len, &conn->log_path));
	}
	/* We are done if logging isn't enabled. */
	if (!*runp)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	if (!reconfig) {
		/*
		 * Ignore if the user tries to change the file size.  The
		 * amount of memory allocated to the log slots may be based
		 * on the log file size at creation and we don't want to
		 * re-allocate that memory while running.
		 */
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
		conn->log_file_max = (wt_off_t)cval.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * If pre-allocation is configured, set the initial number to a few.
	 * We'll adapt as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Note that it is meaningless to reconfigure this value during
	 * runtime.  It only matters on create before recovery runs.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
	if (cval.len != 0  && WT_STRING_MATCH("error", cval.str, cval.len))
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);

	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
	if (cval.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));
	if (conn->log_cond != NULL)
		WT_RET(__wt_cond_auto_signal(session, conn->log_cond));
	return (0);
}
Beispiel #21
0
/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_OP *op;
	u_int i;
	bool locked, readonly;
#ifdef HAVE_TIMESTAMPS
	wt_timestamp_t prev_commit_timestamp, ts;
	bool update_timestamp;
#endif

	txn = &session->txn;
	conn = S2C(session);
	txn_global = &conn->txn_global;
	locked = false;

	WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) ||
	    txn->mod_count == 0);

	readonly = txn->mod_count == 0;
	/*
	 * Look for a commit timestamp.
	 */
	WT_ERR(
	    __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval));
	if (cval.len != 0) {
#ifdef HAVE_TIMESTAMPS
		WT_ERR(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
		WT_ERR(__wt_timestamp_validate(session,
		    "commit", &ts, &cval, true, true, true));
		__wt_timestamp_set(&txn->commit_timestamp, &ts);
		__wt_txn_set_commit_timestamp(session);
#else
		WT_ERR_MSG(session, EINVAL, "commit_timestamp requires a "
		    "version of WiredTiger built with timestamp support");
#endif
	}

#ifdef HAVE_TIMESTAMPS
	/*
	 * Debugging checks on timestamps, if user requested them.
	 */
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
	    !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "commit_timestamp required and "
		    "none set on this transaction");
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
	    F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "no commit_timestamp required and "
		    "timestamp set on this transaction");
#endif
	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_SYNC_ENABLED) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_ERR_MSG(session, EINVAL,
			    "Sync already set during begin_transaction");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification. */
	if (txn->notify != NULL)
		WT_ERR(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/*
	 * We are about to release the snapshot: copy values into any
	 * positioned cursors so they don't point to updates that could be
	 * freed once we don't have a snapshot.
	 */
	if (session->ncursors > 0) {
		WT_DIAGNOSTIC_YIELD;
		WT_ERR(__wt_session_copy_values(session));
	}

	/* If we are logging, write a commit log record. */
	if (txn->logrec != NULL &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		/*
		 * We hold the visibility lock for reading from the time
		 * we write our log record until the time we release our
		 * transaction so that the LSN any checkpoint gets will
		 * always reflect visible data.
		 */
		__wt_readlock(session, &txn_global->visibility_rwlock);
		locked = true;
		WT_ERR(__wt_txn_log_commit(session, cfg));
	}

	/* Note: we're going to commit: nothing can fail after this point. */

	/* Process and free updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
		switch (op->type) {
		case WT_TXN_OP_BASIC:
		case WT_TXN_OP_BASIC_TS:
		case WT_TXN_OP_INMEM:
			/*
			 * Switch reserved operations to abort to
			 * simplify obsolete update list truncation.
			 */
			if (op->u.upd->type == WT_UPDATE_RESERVED) {
				op->u.upd->txnid = WT_TXN_ABORTED;
				break;
			}

			/*
			 * Writes to the lookaside file can be evicted as soon
			 * as they commit.
			 */
			if (conn->cache->las_fileid != 0 &&
			    op->fileid == conn->cache->las_fileid) {
				op->u.upd->txnid = WT_TXN_NONE;
				break;
			}

#ifdef HAVE_TIMESTAMPS
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
			    op->type != WT_TXN_OP_BASIC_TS) {
				WT_ASSERT(session,
				    op->fileid != WT_METAFILE_ID);
				__wt_timestamp_set(&op->u.upd->timestamp,
				    &txn->commit_timestamp);
			}
#endif
			break;

		case WT_TXN_OP_REF:
#ifdef HAVE_TIMESTAMPS
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
				__wt_timestamp_set(
				    &op->u.ref->page_del->timestamp,
				    &txn->commit_timestamp);
#endif
			break;

		case WT_TXN_OP_TRUNCATE_COL:
		case WT_TXN_OP_TRUNCATE_ROW:
			/* Other operations don't need timestamps. */
			break;
		}

		__wt_txn_op_free(session, op);
	}
	txn->mod_count = 0;

#ifdef HAVE_TIMESTAMPS
	/*
	 * Track the largest commit timestamp we have seen.
	 *
	 * We don't actually clear the local commit timestamp, just the flag.
	 * That said, we can't update the global commit timestamp until this
	 * transaction is visible, which happens when we release it.
	 */
	update_timestamp = F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
#endif

	__wt_txn_release(session);
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);

#ifdef HAVE_TIMESTAMPS
	/* First check if we've already committed something in the future. */
	if (update_timestamp) {
		WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp));
		update_timestamp = __wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0;
	}

	/*
	 * If it looks like we need to move the global commit timestamp,
	 * write lock and re-check.
	 */
	if (update_timestamp) {
#if WT_TIMESTAMP_SIZE == 8
		while (__wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0) {
			if (__wt_atomic_cas64(
			    &txn_global->commit_timestamp.val,
			    prev_commit_timestamp.val,
			    txn->commit_timestamp.val)) {
				txn_global->has_commit_timestamp = true;
				break;
			}
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp);
		}
#else
		__wt_writelock(session, &txn_global->rwlock);
		if (__wt_timestamp_cmp(&txn->commit_timestamp,
		    &txn_global->commit_timestamp) > 0) {
			__wt_timestamp_set(&txn_global->commit_timestamp,
			    &txn->commit_timestamp);
			txn_global->has_commit_timestamp = true;
		}
		__wt_writeunlock(session, &txn_global->rwlock);
#endif
	}
#endif

	/*
	 * We're between transactions, if we need to block for eviction, it's
	 * a good time to do so.  Note that we must ignore any error return
	 * because the user's data is committed.
	 */
	if (!readonly)
		(void)__wt_cache_eviction_check(session, false, false, NULL);
	return (0);

err:	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);
	WT_TRET(__wt_txn_rollback(session, cfg));
	return (ret);
}
Beispiel #22
0
/*
 * __wt_curfile_create --
 *	Open a cursor for a given btree handle.
 */
int
__wt_curfile_create(WT_SESSION_IMPL *session,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	static WT_CURSOR iface = {
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		__curfile_compare,
		__curfile_next,
		__curfile_prev,
		__curfile_reset,
		__curfile_search,
		__curfile_search_near,
		__curfile_insert,
		__curfile_update,
		__curfile_remove,
		__curfile_close,
		{ NULL, NULL },		/* TAILQ_ENTRY q */
		0,			/* recno key */
		{ 0 },			/* recno raw buffer */
		{ NULL, 0, 0, NULL, 0 },/* WT_ITEM key */
		{ NULL, 0, 0, NULL, 0 },/* WT_ITEM value */
		0,			/* int saved_err */
		0			/* uint32_t flags */
	};
	WT_BTREE *btree;
	WT_CONFIG_ITEM cval;
	WT_CURSOR *cursor;
	WT_CURSOR_BTREE *cbt;
	WT_DECL_RET;
	size_t csize;
	int bitmap, bulk;

	cbt = NULL;

	btree = session->btree;
	WT_ASSERT(session, btree != NULL);

	WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &cval));
	if ((cval.type == ITEM_ID || cval.type == ITEM_STRING) &&
	    WT_STRING_MATCH("bitmap", cval.str, cval.len))
		bitmap = bulk = 1;
	else {
		bitmap = 0;
		bulk = (cval.val != 0);
	}

	csize = bulk ? sizeof(WT_CURSOR_BULK) : sizeof(WT_CURSOR_BTREE);
	WT_RET(__wt_calloc(session, 1, csize, &cbt));

	cursor = &cbt->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	cursor->uri = btree->name;
	cursor->key_format = btree->key_format;
	cursor->value_format = btree->value_format;

	cbt->btree = session->btree;
	if (bulk)
		WT_ERR(__wt_curbulk_init((WT_CURSOR_BULK *)cbt, bitmap));

	/*
	 * no_cache
	 * No cache cursors are read-only.
	 */
	WT_ERR(__wt_config_gets_defno(session, cfg, "no_cache", &cval));
	if (cval.val != 0) {
		cursor->insert = __wt_cursor_notsup;
		cursor->update = __wt_cursor_notsup;
		cursor->remove = __wt_cursor_notsup;
	}

	/*
	 * random_retrieval
	 * Random retrieval cursors only support next, reset and close.
	 */
	WT_ERR(__wt_config_gets_defno(session, cfg, "next_random", &cval));
	if (cval.val != 0) {
		__wt_cursor_set_notsup(cursor);
		cursor->next = __curfile_next_random;
		cursor->reset = __curfile_reset;
	}

	/* __wt_cursor_init is last so we don't have to clean up on error. */
	STATIC_ASSERT(offsetof(WT_CURSOR_BTREE, iface) == 0);
	WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));

	if (0) {
err:		__wt_free(session, cbt);
	}

	return (ret);
}
Beispiel #23
0
Datei: txn.c Projekt: To4e/mongo
/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_OP *op;
	u_int i;

	txn = &session->txn;
	conn = S2C(session);
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);

	if (!F_ISSET(txn, WT_TXN_RUNNING))
		WT_RET_MSG(session, EINVAL, "No transaction is active");

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_FLUSH) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_RET_MSG(session, EINVAL,
			    "Sync already set during begin_transaction.");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification. */
	if (txn->notify != NULL)
		WT_TRET(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/* If we are logging, write a commit log record. */
	if (ret == 0 && txn->mod_count > 0 &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		ret = __wt_txn_log_commit(session, cfg);
		WT_ASSERT(session, ret == 0);
	}

	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	if (ret != 0) {
		WT_TRET(__wt_txn_rollback(session, cfg));
		return (ret);
	}

	/* Free memory associated with updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++)
		__wt_txn_op_free(session, op);
	txn->mod_count = 0;

	/*
	 * We are about to release the snapshot: copy values into any
	 * positioned cursors so they don't point to updates that could be
	 * freed once we don't have a transaction ID pinned.
	 */
	if (session->ncursors > 0)
		WT_RET(__wt_session_copy_values(session));

	__wt_txn_release(session);
	return (0);
}
Beispiel #24
0
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	uint32_t flags;
	bool bitmap, bulk, checkpoint_wait;

	bitmap = bulk = false;
	checkpoint_wait = true;
	flags = 0;

	/*
	 * Decode the bulk configuration settings. In memory databases
	 * ignore bulk load.
	 */
	if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
		WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
		if (cval.type == WT_CONFIG_ITEM_BOOL ||
		    (cval.type == WT_CONFIG_ITEM_NUM &&
		    (cval.val == 0 || cval.val == 1))) {
			bitmap = false;
			bulk = cval.val != 0;
		} else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
			bitmap = bulk = true;
			/*
			 * Unordered bulk insert is a special case used
			 * internally by index creation on existing tables. It
			 * doesn't enforce any special semantics at the file
			 * level. It primarily exists to avoid some locking
			 * problems between LSM and index creation.
			 */
		else if (!WT_STRING_MATCH("unordered", cval.str, cval.len))
			WT_RET_MSG(session, EINVAL,
			    "Value for 'bulk' must be a boolean or 'bitmap'");

		if (bulk) {
			WT_RET(__wt_config_gets(session,
			    cfg, "checkpoint_wait", &cval));
			checkpoint_wait = cval.val != 0;
		}
	}

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

	/* Get the handle and lock it while the cursor is using it. */
	if (WT_PREFIX_MATCH(uri, "file:")) {
		/*
		 * If we are opening exclusive and don't want a bulk cursor
		 * open to fail with EBUSY due to a database-wide checkpoint,
		 * get the handle while holding the checkpoint lock.
		 */
		if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait)
			WT_WITH_CHECKPOINT_LOCK(session,
			    ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags));
		else
			ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags);
		WT_RET(ret);
	} else
		WT_RET(__wt_bad_object_type(session, uri));

	WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp));

	return (0);

err:	/* If the cursor could not be opened, release the handle. */
	WT_TRET(__wt_session_release_btree(session));
	return (ret);
}
Beispiel #25
0
/*
 * __schema_open_index --
 *	Open one or more indices for a table (internal version).
 */
static int
__schema_open_index(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
	WT_CURSOR *cursor;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_INDEX *idx;
	u_int i;
	int cmp;
	bool match;
	const char *idxconf, *name, *tablename, *uri;

	/* Check if we've already done the work. */
	if (idxname == NULL && table->idx_complete)
		return (0);

	cursor = NULL;
	idx = NULL;
	match = false;

	/* Build a search key. */
	tablename = table->name;
	(void)WT_PREFIX_SKIP(tablename, "table:");
	WT_ERR(__wt_scr_alloc(session, 512, &tmp));
	WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));

	/* Find matching indices. */
	WT_ERR(__wt_metadata_cursor(session, &cursor));
	cursor->set_key(cursor, tmp->data);
	if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
		ret = cursor->next(cursor);
	for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
		WT_ERR(cursor->get_key(cursor, &uri));
		name = uri;
		if (!WT_PREFIX_SKIP(name, tmp->data))
			break;

		/* Is this the index we are looking for? */
		match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);

		/*
		 * Ensure there is space, including if we have to make room for
		 * a new entry in the middle of the list.
		 */
		WT_ERR(__wt_realloc_def(session, &table->idx_alloc,
		    WT_MAX(i, table->nindices) + 1, &table->indices));

		/* Keep the in-memory list in sync with the metadata. */
		cmp = 0;
		while (table->indices[i] != NULL &&
		    (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
			/* Index no longer exists, remove it. */
			__wt_free(session, table->indices[i]);
			memmove(&table->indices[i], &table->indices[i + 1],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[--table->nindices] = NULL;
		}
		if (cmp < 0) {
			/* Make room for a new index. */
			memmove(&table->indices[i + 1], &table->indices[i],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[i] = NULL;
			++table->nindices;
		}

		if (!match)
			continue;

		if (table->indices[i] == NULL) {
			WT_ERR(cursor->get_value(cursor, &idxconf));
			WT_ERR(__wt_calloc_one(session, &idx));
			WT_ERR(__wt_strdup(session, uri, &idx->name));
			WT_ERR(__wt_strdup(session, idxconf, &idx->config));
			WT_ERR(__open_index(session, table, idx));

			/*
			 * If we're checking the creation of an index before a
			 * table is fully created, don't save the index: it
			 * will need to be reopened once the table is complete.
			 */
			if (!table->cg_complete) {
				WT_ERR(
				    __wt_schema_destroy_index(session, &idx));
				if (idxname != NULL)
					break;
				continue;
			}

			table->indices[i] = idx;
			idx = NULL;

			/*
			 * If the slot is bigger than anything else we've seen,
			 * bump the number of indices.
			 */
			if (i >= table->nindices)
				table->nindices = i + 1;
		}

		/* If we were looking for a single index, we're done. */
		if (indexp != NULL)
			*indexp = table->indices[i];
		if (idxname != NULL)
			break;
	}
	WT_ERR_NOTFOUND_OK(ret);
	if (idxname != NULL && !match)
		ret = WT_NOTFOUND;

	/* If we did a full pass, we won't need to do it again. */
	if (idxname == NULL) {
		table->nindices = i;
		table->idx_complete = true;
	}

err:	WT_TRET(__wt_metadata_cursor_release(session, &cursor));
	WT_TRET(__wt_schema_destroy_index(session, &idx));

	__wt_scr_free(session, &tmp);
	return (ret);
}
Beispiel #26
0
/*
 * __wt_block_open --
 *	Open a block handle.
 */
int
__wt_block_open(WT_SESSION_IMPL *session,
    const char *filename, const char *cfg[],
    int forced_salvage, int readonly, uint32_t allocsize, WT_BLOCK **blockp)
{
	WT_BLOCK *block;
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	WT_TRET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename));

	conn = S2C(session);
	*blockp = NULL;

	__wt_spin_lock(session, &conn->block_lock);
	TAILQ_FOREACH(block, &conn->blockqh, q)
		if (strcmp(filename, block->name) == 0) {
			++block->ref;
			*blockp = block;
			__wt_spin_unlock(session, &conn->block_lock);
			return (0);
		}

	/* Basic structure allocation, initialization. */
	WT_ERR(__wt_calloc_def(session, 1, &block));
	block->ref = 1;
	TAILQ_INSERT_HEAD(&conn->blockqh, block, q);

	WT_ERR(__wt_strdup(session, filename, &block->name));
	block->allocsize = allocsize;

	WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval));
	block->allocfirst =
	    WT_STRING_MATCH("first", cval.str, cval.len) ? 1 : 0;

	/* Configuration: optional OS buffer cache maximum size. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval));
	block->os_cache_max = (size_t)cval.val;
#ifdef HAVE_POSIX_FADVISE
	if (conn->direct_io && block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported in combination with direct_io");
#else
	if (block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported if posix_fadvise not "
		    "available");
#endif

	/* Configuration: optional immediate write scheduling flag. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval));
	block->os_cache_dirty_max = (size_t)cval.val;
#ifdef HAVE_SYNC_FILE_RANGE
	if (conn->direct_io && block->os_cache_dirty_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_dirty_max not supported in combination with "
		    "direct_io");
#else
	if (block->os_cache_dirty_max) {
		/*
		 * Ignore any setting if it is not supported.
		 */
		block->os_cache_dirty_max = 0;
		WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
		    "os_cache_dirty_max ignored when sync_file_range not "
		    "available"));
	}
#endif

	/* Open the underlying file handle. */
	WT_ERR(__wt_open(session, filename, 0, 0,
	    readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA,
	    &block->fh));

	/* Initialize the live checkpoint's lock. */
	WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));

	/*
	 * Read the description information from the first block.
	 *
	 * Salvage is a special case: if we're forcing the salvage, we don't
	 * look at anything, including the description information.
	 */
	if (!forced_salvage)
		WT_ERR(__desc_read(session, block));

	*blockp = block;
	__wt_spin_unlock(session, &conn->block_lock);
	return (0);

err:	WT_TRET(__block_destroy(session, block));
	__wt_spin_unlock(session, &conn->block_lock);
	return (ret);
}
Beispiel #27
0
/*
 * __logmgr_config --
 *	Parse and setup the logging server options.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	bool enabled;

	/*
	 * A note on reconfiguration: the standard "is this configuration string
	 * allowed" checks should fail if reconfiguration has invalid strings,
	 * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
	 * the connection reconfiguration method doesn't allow those strings.
	 * Additionally, the base configuration values during reconfiguration
	 * are the currently configured values (so we don't revert to default
	 * values when repeatedly reconfiguring), and configuration processing
	 * of a currently set value should not change the currently set value.
	 *
	 * In this code path, log server reconfiguration does not stop/restart
	 * the log server, so there's no point in re-evaluating configuration
	 * strings that cannot be reconfigured, risking bugs in configuration
	 * setup, and depending on evaluation of currently set values to always
	 * result in the currently set value. Skip tests for any configuration
	 * strings which don't make sense during reconfiguration, but don't
	 * worry about error reporting because it should never happen.
	 */

	conn = S2C(session);

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	enabled = cval.val != 0;

	/*
	 * If we're reconfiguring, enabled must match the already
	 * existing setting.
	 *
	 * If it is off and the user it turning it on, or it is on
	 * and the user is turning it off, return an error.
	 *
	 * See above: should never happen.
	 */
	if (reconfig &&
	    ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
	    (!enabled && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))))
		return (EINVAL);

	/* Logging is incompatible with in-memory */
	if (enabled) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = enabled;

	/*
	 * Setup a log path and compression even if logging is disabled in case
	 * we are going to print a log.  Only do this on creation.  Once a
	 * compressor or log path are set they cannot be changed.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &cval));
		WT_RET(__wt_compressor_config(
		    session, &cval, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
		WT_RET(__wt_strndup(
		    session, cval.str, cval.len, &conn->log_path));
	}

	/* We are done if logging isn't enabled. */
	if (!*runp)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	/*
	 * The file size cannot be reconfigured. The amount of memory allocated
	 * to the log slots may be based on the log file size at creation and we
	 * don't want to re-allocate that memory while running.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
		conn->log_file_max = (wt_off_t)cval.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * If pre-allocation is configured, set the initial number to a few.
	 * We'll adapt as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Note it's meaningless to reconfigure this value during runtime, it
	 * only matters on create before recovery runs.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets_def(
		    session, cfg, "log.recover", 0, &cval));
		if (WT_STRING_MATCH("error", cval.str, cval.len))
			FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
	}

	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
	if (cval.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));
	if (conn->log_cond != NULL)
		WT_RET(__wt_cond_auto_signal(session, conn->log_cond));
	return (0);
}
Beispiel #28
0
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] =
	    { WT_CONFIG_BASE(session, session_create), config, NULL };
	char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 */
	if (__wt_metadata_search(session, uri, &tmpconfig) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "collator", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->collator_name));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
	if (cval.val)
		F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
	else
		F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
	if (cval.type == WT_CONFIG_ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
	lsm_tree->chunk_max = (uint64_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
	lsm_tree->chunk_size = (uint64_t)cval.val;
	if (lsm_tree->chunk_size > lsm_tree->chunk_max)
		WT_ERR_MSG(session, EINVAL,
		    "Chunk size (chunk_size) must be smaller than or equal to "
		    "the maximum chunk size (chunk_max)");
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_min", &cval));
	lsm_tree->merge_min = (uint32_t)cval.val;
	if (lsm_tree->merge_min > lsm_tree->merge_max)
		WT_ERR_MSG(session, EINVAL,
		    "LSM merge_min must be less than or equal to merge_max");

	/*
	 * Set up the config for each chunk.
	 *
	 * Make the memory_page_max double the chunk size, so application
	 * threads don't immediately try to force evict the chunk when the
	 * worker thread clears the NO_EVICTION flag.
	 */
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s,key_format=u,value_format=u,memory_page_max=%" PRIu64,
	    config, 2 * lsm_tree->chunk_max));
	WT_ERR(__wt_strndup(
	    session, buf->data, buf->size, &lsm_tree->file_config));

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
Beispiel #29
0
/*
 * __wt_lsm_meta_read --
 *	Read the metadata for an LSM tree.
 */
int
__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_CONFIG cparser, lparser;
	WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata;
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	char *lsmconfig;
	u_int nchunks;

	chunk = NULL;			/* -Wconditional-uninitialized */

	/* LSM trees inherit the merge setting from the connection. */
	if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
		F_SET(lsm_tree, WT_LSM_TREE_MERGES);

	WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconfig));
	WT_ERR(__wt_config_init(session, &cparser, lsmconfig));
	while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
		if (WT_STRING_MATCH("key_format", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->key_format);
			WT_ERR(__wt_strndup(session,
			    cv.str, cv.len, &lsm_tree->key_format));
		} else if (WT_STRING_MATCH("value_format", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->value_format);
			WT_ERR(__wt_strndup(session,
			    cv.str, cv.len, &lsm_tree->value_format));
		} else if (WT_STRING_MATCH("collator", ck.str, ck.len)) {
			if (cv.len == 0 ||
			    WT_STRING_MATCH("none", cv.str, cv.len))
				continue;
			/*
			 * Extract the application-supplied metadata (if any)
			 * from the file configuration.
			 */
			WT_ERR(__wt_config_getones(
			    session, lsmconfig, "file_config", &fileconf));
			WT_CLEAR(metadata);
			WT_ERR_NOTFOUND_OK(__wt_config_subgets(
			    session, &fileconf, "app_metadata", &metadata));
			WT_ERR(__wt_collator_config(session, lsm_tree->name,
			    &cv, &metadata,
			    &lsm_tree->collator, &lsm_tree->collator_owned));
			WT_ERR(__wt_strndup(session,
			    cv.str, cv.len, &lsm_tree->collator_name));
		} else if (WT_STRING_MATCH("bloom_config", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->bloom_config);
			/* Don't include the brackets. */
			WT_ERR(__wt_strndup(session,
			    cv.str + 1, cv.len - 2, &lsm_tree->bloom_config));
		} else if (WT_STRING_MATCH("file_config", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->file_config);
			/* Don't include the brackets. */
			WT_ERR(__wt_strndup(session,
			    cv.str + 1, cv.len - 2, &lsm_tree->file_config));
		} else if (WT_STRING_MATCH("auto_throttle", ck.str, ck.len)) {
			if (cv.val)
				F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
			else
				F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
		} else if (WT_STRING_MATCH("bloom", ck.str, ck.len))
			lsm_tree->bloom = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("bloom_bit_count", ck.str, ck.len))
			lsm_tree->bloom_bit_count = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("bloom_hash_count", ck.str, ck.len))
			lsm_tree->bloom_hash_count = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("chunk_count_limit", ck.str, ck.len)) {
			lsm_tree->chunk_count_limit = (uint32_t)cv.val;
			if (cv.val != 0)
				F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
		} else if (WT_STRING_MATCH("chunk_max", ck.str, ck.len))
			lsm_tree->chunk_max = (uint64_t)cv.val;
		else if (WT_STRING_MATCH("chunk_size", ck.str, ck.len))
			lsm_tree->chunk_size = (uint64_t)cv.val;
		else if (WT_STRING_MATCH("merge_max", ck.str, ck.len))
			lsm_tree->merge_max = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("merge_min", ck.str, ck.len))
			lsm_tree->merge_min = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("last", ck.str, ck.len))
			lsm_tree->last = (u_int)cv.val;
		else if (WT_STRING_MATCH("chunks", ck.str, ck.len)) {
			WT_ERR(__wt_config_subinit(session, &lparser, &cv));
			for (nchunks = 0; (ret =
			    __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
				if (WT_STRING_MATCH("id", lk.str, lk.len)) {
					WT_ERR(__wt_realloc_def(session,
					    &lsm_tree->chunk_alloc,
					    nchunks + 1, &lsm_tree->chunk));
					WT_ERR(
					    __wt_calloc_one(session, &chunk));
					lsm_tree->chunk[nchunks++] = chunk;
					chunk->id = (uint32_t)lv.val;
					WT_ERR(__wt_lsm_tree_chunk_name(session,
					    lsm_tree, chunk->id, &chunk->uri));
					F_SET(chunk,
					    WT_LSM_CHUNK_ONDISK |
					    WT_LSM_CHUNK_STABLE);
				} else if (WT_STRING_MATCH(
				    "bloom", lk.str, lk.len)) {
					WT_ERR(__wt_lsm_tree_bloom_name(
					    session, lsm_tree,
					    chunk->id, &chunk->bloom_uri));
					F_SET(chunk, WT_LSM_CHUNK_BLOOM);
					continue;
				} else if (WT_STRING_MATCH(
				    "chunk_size", lk.str, lk.len)) {
					chunk->size = (uint64_t)lv.val;
					continue;
				} else if (WT_STRING_MATCH(
				    "count", lk.str, lk.len)) {
					chunk->count = (uint64_t)lv.val;
					continue;
				} else if (WT_STRING_MATCH(
				    "generation", lk.str, lk.len)) {
					chunk->generation = (uint32_t)lv.val;
					continue;
				}
			}
			WT_ERR_NOTFOUND_OK(ret);
			lsm_tree->nchunks = nchunks;
		} else if (WT_STRING_MATCH("old_chunks", ck.str, ck.len)) {
			WT_ERR(__wt_config_subinit(session, &lparser, &cv));
			for (nchunks = 0; (ret =
			    __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
				if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
					WT_ERR(__wt_strndup(session,
					    lv.str, lv.len, &chunk->bloom_uri));
					F_SET(chunk, WT_LSM_CHUNK_BLOOM);
					continue;
				}
				WT_ERR(__wt_realloc_def(session,
				    &lsm_tree->old_alloc, nchunks + 1,
				    &lsm_tree->old_chunks));
				WT_ERR(__wt_calloc_one(session, &chunk));
				lsm_tree->old_chunks[nchunks++] = chunk;
				WT_ERR(__wt_strndup(session,
				    lk.str, lk.len, &chunk->uri));
				F_SET(chunk, WT_LSM_CHUNK_ONDISK);
			}
			WT_ERR_NOTFOUND_OK(ret);
			lsm_tree->nold_chunks = nchunks;
		}
		/*
		 * Ignore any other values: the metadata entry might have been
		 * created by a future release, with unknown options.
		 */
	}
	WT_ERR_NOTFOUND_OK(ret);

	/*
	 * If the default merge_min was not overridden, calculate it now.  We
	 * do this here so that trees created before merge_min was added get a
	 * sane value.
	 */
	if (lsm_tree->merge_min < 2)
		lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2);

err:	__wt_free(session, lsmconfig);
	return (ret);
}
Beispiel #30
0
/*
 * __config_process_value --
 *	Deal with special config values like true / false.
 */
static int
__config_process_value(WT_CONFIG *conf, WT_CONFIG_ITEM *value)
{
	char *endptr;

	/* Empty values are okay: we can't do anything interesting with them. */
	if (value->len == 0)
		return (0);

	if (value->type == WT_CONFIG_ITEM_ID) {
		if (WT_STRING_MATCH("false", value->str, value->len)) {
			value->type = WT_CONFIG_ITEM_BOOL;
			value->val = 0;
		} else if (WT_STRING_MATCH("true", value->str, value->len)) {
			value->type = WT_CONFIG_ITEM_BOOL;
			value->val = 1;
		}
	} else if (value->type == WT_CONFIG_ITEM_NUM) {
		errno = 0;
		value->val = strtoll(value->str, &endptr, 10);

		/* Check any leftover characters. */
		while (endptr < value->str + value->len)
			switch (*endptr++) {
			case 'b':
			case 'B':
				/* Byte: no change. */
				break;
			case 'k':
			case 'K':
				WT_SHIFT_INT64(value->val, 10);
				break;
			case 'm':
			case 'M':
				WT_SHIFT_INT64(value->val, 20);
				break;
			case 'g':
			case 'G':
				WT_SHIFT_INT64(value->val, 30);
				break;
			case 't':
			case 'T':
				WT_SHIFT_INT64(value->val, 40);
				break;
			case 'p':
			case 'P':
				WT_SHIFT_INT64(value->val, 50);
				break;
			default:
				/*
				 * We didn't get a well-formed number.  That
				 * might be okay, the required type will be
				 * checked by __wt_config_check.
				 */
				value->type = WT_CONFIG_ITEM_ID;
				break;
			}

		/*
		 * If we parsed the whole string but the number is out of range,
		 * report an error.  Don't report an error for strings that
		 * aren't well-formed integers: if an integer is expected, that
		 * will be caught by __wt_config_check.
		 */
		if (value->type == WT_CONFIG_ITEM_NUM && errno == ERANGE)
			goto range;
	}

	return (0);

range:	return (__config_err(conf, "Number out of range", ERANGE));
}