예제 #1
0
/*
 * __wt_struct_reformat --
 *	Build the packing format for a list of table columns (the values of a
 *	column group, or the keys of an index), optionally followed by a
 *	second list of extra columns.  The format characters are appended to
 *	the caller's format buffer.
 */
int
__wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, const char *extra_cols, int value_only,
    WT_ITEM *format)
{
	WT_CONFIG colparser;
	WT_CONFIG_ITEM cur, peek_k, peek_v;
	WT_DECL_RET;
	WT_PACK_VALUE pv;
	int more;

	WT_CLEAR(pv);		/* -Wuninitialized */

	/* Prime the one-column lookahead with the first requested column. */
	WT_RET(__wt_config_initn(session, &colparser, columns, len));
	WT_RET(__wt_config_next(&colparser, &peek_k, &peek_v));

	for (;;) {
		/* Take the lookahead column, then try to read the one after. */
		cur = peek_k;
		ret = __wt_config_next(&colparser, &peek_k, &peek_v);
		if (ret == 0)
			more = 1;
		else if (ret == WT_NOTFOUND)
			more = 0;
		else
			return (ret);

		/*
		 * When the main list runs out, continue (once) with the extra
		 * columns, if the caller supplied any.
		 */
		if (!more && extra_cols != NULL) {
			WT_RET(__wt_config_init(
			    session, &colparser, extra_cols));
			WT_RET(__wt_config_next(&colparser, &peek_k, &peek_v));
			extra_cols = NULL;
			more = 1;
		}

		ret = __find_column_format(
		    session, table, &cur, value_only, &pv);
		if (ret != 0) {
			if (value_only && ret == EINVAL)
				WT_RET_MSG(session, EINVAL,
				    "A column group cannot store key column "
				    "'%.*s' in its value",
				    (int)cur.len, cur.str);
			WT_RET_MSG(session, EINVAL,
			    "Column '%.*s' not found", (int)cur.len, cur.str);
		}

		/*
		 * An unsized WT_ITEM is written as 'u' when it is the last
		 * column and as 'U' (size prepended) when other columns
		 * follow it.  Convert between the two according to where the
		 * column lands in the new format; this is the only case where
		 * the destination can be larger than the source.
		 */
		switch (pv.type) {
		case 'u':
			if (!pv.havesize && more)
				pv.type = 'U';
			break;
		case 'U':
			if (!more)
				pv.type = 'u';
			break;
		default:
			break;
		}

		if (!pv.havesize)
			WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
		else
			WT_RET(__wt_buf_catfmt(
			    session, format, "%d%c", (int)pv.size, pv.type));

		if (!more)
			break;
	}

	return (0);
}
예제 #2
0
/*
 * __wt_meta_snaplist_set --
 *	Format a WT_SNAPSHOT list as a "snapshot=(...)" string and store it as
 *	the named file's snapshot value.
 */
int
__wt_meta_snaplist_set(
    WT_SESSION_IMPL *session, const char *name, WT_SNAPSHOT *snapbase)
{
	WT_DECL_RET;
	WT_ITEM *buf;
	WT_SNAPSHOT *snap;
	int64_t max_order;
	const char *delim;

	buf = NULL;
	max_order = 0;
	delim = "";

	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf, "snapshot=("));

	WT_SNAPSHOT_FOREACH(snapbase, snap) {
		/* Deleted snapshots are not written. */
		if (F_ISSET(snap, WT_SNAP_DELETE))
			continue;

		/*
		 * Remember the largest order seen among the active snapshots.
		 * It isn't a true generation number or ID: it restarts at 1
		 * when the snapshot being written is the file's only one.  The
		 * point is to record the real snapshot order, because two
		 * snapshots taken in quick succession may carry identical (or
		 * even out-of-order) timestamps, and we must never pick the
		 * wrong "last" snapshot.
		 */
		if (max_order < snap->order)
			max_order = snap->order;

		if (F_ISSET(snap, WT_SNAP_ADD | WT_SNAP_UPDATE)) {
			/* The raw cookie is stored as a hex string. */
			WT_ERR(__wt_raw_to_hex(session,
			    snap->raw.data, snap->raw.size, &snap->addr));

			/* New snapshots sort after everything seen so far. */
			if (F_ISSET(snap, WT_SNAP_ADD))
				snap->order = max_order + 1;
		}

		WT_ERR(__wt_buf_catfmt(session, buf,
		    "%s%s=(addr=\"%.*s\",order=%" PRIu64
		    ",time=%" PRIuMAX ",size=%" PRIu64 ")",
		    delim, snap->name,
		    (int)snap->addr.size, (char *)snap->addr.data,
		    snap->order, snap->sec, snap->snapshot_size));
		delim = ",";
	}

	WT_ERR(__wt_buf_catfmt(session, buf, ")"));
	WT_ERR(__snap_set(session, name, buf->mem));

err:	__wt_scr_free(&buf);

	return (ret);
}
예제 #3
0
/*
 * __ckpt_string --
 *	Return a printable string representation of a checkpoint address cookie.
 */
static int
__ckpt_string(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, WT_ITEM *buf)
{
	WT_BLOCK_CKPT *ci, _ci;

	/* Initialize the checkpoint, crack the cookie. */
	ci = &_ci;
	WT_RET(__wt_block_ckpt_init(session, block, ci, "string", 0));
	WT_RET(__wt_block_buffer_to_ckpt(session, block, addr, ci));

	WT_RET(__wt_buf_fmt(session, buf,
	    "version=%d",
	    ci->version));
	if (ci->root_offset == WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf, ", root=[Empty]"));
	else
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", root=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)ci->root_offset,
		    (uintmax_t)(ci->root_offset + ci->root_size),
		    ci->root_size, ci->root_cksum));
	if (ci->alloc.offset == WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf, ", alloc=[Empty]"));
	else
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", alloc=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)ci->alloc.offset,
		    (uintmax_t)(ci->alloc.offset + ci->alloc.size),
		    ci->alloc.size, ci->alloc.cksum));
	if (ci->avail.offset == WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf, ", avail=[Empty]"));
	else
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", avail=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)ci->avail.offset,
		    (uintmax_t)(ci->avail.offset + ci->avail.size),
		    ci->avail.size, ci->avail.cksum));
	if (ci->discard.offset == WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf, ", discard=[Empty]"));
	else
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", discard=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)ci->discard.offset,
		    (uintmax_t)(ci->discard.offset + ci->discard.size),
		    ci->discard.size, ci->discard.cksum));
	WT_RET(__wt_buf_catfmt(session, buf,
	    ", file size=%" PRIuMAX
	    ", write generation=%" PRIu64,
	    (uintmax_t)ci->file_size,
	    ci->write_gen));

	__wt_block_ckpt_destroy(session, ci);

	return (0);
}
예제 #4
0
/*
 * __snapshot_string --
 *	Crack a snapshot address cookie and format its contents into the
 *	buffer as printable text.
 */
static int
__snapshot_string(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, WT_ITEM *buf)
{
	WT_BLOCK_SNAPSHOT cracked, *snap;

	/* Set up a local snapshot structure and decode the cookie into it. */
	snap = &cracked;
	WT_RET(__wt_block_snap_init(session, block, snap, "string", 0));
	WT_RET(__wt_block_buffer_to_snapshot(session, block, addr, snap));

	WT_RET(__wt_buf_fmt(session, buf, "version=%d", snap->version));

	/* Root page: offset range, size and checksum, or a marker if unset. */
	if (snap->root_offset != WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", root=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)snap->root_offset,
		    (uintmax_t)(snap->root_offset + snap->root_size),
		    snap->root_size, snap->root_cksum));
	else
		WT_RET(__wt_buf_catfmt(session, buf, ", root=[Empty]"));

	/* The alloc, avail and discard entries are printed the same way. */
	if (snap->alloc.offset != WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", alloc=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)snap->alloc.offset,
		    (uintmax_t)(snap->alloc.offset + snap->alloc.size),
		    snap->alloc.size, snap->alloc.cksum));
	else
		WT_RET(__wt_buf_catfmt(session, buf, ", alloc=[Empty]"));

	if (snap->avail.offset != WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", avail=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)snap->avail.offset,
		    (uintmax_t)(snap->avail.offset + snap->avail.size),
		    snap->avail.size, snap->avail.cksum));
	else
		WT_RET(__wt_buf_catfmt(session, buf, ", avail=[Empty]"));

	if (snap->discard.offset != WT_BLOCK_INVALID_OFFSET)
		WT_RET(__wt_buf_catfmt(session, buf,
		    ", discard=[%"
		    PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
		    (uintmax_t)snap->discard.offset,
		    (uintmax_t)(snap->discard.offset + snap->discard.size),
		    snap->discard.size, snap->discard.cksum));
	else
		WT_RET(__wt_buf_catfmt(session, buf, ", discard=[Empty]"));

	WT_RET(__wt_buf_catfmt(session, buf,
	    ", file size=%" PRIuMAX
	    ", write generation=%" PRIu64,
	    (uintmax_t)snap->file_size, snap->write_gen));

	return (0);
}
예제 #5
0
/*
 * __wt_config_concat --
 *	Given a NULL-terminated list of configuration strings, concatenate them
 *	into a newly allocated buffer.  Nothing special is assumed about any
 *	of the config strings, they are simply combined in order.
 *
 *	This code deals with the case where some of the config strings are
 *	wrapped in brackets but others aren't: the resulting string does not
 *	have brackets.
 *
 *	On success, 0 is returned and *config_ret is set to the buffer's
 *	NUL-terminated data.  On failure the buffer is freed, *config_ret is
 *	left untouched and the error is returned; a key that is neither a
 *	string nor an identifier fails with EINVAL.
 */
int
__wt_config_concat(
    WT_SESSION_IMPL *session, const char **cfg, const char **config_ret)
{
	WT_CONFIG cparser;
	WT_CONFIG_ITEM k, v;
	WT_ITEM buf;
	int ret;
	const char **cp;

	WT_CLEAR(buf);
	ret = 0;

	for (cp = cfg; *cp != NULL; ++cp) {
		WT_ERR(__wt_config_init(session, &cparser, *cp));
		while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
			/*
			 * Keys point into the configuration string and are
			 * delimited by their length, not by a NUL byte, so
			 * print them with an explicit precision.
			 */
			if (k.type != ITEM_STRING && k.type != ITEM_ID)
				WT_ERR_MSG(session, EINVAL,
				    "Invalid configuration key found: "
				    "'%.*s'\n",
				    (int)k.len, k.str);
			/* Include the quotes around string keys/values. */
			if (k.type == ITEM_STRING) {
				--k.str;
				k.len += 2;
			}
			if (v.type == ITEM_STRING) {
				--v.str;
				v.len += 2;
			}
			/* Every entry is followed by a comma, "=" only if valued. */
			WT_ERR(__wt_buf_catfmt(session, &buf, "%.*s%s%.*s,",
			    (int)k.len, k.str,
			    (v.len > 0) ? "=" : "",
			    (int)v.len, v.str));
		}
		/* Each string is walked until the parser reports WT_NOTFOUND. */
		if (ret != WT_NOTFOUND)
			goto err;
	}

	/*
	 * If the caller passes us no valid configuration strings, we end up
	 * here with no allocated memory to return.  Check the final buffer
	 * size: empty configuration strings are possible, and paranoia is
	 * good.  Failures go through the common cleanup path like every other
	 * error in this function.
	 */
	if (buf.size == 0)
		WT_ERR(__wt_buf_initsize(session, &buf, 1));

	/* Strip the trailing comma and NUL-terminate */
	((char *)buf.data)[buf.size - 1] = '\0';

	*config_ret = buf.data;
	return (0);

err:	__wt_buf_free(session, &buf);
	return (ret);
}
예제 #6
0
파일: schema_plan.c 프로젝트: ajdavis/mongo
/*
 * __wt_struct_truncate --
 *	Write the packing string for the leading ncols columns of the input
 *	format into the format buffer.
 */
int
__wt_struct_truncate(WT_SESSION_IMPL *session,
    const char *input_fmt, u_int ncols, WT_ITEM *format)
{
	WT_DECL_PACK_VALUE(pv);
	WT_PACK pack;

	WT_RET(__pack_init(session, &pack, input_fmt));

	/* Copy one column description per iteration. */
	for (; ncols > 0; --ncols) {
		WT_RET(__pack_next(&pack, &pv));

		/* Columns without an explicit size are just a type letter. */
		if (!pv.havesize) {
			WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
			continue;
		}
		WT_RET(__wt_buf_catfmt(session,
		    format, "%" PRIu32 "%c", pv.size, pv.type));
	}

	return (0);
}
예제 #7
0
/*
 * __wt_struct_truncate --
 *	Write the packing string for the leading ncols columns of the input
 *	format into the format buffer.
 */
int
__wt_struct_truncate(WT_SESSION_IMPL *session,
    const char *input_fmt, u_int ncols, WT_ITEM *format)
{
	WT_PACK unpacker;
	WT_PACK_VALUE pv;
	u_int remaining;

	WT_CLEAR(pv);   /* -Wuninitialized */

	WT_RET(__pack_init(session, &unpacker, input_fmt));

	/* Emit one column description per requested column. */
	for (remaining = ncols; remaining != 0; --remaining) {
		WT_RET(__pack_next(&unpacker, &pv));

		/* A sized column is its size followed by the type letter. */
		if (!pv.havesize)
			WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
		else
			WT_RET(__wt_buf_catfmt(
			    session, format, "%d%c", (int)pv.size, pv.type));
	}

	return (0);
}
예제 #8
0
/*
 * __wt_config_collapse --
 *	Collapse a set of configuration strings into newly allocated memory.
 *
 * This function takes a NULL-terminated list of configuration strings (where
 * the first one contains all the defaults and the values are in order from
 * least to most preferred, that is, the default values are least preferred),
 * and collapses them into newly allocated memory.  The algorithm is to walk
 * the first of the configuration strings, and for each entry, search all of
 * the configuration strings for a final value, keeping the last value found.
 *
 * On success, 0 is returned and *config_ret references the collapsed string;
 * on failure, *config_ret is NULL and the error is returned.  A key that is
 * neither a string nor an identifier fails with EINVAL.
 *
 * Notes:
 *	Any key not appearing in the first configuration string is discarded
 *	from the final result, because we'll never search for it.
 *
 *	Nested structures aren't parsed.  For example, imagine a configuration
 *	string contains "key=(k2=v2,k3=v3)", and a subsequent string has
 *	"key=(k4=v4)", the result will be "key=(k4=v4)", as we search for and
 *	use the final value of "key", regardless of field overlap or missing
 *	fields in the nested value.
 */
int
__wt_config_collapse(
    WT_SESSION_IMPL *session, const char **cfg, char **config_ret)
{
	WT_CONFIG cparser;
	WT_CONFIG_ITEM k, v;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;

	*config_ret = NULL;

	WT_RET(__wt_scr_alloc(session, 0, &tmp));

	__wt_config_init(session, &cparser, cfg[0]);
	while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
		/*
		 * Keys point into the configuration string and are delimited
		 * by their length, not by a NUL byte, so print them with an
		 * explicit precision.
		 */
		if (k.type != WT_CONFIG_ITEM_STRING &&
		    k.type != WT_CONFIG_ITEM_ID)
			WT_ERR_MSG(session, EINVAL,
			    "Invalid configuration key found: '%.*s'",
			    (int)k.len, k.str);

		/* Replace the value with the one found across all strings. */
		WT_ERR(__wt_config_get(session, cfg, &k, &v));
		/* Include the quotes around string keys/values. */
		if (k.type == WT_CONFIG_ITEM_STRING) {
			--k.str;
			k.len += 2;
		}
		if (v.type == WT_CONFIG_ITEM_STRING) {
			--v.str;
			v.len += 2;
		}
		WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,",
		    (int)k.len, k.str, (int)v.len, v.str));
	}

	/* We loop until error, and the expected error is WT_NOTFOUND. */
	if (ret != WT_NOTFOUND)
		goto err;

	/*
	 * If the caller passes us no valid configuration strings, we get here
	 * with no bytes to copy -- that's OK, the underlying string copy can
	 * handle empty strings.
	 *
	 * Strip any trailing comma.
	 */
	if (tmp->size != 0)
		--tmp->size;
	ret = __wt_strndup(session, tmp->data, tmp->size, config_ret);

err:	__wt_scr_free(session, &tmp);
	return (ret);
}
예제 #9
0
/*
 * __wt_config_upgrade --
 *	Upgrade a configuration string by appending the replacement version.
 *
 *	If the configuration held in buf sets "lsm_merge", the equivalent
 *	"lsm_manager=(merge=...)" setting is appended to buf.  Returns 0 when
 *	the key is absent or the upgrade succeeds; any other lookup or
 *	formatting error is returned to the caller.
 */
int
__wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf)
{
	WT_CONFIG_ITEM v;
	const char *config;
	int ret;

	config = buf->data;

	/*
	 * wiredtiger_open:
	 *	lsm_merge=boolean -> lsm_manager=(merge=boolean)
	 *
	 * Only a successful lookup gives us a value to use: a missing key is
	 * not an error, but any other failure must not be treated as "found".
	 */
	ret = __wt_config_getones(session, config, "lsm_merge", &v);
	if (ret == 0) {
		WT_RET(__wt_buf_catfmt(session, buf,
		    ",lsm_manager=(merge=%s)", v.val ? "true" : "false"));
	} else if (ret != WT_NOTFOUND)
		return (ret);

	return (0);
}
예제 #10
0
/*
 * __open_index --
 *	Open an index: fill in the WT_INDEX structure from its configuration
 *	string.
 *
 *	Reads "source", "immutable", "collator" (optional), "app_metadata"
 *	(optional), "key_format" and "columns" from idx->config and configures
 *	the extractor.  If the table's column groups are complete, it also
 *	computes the key plan, the cursor key formats and the value plan.
 *
 *	Returns 0 on success or the first error encountered; both scratch
 *	buffers are released on every path.
 */
static int
__open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
{
	WT_CONFIG colconf;
	WT_CONFIG_ITEM ckey, cval, metadata;
	WT_DECL_ITEM(buf);
	WT_DECL_ITEM(plan);
	WT_DECL_RET;
	u_int npublic_cols, i;

	WT_ERR(__wt_scr_alloc(session, 0, &buf));

	/* Get the data source from the index config. */
	WT_ERR(__wt_config_getones(session, idx->config, "source", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->source));

	WT_ERR(__wt_config_getones(session, idx->config, "immutable", &cval));
	if (cval.val)
		F_SET(idx, WT_INDEX_IMMUTABLE);

	/*
	 * Compatibility: we didn't always maintain collator information in
	 * index metadata, cope when it isn't found.
	 */
	WT_CLEAR(cval);
	WT_ERR_NOTFOUND_OK(__wt_config_getones(
	    session, idx->config, "collator", &cval));
	if (cval.len != 0) {
		/* The application metadata is optional too. */
		WT_CLEAR(metadata);
		WT_ERR_NOTFOUND_OK(__wt_config_getones(
		    session, idx->config, "app_metadata", &metadata));
		WT_ERR(__wt_collator_config(
		    session, idx->name, &cval, &metadata,
		    &idx->collator, &idx->collator_owned));
	}

	WT_ERR(__wt_extractor_config(
	    session, idx->name, idx->config, &idx->extractor,
	    &idx->extractor_owned));

	WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval));
	WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->key_format));

	/*
	 * The key format for an index is somewhat subtle: the application
	 * specifies a set of columns that it will use for the key, but the
	 * engine usually adds some hidden columns in order to derive the
	 * primary key.  These hidden columns are part of the file's key.
	 *
	 * The file's key_format is stored persistently, we need to calculate
	 * the index cursor key format (which will usually omit some of those
	 * keys).
	 */
	WT_ERR(__wt_buf_init(session, buf, 0));
	WT_ERR(__wt_config_getones(
	    session, idx->config, "columns", &idx->colconf));

	/*
	 * Start with the declared index columns: buf collects a
	 * comma-separated column list and npublic_cols counts them.
	 */
	WT_ERR(__wt_config_subinit(session, &colconf, &idx->colconf));
	for (npublic_cols = 0;
	    (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
	    ++npublic_cols)
		WT_ERR(__wt_buf_catfmt(
		    session, buf, "%.*s,", (int)ckey.len, ckey.str));
	if (ret != WT_NOTFOUND)
		goto err;

	/*
	 * If we didn't find any columns, the index must have an extractor.
	 * We don't rely on this unconditionally because it was only added to
	 * the metadata after version 2.3.1.
	 */
	if (npublic_cols == 0) {
		WT_ERR(__wt_config_getones(
		    session, idx->config, "index_key_columns", &cval));
		npublic_cols = (u_int)cval.val;
		WT_ASSERT(session, npublic_cols != 0);
		/* Add one placeholder column name per public key column. */
		for (i = 0; i < npublic_cols; i++)
			WT_ERR(__wt_buf_catfmt(session, buf, "\"bad col\","));
	}

	/*
	 * Now add any primary key columns from the table that are not
	 * already part of the index key.
	 */
	WT_ERR(__wt_config_subinit(session, &colconf, &table->colconf));
	for (i = 0; i < table->nkey_columns &&
	    (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
	    i++) {
		/*
		 * If the primary key column is already in the secondary key,
		 * don't add it again.
		 */
		if (__wt_config_subgetraw(
		    session, &idx->colconf, &ckey, &cval) == 0)
			continue;
		WT_ERR(__wt_buf_catfmt(
		    session, buf, "%.*s,", (int)ckey.len, ckey.str));
	}
	WT_ERR_NOTFOUND_OK(ret);

	/*
	 * If the table doesn't yet have its column groups, don't try to
	 * calculate a plan: we are just checking that the index creation is
	 * sane.
	 */
	if (!table->cg_complete)
		goto err;

	/* The key plan is built from the column list collected in buf. */
	WT_ERR(__wt_scr_alloc(session, 0, &plan));
	WT_ERR(__wt_struct_plan(
	    session, table, buf->data, buf->size, false, plan));
	WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->key_plan));

	/* Set up the cursor key format (the visible columns). */
	WT_ERR(__wt_buf_init(session, buf, 0));
	WT_ERR(__wt_struct_truncate(session,
	    idx->key_format, npublic_cols, buf));
	WT_ERR(__wt_strndup(
	    session, buf->data, buf->size, &idx->idxkey_format));

	/*
	 * Add a trailing padding byte to the format.  This ensures that there
	 * will be no special optimization of the last column, so the primary
	 * key columns can be simply appended.
	 */
	WT_ERR(__wt_buf_catfmt(session, buf, "x"));
	WT_ERR(__wt_strndup(session, buf->data, buf->size, &idx->exkey_format));

	/* By default, index cursor values are the table value columns. */
	/* TODO Optimize to use index columns in preference to table lookups. */
	WT_ERR(__wt_buf_init(session, plan, 0));
	WT_ERR(__wt_struct_plan(session,
	    table, table->colconf.str, table->colconf.len, true, plan));
	WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->value_plan));

	/* Success and failure share the scratch-buffer cleanup. */
err:	__wt_scr_free(session, &buf);
	__wt_scr_free(session, &plan);
	return (ret);
}
예제 #11
0
/*
 * __wt_meta_ckptlist_set --
 *	Set a file's checkpoint value from the WT_CKPT list.
 *
 *	Every entry of the list that isn't flagged WT_CKPT_DELETE is formatted
 *	as "name=(addr=...,order=...,time=...,size=...,write_gen=...)" inside a
 *	"checkpoint=(...)" group.  When ckptlsn is not NULL, a
 *	"checkpoint_lsn=(file,offset)" entry is appended.  The resulting
 *	string is passed to __ckpt_set for fname.
 *
 *	Side effects: entries flagged WT_CKPT_ADD or WT_CKPT_UPDATE have their
 *	addr and sec fields refreshed, and WT_CKPT_ADD entries are assigned a
 *	new order.
 *
 *	Returns 0 on success or the first error encountered.
 */
int
__wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
    const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn)
{
	WT_CKPT *ckpt;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	time_t secs;
	int64_t maxorder;
	const char *sep;

	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	maxorder = 0;
	sep = "";
	WT_ERR(__wt_buf_fmt(session, buf, "checkpoint=("));
	WT_CKPT_FOREACH(ckptbase, ckpt) {
		/*
		 * Each internal checkpoint name is appended with a generation
		 * to make it a unique name.  We're solving two problems: when
		 * two checkpoints are taken quickly, the timer may not be
		 * unique and/or we can even see time travel on the second
		 * checkpoint if we snapshot the time in-between nanoseconds
		 * rolling over.  Second, if we reset the generational counter
		 * when new checkpoints arrive, we could logically re-create
		 * specific checkpoints, racing with cursors open on those
		 * checkpoints.  I can't think of any way to return incorrect
		 * results by racing with those cursors, but it's simpler not
		 * to worry about it.
		 *
		 * Note the maximum is tracked before deleted checkpoints are
		 * skipped, so deleted entries contribute to it as well.
		 */
		if (ckpt->order > maxorder)
			maxorder = ckpt->order;

		/* Skip deleted checkpoints. */
		if (F_ISSET(ckpt, WT_CKPT_DELETE))
			continue;

		if (F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_UPDATE)) {
			/*
			 * We fake checkpoints for handles in the middle of a
			 * bulk load.  If there is a checkpoint, convert the
			 * raw cookie to a hex string.
			 */
			if (ckpt->raw.size == 0)
				ckpt->addr.size = 0;
			else
				WT_ERR(__wt_raw_to_hex(session,
				    ckpt->raw.data,
				    ckpt->raw.size, &ckpt->addr));

			/* Set the order and timestamp. */
			if (F_ISSET(ckpt, WT_CKPT_ADD))
				ckpt->order = ++maxorder;

			/*
			 * XXX
			 * Assumes a time_t fits into a uintmax_t, which isn't
			 * guaranteed, a time_t has to be an arithmetic type,
			 * but not an integral type.
			 */
			WT_ERR(__wt_seconds(session, &secs));
			ckpt->sec = (uintmax_t)secs;
		}

		/*
		 * Checkpoints named WT_CHECKPOINT get their order appended to
		 * the name ("name.order"); all others are written under their
		 * own name.
		 */
		if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
			WT_ERR(__wt_buf_catfmt(session, buf,
			    "%s%s.%" PRId64 "=(addr=\"%.*s\",order=%" PRIu64
			    ",time=%" PRIuMAX ",size=%" PRIu64
			    ",write_gen=%" PRIu64 ")",
			    sep, ckpt->name, ckpt->order,
			    (int)ckpt->addr.size, (char *)ckpt->addr.data,
			    ckpt->order, ckpt->sec, ckpt->ckpt_size,
			    ckpt->write_gen));
		else
			WT_ERR(__wt_buf_catfmt(session, buf,
			    "%s%s=(addr=\"%.*s\",order=%" PRIu64
			    ",time=%" PRIuMAX ",size=%" PRIu64
			    ",write_gen=%" PRIu64 ")",
			    sep, ckpt->name,
			    (int)ckpt->addr.size, (char *)ckpt->addr.data,
			    ckpt->order, ckpt->sec, ckpt->ckpt_size,
			    ckpt->write_gen));
		/* Entries after the first are comma-separated. */
		sep = ",";
	}
	WT_ERR(__wt_buf_catfmt(session, buf, ")"));
	/* The checkpoint LSN is optional. */
	if (ckptlsn != NULL)
		WT_ERR(__wt_buf_catfmt(session, buf,
		    ",checkpoint_lsn=(%" PRIu32 ",%" PRIuMAX ")",
		    ckptlsn->file, (uintmax_t)ckptlsn->offset));
	WT_ERR(__ckpt_set(session, fname, buf->mem));

err:	__wt_scr_free(&buf);
	return (ret);
}
예제 #12
0
/*
 * __create_index --
 *	Create an index.
 *
 *	The name must have the form "index:<table name>:<index name>" and the
 *	table must already exist.  The function determines the index's data
 *	source (from the "source" configuration or a generated name), computes
 *	the source's key and value formats from the "columns" configuration,
 *	creates the source object and inserts the collapsed index configuration
 *	into the metadata.
 *
 *	Returns 0 on success.  EINVAL is returned for a malformed name, a
 *	missing "columns" configuration or a record-number index key; EEXIST
 *	is returned if the index already exists and exclusive is set.  Other
 *	errors are passed through from the functions called.
 */
static int
__create_index(WT_SESSION_IMPL *session,
    const char *name, int exclusive, const char *config)
{
	WT_CONFIG pkcols;
	WT_CONFIG_ITEM ckey, cval, icols;
	WT_DECL_RET;
	WT_ITEM confbuf, extra_cols, fmt, namebuf;
	WT_TABLE *table;
	const char *cfg[4] =
	    { WT_CONFIG_BASE(session, index_meta), NULL, NULL, NULL };
	const char *sourcecfg[] = { config, NULL, NULL };
	const char *sourceconf, *source, *idxconf, *idxname;
	const char *tablename;
	size_t tlen;
	u_int i;

	idxconf = sourceconf = NULL;
	WT_CLEAR(confbuf);
	WT_CLEAR(fmt);
	WT_CLEAR(extra_cols);
	WT_CLEAR(namebuf);

	/* Split "index:<table name>:<index name>" into its two names. */
	tablename = name;
	if (!WT_PREFIX_SKIP(tablename, "index:"))
		return (EINVAL);
	idxname = strchr(tablename, ':');
	if (idxname == NULL)
		WT_RET_MSG(session, EINVAL, "Invalid index name, "
		    "should be <table name>:<index name>: %s", name);

	tlen = (size_t)(idxname++ - tablename);
	if ((ret =
	    __wt_schema_get_table(session, tablename, tlen, 1, &table)) != 0)
		WT_RET_MSG(session, ret,
		    "Can't create an index for a non-existent table: %.*s",
		    (int)tlen, tablename);

	/*
	 * From here on we hold the table: every failure must go through the
	 * error label so it is released.
	 */
	if (__wt_config_getones(session, config, "source", &cval) == 0) {
		WT_ERR(__wt_buf_fmt(session, &namebuf,
		    "%.*s", (int)cval.len, cval.str));
		source = namebuf.data;
	} else {
		WT_ERR(__wt_schema_index_source(
		    session, table, idxname, config, &namebuf));
		source = namebuf.data;

		/* Add the source name to the index config before collapsing. */
		WT_ERR(__wt_buf_catfmt(session, &confbuf,
		    ",source=\"%s\"", source));
	}

	/* Calculate the key/value formats. */
	if (__wt_config_getones(session, config, "columns", &icols) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "No 'columns' configuration for '%s'", name);

	/*
	 * The key format for an index is somewhat subtle: the application
	 * specifies a set of columns that it will use for the key, but the
	 * engine usually adds some hidden columns in order to derive the
	 * primary key.  These hidden columns are part of the source's
	 * key_format, which we are calculating now, but not part of an index
	 * cursor's key_format.
	 */
	WT_ERR(__wt_config_subinit(session, &pkcols, &table->colconf));
	for (i = 0; i < table->nkey_columns &&
	    (ret = __wt_config_next(&pkcols, &ckey, &cval)) == 0;
	    i++) {
		/*
		 * If the primary key column is already in the secondary key,
		 * don't add it again.
		 */
		if (__wt_config_subgetraw(session, &icols, &ckey, &cval) == 0)
			continue;
		WT_ERR(__wt_buf_catfmt(
		    session, &extra_cols, "%.*s,", (int)ckey.len, ckey.str));
	}
	if (ret != 0 && ret != WT_NOTFOUND)
		goto err;

	/*
	 * Index values are normally empty: all columns are packed into the
	 * index key.  The exception is LSM, which (currently) reserves empty
	 * values as tombstones.  Use a single padding byte in that case.
	 *
	 * The key format is appended after the value format chosen here:
	 * formatting the buffer again from scratch would throw the LSM
	 * padding byte away.
	 */
	if (WT_PREFIX_MATCH(source, "lsm:"))
		WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=x,"));
	else
		WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=,"));
	WT_ERR(__wt_buf_catfmt(session, &fmt, "key_format="));
	WT_ERR(__wt_struct_reformat(session, table,
	    icols.str, icols.len, (const char *)extra_cols.data, 0, &fmt));

	/* Check for a record number index key, which makes no sense. */
	WT_ERR(__wt_config_getones(session, fmt.data, "key_format", &cval));
	if (cval.len == 1 && cval.str[0] == 'r')
		WT_ERR_MSG(session, EINVAL,
		    "column-store index may not use the record number as its "
		    "index key");

	/* Create the source from the caller's config plus the formats. */
	sourcecfg[1] = fmt.data;
	WT_ERR(__wt_config_concat(session, sourcecfg, &sourceconf));

	WT_ERR(__wt_schema_create(session, source, sourceconf));

	/* Collapse defaults, source config and extras into the metadata. */
	cfg[1] = sourceconf;
	cfg[2] = confbuf.data;
	WT_ERR(__wt_config_collapse(session, cfg, &idxconf));
	if ((ret = __wt_metadata_insert(session, name, idxconf)) != 0) {
		/*
		 * If the entry already exists in the metadata, we're done.
		 * This is an error for exclusive creates but okay otherwise.
		 */
		if (ret == WT_DUPLICATE_KEY)
			ret = exclusive ? EEXIST : 0;
		goto err;
	}

err:	__wt_free(session, idxconf);
	__wt_free(session, sourceconf);
	__wt_buf_free(session, &confbuf);
	__wt_buf_free(session, &extra_cols);
	__wt_buf_free(session, &fmt);
	__wt_buf_free(session, &namebuf);

	__wt_schema_release_table(session, table);
	return (ret);
}
예제 #13
0
/*
 * __config_merge_format_next --
 *	Walk the array, building entries.
 *
 *	Starting at index *enp, entries of cp->entries are appended to the
 *	build buffer as "key=value,".  A key containing the separator
 *	character opens a nested "key=(...)" group, which is filled in by a
 *	recursive call.
 *
 *	prefix/plen: the leading part of the keys (and its length) belonging
 *	    to the enclosing level; plen bytes are skipped when keys are
 *	    written, and plen is 0 at the top level.
 *	enp: in/out index into cp->entries, shared across recursion levels.
 *	build: the buffer the formatted output is appended to.
 *
 *	Returns 0 on success or an error from formatting into the buffer.
 */
static int
__config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix,
    size_t plen, size_t *enp, WT_CONFIG_MERGE *cp, WT_ITEM *build)
{
	WT_CONFIG_MERGE_ENTRY *ep;
	size_t len1, len2, next;
	char *p;

	for (; *enp < cp->entries_next; ++*enp) {
		ep = &cp->entries[*enp];
		len1 = strlen(ep->k);

		/*
		 * The entries are in sorted order, take the last entry for any
		 * key.
		 */
		if (*enp < (cp->entries_next - 1)) {
			len2 = strlen((ep + 1)->k);

			/* Choose the last of identical keys. */
			if (len1 == len2 &&
			    memcmp(ep->k, (ep + 1)->k, len1) == 0)
				continue;

			/*
			 * The test is complicated by matching empty entries
			 * "foo=" against nested structures "foo,bar=", where
			 * the latter is a replacement for the former.
			 */
			if (len2 > len1 &&
			    (ep + 1)->k[len1] == SEPC &&
			    memcmp(ep->k, (ep + 1)->k, len1) == 0)
				continue;
		}

		/*
		 * If we're skipping a prefix and this entry doesn't match it,
		 * back off one entry and pop up a level.  (The caller's loop
		 * increments the index again, so the entry is revisited at
		 * the outer level.)
		 */
		if (plen != 0 &&
		    (plen > len1 || memcmp(ep->k, prefix, plen) != 0)) {
			--*enp;
			break;
		}

		/*
		 * If the entry introduces a new level, recurse through that
		 * new level.  The recursive call uses this entry's key, up to
		 * and including the separator, as its prefix.
		 */
		if ((p = strchr(ep->k + plen, SEPC)) != NULL) {
			next = WT_PTRDIFF(p, ep->k);
			WT_RET(__wt_buf_catfmt(session,
			    build, "%.*s=(", (int)(next - plen), ep->k + plen));
			WT_RET(__config_merge_format_next(
			    session, ep->k, next + 1, enp, cp, build));
			__strip_comma(build);
			WT_RET(__wt_buf_catfmt(session, build, "),"));
			continue;
		}

		/* Append the entry to the buffer. */
		WT_RET(__wt_buf_catfmt(
		    session, build, "%s=%s,", ep->k + plen, ep->v));
	}

	return (0);
}
예제 #14
0
/*
 * __open_index --
 *	Open an index.
 */
static int
__open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
{
	WT_CONFIG colconf;
	WT_CONFIG_ITEM ckey, cval;
	WT_DECL_ITEM(buf);
	WT_DECL_ITEM(plan);
	WT_DECL_RET;
	u_int cursor_key_cols, i;

	WT_ERR(__wt_scr_alloc(session, 0, &buf));

	/* Get the data source from the index config. */
	WT_ERR(__wt_config_getones(session, idx->config, "source", &cval));
	WT_ERR(__wt_buf_fmt(
	    session, buf, "%.*s", (int)cval.len, cval.str));
	idx->source = __wt_buf_steal(session, buf, NULL);
	idx->need_value = WT_PREFIX_MATCH(idx->source, "lsm:");

	WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval));
	WT_ERR(__wt_buf_fmt(
	    session, buf, "%.*s", (int)cval.len, cval.str));
	idx->key_format = __wt_buf_steal(session, buf, NULL);

	/*
	 * The key format for an index is somewhat subtle: the application
	 * specifies a set of columns that it will use for the key, but the
	 * engine usually adds some hidden columns in order to derive the
	 * primary key.  These hidden columns are part of the file's key.
	 *
	 * The file's key_format is stored persistently, we need to calculate
	 * the index cursor key format (which will usually omit some of those
	 * keys).
	 */
	WT_ERR(__wt_config_getones(
	    session, idx->config, "columns", &idx->colconf));

	/* Start with the declared index columns. */
	WT_ERR(__wt_config_subinit(session, &colconf, &idx->colconf));
	cursor_key_cols = 0;
	while ((ret = __wt_config_next(&colconf, &ckey, &cval)) == 0) {
		WT_ERR(__wt_buf_catfmt(
		    session, buf, "%.*s,", (int)ckey.len, ckey.str));
		++cursor_key_cols;
	}
	if (ret != 0 && ret != WT_NOTFOUND)
		goto err;

	/*
	 * Now add any primary key columns from the table that are not
	 * already part of the index key.
	 */
	WT_ERR(__wt_config_subinit(session, &colconf, &table->colconf));
	for (i = 0; i < table->nkey_columns &&
	    (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
	    i++) {
		/*
		 * If the primary key column is already in the secondary key,
		 * don't add it again.
		 */
		if (__wt_config_subgetraw(
		    session, &idx->colconf, &ckey, &cval) == 0)
			continue;
		WT_ERR(__wt_buf_catfmt(
		    session, buf, "%.*s,", (int)ckey.len, ckey.str));
	}
	if (ret != 0 && ret != WT_NOTFOUND)
		goto err;

	WT_ERR(__wt_scr_alloc(session, 0, &plan));
	WT_ERR(__wt_struct_plan(session, table, buf->data, buf->size, 0, plan));
	idx->key_plan = __wt_buf_steal(session, plan, NULL);

	/* Set up the cursor key format (the visible columns). */
	WT_ERR(__wt_buf_init(session, buf, 0));
	WT_ERR(__wt_struct_truncate(session,
	    idx->key_format, cursor_key_cols, buf));
	idx->idxkey_format = __wt_buf_steal(session, buf, NULL);

	/* By default, index cursor values are the table value columns. */
	/* TODO Optimize to use index columns in preference to table lookups. */
	WT_ERR(__wt_struct_plan(session,
	    table, table->colconf.str, table->colconf.len, 1, plan));
	idx->value_plan = __wt_buf_steal(session, plan, NULL);

err:	__wt_scr_free(&buf);
	__wt_scr_free(&plan);
	return (ret);
}
예제 #15
0
파일: schema_plan.c 프로젝트: ajdavis/mongo
/*
 * __wt_struct_reformat --
 *	Compute the packing format for a list of table columns (the values of
 *	a column group, or the keys of an index) and append it to the format
 *	buffer.  Columns listed in extra_cols, if any, are processed once the
 *	primary list is exhausted.
 *
 *	Returns 0 on success, EINVAL (with a message) when a column cannot be
 *	resolved, or any error from parsing or buffer formatting.
 */
int
__wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, const char *extra_cols, bool value_only,
    WT_ITEM *format)
{
	WT_CONFIG parser;
	WT_CONFIG_ITEM column, peek_k, peek_v;
	WT_DECL_PACK_VALUE(pv);
	WT_DECL_RET;
	bool more;

	__wt_config_initn(session, &parser, columns, len);

	/*
	 * Prime the one-column look-ahead.  An empty column list fails here
	 * with WT_NOTFOUND, which is acceptable.
	 */
	WT_RET_NOTFOUND_OK(ret = __wt_config_next(&parser, &peek_k, &peek_v));
	if (ret == WT_NOTFOUND) {
		if (extra_cols == NULL) {
			/*
			 * No columns at all and nothing in the buffer yet: the
			 * result is the empty string.
			 */
			if (format->size == 0) {
				WT_RET(__wt_buf_set(session, format, "", 1));
				return (0);
			}
			/*
			 * NOTE(review): with no columns, no extra columns and
			 * a non-empty buffer we fall into the loop below using
			 * whatever the failed read left in the look-ahead key;
			 * confirm that is well-defined.
			 */
		} else {
			/* Switch to the extra columns straight away. */
			__wt_config_init(session, &parser, extra_cols);
			WT_RET(__wt_config_next(&parser, &peek_k, &peek_v));
			extra_cols = NULL;
		}
	}

	for (;;) {
		/* Consume the look-ahead and read one column further. */
		column = peek_k;
		ret = __wt_config_next(&parser, &peek_k, &peek_v);
		if (ret != 0 && ret != WT_NOTFOUND)
			return (ret);
		more = ret == 0;

		/* At the end of the primary list, continue with the extras. */
		if (!more && extra_cols != NULL) {
			__wt_config_init(session, &parser, extra_cols);
			WT_RET(__wt_config_next(&parser, &peek_k, &peek_v));
			more = true;
			extra_cols = NULL;
		}

		ret = __find_column_format(
		    session, table, &column, value_only, &pv);
		if (ret != 0) {
			if (value_only && ret == EINVAL)
				WT_RET_MSG(session, EINVAL,
				    "A column group cannot store key column "
				    "'%.*s' in its value",
				    (int)column.len, column.str);
			WT_RET_MSG(session, EINVAL,
			    "Column '%.*s' not found",
			    (int)column.len, column.str);
		}

		/*
		 * An unsized WT_ITEM moved from the end to the middle needs
		 * its size prepended ('u' becomes 'U'), and one moved from the
		 * middle to the end does not ('U' becomes 'u').  This is the
		 * only case where the destination size can be larger than the
		 * source size.
		 */
		if (more) {
			if (pv.type == 'u' && !pv.havesize)
				pv.type = 'U';
		} else if (pv.type == 'U')
			pv.type = 'u';

		/* Emit the optional size followed by the type character. */
		if (!pv.havesize)
			WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
		else
			WT_RET(__wt_buf_catfmt(session,
			    format, "%" PRIu32 "%c", pv.size, pv.type));

		if (!more)
			break;
	}

	return (0);
}
예제 #16
0
파일: schema_plan.c 프로젝트: ajdavis/mongo
/*
 * __wt_struct_plan --
 *	Given a table cursor containing a complete table, build the "projection
 *	plan" to distribute the columns to dependent stores.  A string
 *	representing the plan will be appended to the plan buffer.
 *
 *	columns/len give the column list to plan for; when value_only is set,
 *	the leading table->nkey_columns entries of that list are skipped.
 *
 *	Returns 0 on success, otherwise an error from parsing the column
 *	list, from locating a column, or from formatting into the buffer.
 */
int
__wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, bool value_only, WT_ITEM *plan)
{
	WT_CONFIG conf;
	WT_CONFIG_ITEM k, v;
	WT_DECL_RET;
	u_int cg, col, current_cg, current_col, i, start_cg, start_col;
	char coltype, current_coltype;
	bool have_it;

	start_cg = start_col = UINT_MAX;	/* -Wuninitialized */

	/* Work through the value columns by skipping over the key columns. */
	__wt_config_initn(session, &conf, columns, len);
	if (value_only)
		for (i = 0; i < table->nkey_columns; i++)
			WT_RET(__wt_config_next(&conf, &k, &v));

	current_cg = cg = 0;
	current_col = col = INT_MAX;
	current_coltype = coltype = WT_PROJ_KEY; /* Keep lint quiet. */
	for (i = 0; (ret = __wt_config_next(&conf, &k, &v)) == 0; i++) {
		have_it = false;

		/*
		 * Visit every place the column is stored: keep asking for the
		 * next location until we are handed the first one again.
		 */
		while ((ret = __find_next_col(session, table,
		    &k, &cg, &col, &coltype)) == 0 &&
		    (!have_it || cg != start_cg || col != start_col)) {
			/*
			 * First we move to the column.  If that is in a
			 * different column group to the last column we
			 * accessed, or before the last column in the same
			 * column group, or moving from the key to the value,
			 * we need to switch column groups or rewind.
			 */
			if (current_cg != cg || current_col > col ||
			    current_coltype != coltype) {
				WT_ASSERT(session, !value_only ||
				    coltype == WT_PROJ_VALUE);
				WT_RET(__wt_buf_catfmt(
				    session, plan, "%u%c", cg, coltype));

				/*
				 * Set the current column group and column
				 * within the table.
				 */
				current_cg = cg;
				current_col = 0;
				current_coltype = coltype;
			}
			/* Now move to the column we want. */
			if (current_col < col) {
				if (col - current_col > 1)
					WT_RET(__wt_buf_catfmt(session,
					    plan, "%u", col - current_col));
				WT_RET(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_SKIP));
			}
			/*
			 * Now copy the value in / out.  In the common case,
			 * where each value is used in one column, we do a
			 * "next" operation.  If the value is used again, we do
			 * a "reuse" operation to avoid making another copy.
			 */
			if (!have_it) {
				WT_RET(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_NEXT));

				start_cg = cg;
				start_col = col;
				have_it = true;
			} else
				WT_RET(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_REUSE));
			current_col = col + 1;
		}

		/*
		 * The search can end with success, with WT_NOTFOUND, or with
		 * a real error.  Return real errors now: the next iteration
		 * of the outer loop overwrites ret and would otherwise lose
		 * them.
		 */
		if (ret != 0 && ret != WT_NOTFOUND)
			return (ret);

		/*
		 * We may fail to find a column if it is a custom extractor.
		 * In that case, treat it as the first value column: we only
		 * ever use such plans to extract the primary key from the
		 * index.
		 */
		if (ret == WT_NOTFOUND)
			WT_RET(__wt_buf_catfmt(session, plan,
			    "0%c%c", WT_PROJ_VALUE, WT_PROJ_NEXT));
	}
	/* Only running out of columns is a clean end of the outer loop. */
	WT_RET_TEST(ret != WT_NOTFOUND, ret);

	/* Special case empty plans. */
	if (i == 0 && plan->size == 0)
		WT_RET(__wt_buf_set(session, plan, "", 1));

	return (0);
}
예제 #17
0
파일: os_fhandle.c 프로젝트: Machyne/mongo
/*
 * __open_verbose --
 *	When file-operation verbose messages are enabled, log the name, the
 *	file type and the open flags of a handle being opened.  Built without
 *	HAVE_VERBOSE, this does nothing and returns 0.
 */
static inline int
__open_verbose(
    WT_SESSION_IMPL *session, const char *name, int file_type, u_int flags)
{
#ifdef HAVE_VERBOSE
	WT_DECL_RET;
	WT_DECL_ITEM(tmp);
	const char *file_type_tag, *flag_list, *sep;

	/* Nothing to report unless file-operation messages are enabled. */
	if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS))
		return (0);

	/*
	 * Tracking file opens is useful when debugging platforms, so make the
	 * message informative: start by naming the file type.
	 */
	if (file_type == WT_FS_OPEN_FILE_TYPE_CHECKPOINT)
		file_type_tag = "checkpoint";
	else if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
		file_type_tag = "data";
	else if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
		file_type_tag = "directory";
	else if (file_type == WT_FS_OPEN_FILE_TYPE_LOG)
		file_type_tag = "log";
	else if (file_type == WT_FS_OPEN_FILE_TYPE_REGULAR)
		file_type_tag = "regular";
	else
		file_type_tag = "unknown open type";

	WT_RET(__wt_scr_alloc(session, 0, &tmp));

	/*
	 * Collect the names of the flags that are set: the first one is
	 * introduced by " (" and each later one by ", ".
	 */
	sep = " (";
#define	WT_FS_OPEN_VERBOSE_FLAG(f, name)				\
	if (LF_ISSET(f)) {						\
		WT_ERR(__wt_buf_catfmt(					\
		    session, tmp, "%s%s", sep, name));			\
		sep = ", ";						\
	}

	WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_CREATE, "create");
	WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_DIRECTIO, "direct-IO");
	WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_EXCLUSIVE, "exclusive");
	WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_FIXED, "fixed");
	WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_READONLY, "readonly");

	/* Close the list if any flag was added, else print no list at all. */
	if (tmp->size == 0)
		flag_list = "";
	else {
		WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
		flag_list = (char *)tmp->data;
	}

	__wt_verbose(session, WT_VERB_FILEOPS,
	    "%s: file-open: type %s%s",
	    name, file_type_tag, flag_list);

err:	__wt_scr_free(session, &tmp);
	return (ret);
#else
	WT_UNUSED(session);
	WT_UNUSED(name);
	WT_UNUSED(file_type);
	WT_UNUSED(flags);
	return (0);
#endif
}
예제 #18
0
/*
 * __create_index --
 *	Create an index.
 *
 *	name is the index URI, "index:<table name>:<index name>"; config is
 *	the index configuration string.  The underlying source object is
 *	created, the collapsed index configuration is inserted into the
 *	metadata and the index is then opened to check the configuration.
 *
 *	Returns 0 on success.  If the metadata entry already exists the
 *	result is EEXIST when exclusive is set and 0 otherwise.  EINVAL is
 *	returned for a malformed name, a table without named columns, a
 *	missing 'columns' configuration, a custom extractor without a
 *	key_format or naming primary key columns, and a record number key.
 */
static int
__create_index(WT_SESSION_IMPL *session,
    const char *name, int exclusive, const char *config)
{
	WT_CONFIG kcols, pkcols;
	WT_CONFIG_ITEM ckey, cval, icols, kval;
	WT_DECL_PACK_VALUE(pv);
	WT_DECL_RET;
	WT_ITEM confbuf, extra_cols, fmt, namebuf;
	WT_PACK pack;
	WT_TABLE *table;
	const char *cfg[4] =
	    { WT_CONFIG_BASE(session, index_meta), NULL, NULL, NULL };
	const char *sourcecfg[] = { config, NULL, NULL };
	const char *source, *sourceconf, *idxname, *tablename;
	char *idxconf;
	size_t tlen;
	int have_extractor;
	u_int i, npublic_cols;

	/*
	 * Initialize everything the cleanup code touches before the first
	 * jump to the error label.
	 */
	sourceconf = NULL;
	idxconf = NULL;
	WT_CLEAR(confbuf);
	WT_CLEAR(fmt);
	WT_CLEAR(extra_cols);
	WT_CLEAR(namebuf);
	have_extractor = 0;

	/* Split "index:<table name>:<index name>" into its parts. */
	tablename = name;
	if (!WT_PREFIX_SKIP(tablename, "index:"))
		return (EINVAL);
	idxname = strchr(tablename, ':');
	if (idxname == NULL)
		WT_RET_MSG(session, EINVAL, "Invalid index name, "
		    "should be <table name>:<index name>: %s", name);

	tlen = (size_t)(idxname++ - tablename);
	if ((ret =
	    __wt_schema_get_table(session, tablename, tlen, 1, &table)) != 0)
		WT_RET_MSG(session, ret,
		    "Can't create an index for a non-existent table: %.*s",
		    (int)tlen, tablename);

	/*
	 * The table has been acquired: from here on every failure must leave
	 * through the error label so the table is released.
	 */
	if (table->is_simple)
		WT_ERR_MSG(session, EINVAL,
		    "%s requires a table with named columns", name);

	/* Use the configured source, or derive one from the index name. */
	if (__wt_config_getones(session, config, "source", &cval) == 0) {
		WT_ERR(__wt_buf_fmt(session, &namebuf,
		    "%.*s", (int)cval.len, cval.str));
		source = namebuf.data;
	} else {
		WT_ERR(__wt_schema_index_source(
		    session, table, idxname, config, &namebuf));
		source = namebuf.data;

		/* Add the source name to the index config before collapsing. */
		WT_ERR(__wt_buf_catfmt(session, &confbuf,
		    ",source=\"%s\"", source));
	}

	if (__wt_config_getones_none(
	    session, config, "extractor", &cval) == 0 && cval.len != 0) {
		have_extractor = 1;
		/* Custom extractors must supply a key format. */
		if ((ret = __wt_config_getones(
		    session, config, "key_format", &kval)) != 0)
			WT_ERR_MSG(session, EINVAL,
			    "%s: custom extractors require a key_format", name);
	}

	/* Calculate the key/value formats. */
	WT_CLEAR(icols);
	if (__wt_config_getones(session, config, "columns", &icols) != 0 &&
	    !have_extractor)
		WT_ERR_MSG(session, EINVAL,
		    "%s: requires 'columns' configuration", name);

	/*
	 * Count the public columns using the declared columns for normal
	 * indices or the key format for custom extractors.
	 */
	npublic_cols = 0;
	if (!have_extractor) {
		WT_ERR(__wt_config_subinit(session, &kcols, &icols));
		while ((ret = __wt_config_next(&kcols, &ckey, &cval)) == 0)
			++npublic_cols;
		WT_ERR_NOTFOUND_OK(ret);
	} else {
		WT_ERR(__pack_initn(session, &pack, kval.str, kval.len));
		while ((ret = __pack_next(&pack, &pv)) == 0)
			++npublic_cols;
		WT_ERR_NOTFOUND_OK(ret);
	}

	/*
	 * The key format for an index is somewhat subtle: the application
	 * specifies a set of columns that it will use for the key, but the
	 * engine usually adds some hidden columns in order to derive the
	 * primary key.  These hidden columns are part of the source's
	 * key_format, which we are calculating now, but not part of an index
	 * cursor's key_format.
	 */
	WT_ERR(__wt_config_subinit(session, &pkcols, &table->colconf));
	for (i = 0; i < table->nkey_columns &&
	    (ret = __wt_config_next(&pkcols, &ckey, &cval)) == 0;
	    i++) {
		/*
		 * If the primary key column is already in the secondary key,
		 * don't add it again.
		 */
		if (__wt_config_subgetraw(session, &icols, &ckey, &cval) == 0) {
			if (have_extractor)
				WT_ERR_MSG(session, EINVAL,
				    "an index with a custom extractor may not "
				    "include primary key columns");
			continue;
		}
		WT_ERR(__wt_buf_catfmt(
		    session, &extra_cols, "%.*s,", (int)ckey.len, ckey.str));
	}
	/* Running out of columns (WT_NOTFOUND) is not an error here. */
	if (ret != 0 && ret != WT_NOTFOUND)
		goto err;

	/* Index values are empty: all columns are packed into the index key. */
	WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=,key_format="));

	if (have_extractor) {
		/*
		 * Start from the supplied key format and clear the declared
		 * columns, so only the extra columns are appended below.
		 */
		WT_ERR(__wt_buf_catfmt(session, &fmt, "%.*s",
		    (int)kval.len, kval.str));
		WT_CLEAR(icols);
	}

	/*
	 * Construct the index key format, or append the primary key columns
	 * for custom extractors.
	 */
	WT_ERR(__wt_struct_reformat(session, table,
	    icols.str, icols.len, (const char *)extra_cols.data, 0, &fmt));

	/* Check for a record number index key, which makes no sense. */
	WT_ERR(__wt_config_getones(session, fmt.data, "key_format", &cval));
	if (cval.len == 1 && cval.str[0] == 'r')
		WT_ERR_MSG(session, EINVAL,
		    "column-store index may not use the record number as its "
		    "index key");

	WT_ERR(__wt_buf_catfmt(
	    session, &fmt, ",index_key_columns=%u", npublic_cols));

	/* Create the source using the caller's config plus the formats. */
	sourcecfg[1] = fmt.data;
	WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));

	WT_ERR(__wt_schema_create(session, source, sourceconf));

	/* Collapse the configuration stack into the index metadata entry. */
	cfg[1] = sourceconf;
	cfg[2] = confbuf.data;
	WT_ERR(__wt_config_collapse(session, cfg, &idxconf));
	if ((ret = __wt_metadata_insert(session, name, idxconf)) != 0) {
		/*
		 * If the entry already exists in the metadata, we're done.
		 * This is an error for exclusive creates but okay otherwise.
		 */
		if (ret == WT_DUPLICATE_KEY)
			ret = exclusive ? EEXIST : 0;
		goto err;
	}

	/* Make sure that the configuration is valid. */
	WT_ERR(__wt_schema_open_index(
	    session, table, idxname, strlen(idxname), NULL));

err:	__wt_free(session, idxconf);
	__wt_free(session, sourceconf);
	__wt_buf_free(session, &confbuf);
	__wt_buf_free(session, &extra_cols);
	__wt_buf_free(session, &fmt);
	__wt_buf_free(session, &namebuf);

	__wt_schema_release_table(session, table);
	return (ret);
}
예제 #19
0
/*
 * __create_colgroup --
 *	Create a column group.
 *
 *	name is the column group URI, "colgroup:<table name>" or
 *	"colgroup:<table name>:<column group name>"; config is the column
 *	group configuration string.  The underlying source object is created,
 *	the collapsed configuration is inserted into the metadata and the
 *	table's column groups are then opened.
 *
 *	Returns 0 on success.  If the metadata entry already exists the
 *	result is EEXIST when exclusive is set and 0 otherwise.  A missing
 *	table is reported as ENOENT; EINVAL is returned for a malformed name,
 *	a column group the table does not reference, or a named column group
 *	without a 'columns' configuration.
 */
static int
__create_colgroup(WT_SESSION_IMPL *session,
    const char *name, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	WT_ITEM confbuf, fmt, namebuf;
	WT_TABLE *table;
	size_t tlen;
	const char **cfgp, *cfg[4] =
	    { WT_CONFIG_BASE(session, colgroup_meta), config, NULL, NULL };
	const char *sourcecfg[] = { config, NULL, NULL };
	const char *cgname, *source, *sourceconf, *tablename;
	char *cgconf;

	/*
	 * Initialize everything the cleanup code touches before the first
	 * jump to the error label.
	 */
	sourceconf = NULL;
	cgconf = NULL;
	WT_CLEAR(fmt);
	WT_CLEAR(confbuf);
	WT_CLEAR(namebuf);

	/* Split the URI; the column group name after the table is optional. */
	tablename = name;
	if (!WT_PREFIX_SKIP(tablename, "colgroup:"))
		return (EINVAL);
	cgname = strchr(tablename, ':');
	if (cgname != NULL) {
		tlen = (size_t)(cgname - tablename);
		++cgname;
	} else
		tlen = strlen(tablename);

	if ((ret =
	    __wt_schema_get_table(session, tablename, tlen, 1, &table)) != 0)
		WT_RET_MSG(session, (ret == WT_NOTFOUND) ? ENOENT : ret,
		    "Can't create '%s' for non-existent table '%.*s'",
		    name, (int)tlen, tablename);

	/*
	 * The table has been acquired: from here on every failure leaves
	 * through the error label so the table is released.
	 */

	/* Make sure the column group is referenced from the table. */
	if (cgname != NULL && (ret =
	    __wt_config_subgets(session, &table->cgconf, cgname, &cval)) != 0)
		WT_ERR_MSG(session, EINVAL,
		    "Column group '%s' not found in table '%.*s'",
		    cgname, (int)tlen, tablename);

	/* Find the first NULL entry in the cfg stack. */
	for (cfgp = &cfg[1]; *cfgp; cfgp++)
		;

	/* Add the source to the colgroup config before collapsing. */
	if (__wt_config_getones(
	    session, config, "source", &cval) == 0 && cval.len != 0) {
		WT_ERR(__wt_buf_fmt(
		    session, &namebuf, "%.*s", (int)cval.len, cval.str));
		source = namebuf.data;
	} else {
		/* No source configured: derive one and record it. */
		WT_ERR(__wt_schema_colgroup_source(
		    session, table, cgname, config, &namebuf));
		source = namebuf.data;
		WT_ERR(__wt_buf_fmt(
		    session, &confbuf, "source=\"%s\"", source));
		*cfgp++ = confbuf.data;
	}

	/* Calculate the key/value formats: these go into the source config. */
	WT_ERR(__wt_buf_fmt(session, &fmt, "key_format=%s", table->key_format));
	if (cgname == NULL)
		/* The unnamed column group stores the table's whole value. */
		WT_ERR(__wt_buf_catfmt
		    (session, &fmt, ",value_format=%s", table->value_format));
	else {
		/* A named column group stores only its declared columns. */
		if (__wt_config_getones(session, config, "columns", &cval) != 0)
			WT_ERR_MSG(session, EINVAL,
			    "No 'columns' configuration for '%s'", name);
		WT_ERR(__wt_buf_catfmt(session, &fmt, ",value_format="));
		WT_ERR(__wt_struct_reformat(session,
		    table, cval.str, cval.len, NULL, 1, &fmt));
	}
	sourcecfg[1] = fmt.data;
	WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));

	WT_ERR(__wt_schema_create(session, source, sourceconf));

	/* Collapse the configuration stack into the metadata entry. */
	WT_ERR(__wt_config_collapse(session, cfg, &cgconf));
	if ((ret = __wt_metadata_insert(session, name, cgconf)) != 0) {
		/*
		 * If the entry already exists in the metadata, we're done.
		 * This is an error for exclusive creates but okay otherwise.
		 */
		if (ret == WT_DUPLICATE_KEY)
			ret = exclusive ? EEXIST : 0;
		goto err;
	}

	WT_ERR(__wt_schema_open_colgroups(session, table));

err:	__wt_free(session, cgconf);
	__wt_free(session, sourceconf);
	__wt_buf_free(session, &confbuf);
	__wt_buf_free(session, &fmt);
	__wt_buf_free(session, &namebuf);

	__wt_schema_release_table(session, table);
	return (ret);
}
예제 #20
0
/*
 * __wt_lsm_meta_write --
 *	Format an LSM tree's settings, its chunks and its old chunks into a
 *	configuration string and store that string in the metadata under the
 *	tree's name.
 */
int
__wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_CHUNK *cur;
	u_int slot;

	WT_RET(__wt_scr_alloc(session, 0, &buf));

	/* Formats and the nested bloom/file configurations come first. */
	WT_ERR(__wt_buf_fmt(session, buf,
	    "key_format=%s,value_format=%s,bloom_config=(%s),file_config=(%s)",
	    lsm_tree->key_format, lsm_tree->value_format,
	    lsm_tree->bloom_config, lsm_tree->file_config));

	/* The collator is only written when one is named. */
	if (lsm_tree->collator_name != NULL) {
		WT_ERR(__wt_buf_catfmt(
		    session, buf, ",collator=%s", lsm_tree->collator_name));
	}

	/* Numeric settings; the throttle flag is written as 0 or 1. */
	WT_ERR(__wt_buf_catfmt(session, buf,
	    ",last=%" PRIu32
	    ",chunk_count_limit=%" PRIu32
	    ",chunk_max=%" PRIu64
	    ",chunk_size=%" PRIu64
	    ",auto_throttle=%" PRIu32
	    ",merge_max=%" PRIu32
	    ",merge_min=%" PRIu32
	    ",bloom=%" PRIu32
	    ",bloom_bit_count=%" PRIu32
	    ",bloom_hash_count=%" PRIu32,
	    lsm_tree->last, lsm_tree->chunk_count_limit,
	    lsm_tree->chunk_max, lsm_tree->chunk_size,
	    F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) ? 1 : 0,
	    lsm_tree->merge_max, lsm_tree->merge_min, lsm_tree->bloom,
	    lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count));

	/*
	 * The active chunks: each entry is an id followed by its optional
	 * attributes and its generation, entries separated by commas.
	 */
	WT_ERR(__wt_buf_catfmt(session, buf, ",chunks=["));
	for (slot = 0; slot < lsm_tree->nchunks; ++slot) {
		cur = lsm_tree->chunk[slot];
		if (slot != 0) {
			WT_ERR(__wt_buf_catfmt(session, buf, ","));
		}
		WT_ERR(__wt_buf_catfmt(session, buf, "id=%" PRIu32, cur->id));
		if (F_ISSET(cur, WT_LSM_CHUNK_BLOOM)) {
			WT_ERR(__wt_buf_catfmt(session, buf, ",bloom"));
		}
		if (cur->size != 0) {
			WT_ERR(__wt_buf_catfmt(session, buf,
			    ",chunk_size=%" PRIu64, cur->size));
		}
		if (cur->count != 0) {
			WT_ERR(__wt_buf_catfmt(
			    session, buf, ",count=%" PRIu64, cur->count));
		}
		WT_ERR(__wt_buf_catfmt(
		    session, buf, ",generation=%" PRIu32, cur->generation));
	}
	WT_ERR(__wt_buf_catfmt(session, buf, "]"));

	/*
	 * The old chunks: each entry is a quoted URI, followed by the quoted
	 * bloom URI when the chunk has the bloom flag set.
	 */
	WT_ERR(__wt_buf_catfmt(session, buf, ",old_chunks=["));
	for (slot = 0; slot < lsm_tree->nold_chunks; ++slot) {
		cur = lsm_tree->old_chunks[slot];
		WT_ASSERT(session, cur != NULL);
		if (slot != 0) {
			WT_ERR(__wt_buf_catfmt(session, buf, ","));
		}
		WT_ERR(__wt_buf_catfmt(session, buf, "\"%s\"", cur->uri));
		if (F_ISSET(cur, WT_LSM_CHUNK_BLOOM)) {
			WT_ERR(__wt_buf_catfmt(
			    session, buf, ",bloom=\"%s\"", cur->bloom_uri));
		}
	}
	WT_ERR(__wt_buf_catfmt(session, buf, "]"));

	/* Store the finished string under the tree's name. */
	WT_ERR(__wt_metadata_update(session, lsm_tree->name, buf->data));

err:	__wt_scr_free(session, &buf);
	return (ret);
}
예제 #21
0
/*
 * __wt_struct_plan --
 *	Given a table cursor containing a complete table, build the "projection
 *	plan" to distribute the columns to dependent stores.  A string
 *	representing the plan will be appended to the plan buffer.
 *
 *	columns/len give the column list to plan for; when value_only is set,
 *	the leading table->nkey_columns entries of that list are skipped.
 *	The session's btree handle is saved on entry and restored on exit.
 *
 *	Returns 0 unless one of the WT_ERR-wrapped calls fails.
 *	NOTE(review): the calls used as loop conditions are only compared
 *	with 0, so any non-zero result from them just ends the loop and is
 *	never stored in ret -- confirm that dropping those errors is intended.
 */
int
__wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, int value_only, WT_ITEM *plan)
{
	WT_BTREE *saved_btree;
	WT_CONFIG conf;
	WT_CONFIG_ITEM k, v;
	WT_DECL_RET;
	int cg, col, current_cg, current_col, start_cg, start_col;
	int i, have_it;
	char coltype, current_coltype;

	/* Remember the session's btree so it can be put back at the end. */
	saved_btree = session->btree;
	start_cg = start_col = -1;      /* -Wuninitialized */

	/* Work through the value columns by skipping over the key columns. */
	WT_ERR(__wt_config_initn(session, &conf, columns, len));

	if (value_only)
		for (i = 0; i < table->nkey_columns; i++)
			WT_ERR(__wt_config_next(&conf, &k, &v));

	/*
	 * Starting with current_col at INT_MAX forces the first column found
	 * to emit a column group switch.
	 */
	current_cg = cg = 0;
	current_col = col = INT_MAX;
	current_coltype = coltype = WT_PROJ_KEY; /* Keep lint quiet. */
	while (__wt_config_next(&conf, &k, &v) == 0) {
		have_it = 0;

		/*
		 * Visit every place the column is stored: keep asking for the
		 * next location until we are handed the first one again.
		 */
		while (__find_next_col(session, table,
		    &k, &cg, &col, &coltype) == 0 &&
		    (!have_it || cg != start_cg || col != start_col)) {
			/*
			 * First we move to the column.  If that is in a
			 * different column group to the last column we
			 * accessed, or before the last column in the same
			 * column group, or moving from the key to the value,
			 * we need to switch column groups or rewind.
			 */
			if (current_cg != cg || current_col > col ||
			    current_coltype != coltype) {
				WT_ASSERT(session, !value_only ||
				    coltype == WT_PROJ_VALUE);
				WT_ERR(__wt_buf_catfmt(
				    session, plan, "%d%c", cg, coltype));

				/*
				 * Set the current column group and column
				 * within the table.
				 */
				current_cg = cg;
				current_col = 0;
				current_coltype = coltype;
			}
			/*
			 * Now move to the column we want: a skip operation,
			 * preceded by a count when skipping more than one.
			 */
			if (current_col < col) {
				if (col - current_col > 1)
					WT_ERR(__wt_buf_catfmt(session,
					    plan, "%d", col - current_col));
				WT_ERR(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_SKIP));
			}
			/*
			 * Now copy the value in / out.  In the common case,
			 * where each value is used in one column, we do a
			 * "next" operation.  If the value is used again, we do
			 * a "reuse" operation to avoid making another copy.
			 */
			if (!have_it) {
				WT_ERR(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_NEXT));

				/* Remember the first match to detect the end. */
				start_cg = cg;
				start_col = col;
				have_it = 1;
			} else
				WT_ERR(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_REUSE));
			current_col = col + 1;
		}
	}

	/* Restore the session's btree on success and on error alike. */
err:	session->btree = saved_btree;
	return (ret);
}