/*
 * __wt_struct_reformat --
 *	Given a table and a list of columns (which could be values in a column
 *	group or index keys), calculate the resulting new format string.
 *	The result will be appended to the format buffer.
 *
 *	The "columns" list (of "len" bytes) is walked first; when it runs out
 *	and "extra_cols" is non-NULL, parsing continues with "extra_cols" as if
 *	it were a continuation of the same list.  Each column is looked up with
 *	__find_column_format and its pack type (with an optional size prefix)
 *	is appended to "format".
 *
 *	Returns 0 on success.  A failed column lookup is reported as EINVAL
 *	with a message naming the column; other non-zero results from the
 *	configuration parser or buffer routines are passed back unchanged.
 */
int
__wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, const char *extra_cols, int value_only,
    WT_ITEM *format)
{
    WT_CONFIG config;
    WT_CONFIG_ITEM k, next_k, next_v;
    WT_DECL_RET;
    WT_PACK_VALUE pv;
    int have_next;

    WT_CLEAR(pv);                           /* -Wuninitialized */

    WT_RET(__wt_config_initn(session, &config, columns, len));

    /* Prime the one-column look-ahead: next_k always holds the next column. */
    WT_RET(__wt_config_next(&config, &next_k, &next_v));
    do {
        /*
         * Process the column read on the previous pass and fetch the
         * following one, so we know whether the current column is the
         * last in the resulting format.
         */
        k = next_k;
        ret = __wt_config_next(&config, &next_k, &next_v);
        if (ret != 0 && ret != WT_NOTFOUND)
            return (ret);
        have_next = (ret == 0);

        /*
         * The primary list is exhausted: switch the parser over to the
         * extra columns (once only, extra_cols is cleared afterwards).
         */
        if (!have_next && extra_cols != NULL) {
            WT_RET(__wt_config_init(session, &config, extra_cols));
            WT_RET(__wt_config_next(&config, &next_k, &next_v));
            have_next = 1;
            extra_cols = NULL;
        }

        /* Any lookup failure is reported to the caller as EINVAL. */
        if ((ret = __find_column_format(session,
            table, &k, value_only, &pv)) != 0) {
            if (value_only && ret == EINVAL)
                WT_RET_MSG(session, EINVAL,
                    "A column group cannot store key column "
                    "'%.*s' in its value",
                    (int)k.len, k.str);
            WT_RET_MSG(session, EINVAL,
                "Column '%.*s' not found",
                (int)k.len, k.str);
        }

        /*
         * Check whether we're moving an unsized WT_ITEM from the end
         * to the middle, or vice-versa.  This determines whether the
         * size needs to be prepended.  This is the only case where the
         * destination size can be larger than the source size.
         */
        if (pv.type == 'u' && !pv.havesize && have_next)
            pv.type = 'U';
        else if (pv.type == 'U' && !have_next)
            pv.type = 'u';

        /* Emit "<size><type>" for sized columns, "<type>" otherwise. */
        if (pv.havesize)
            WT_RET(__wt_buf_catfmt(
                session, format, "%d%c", (int)pv.size, pv.type));
        else
            WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
    } while (have_next);

    return (0);
}
/*
 * __wt_meta_snaplist_set --
 *	Serialize the WT_SNAPSHOT list into a "snapshot=(...)" string and
 *	store it as the named file's snapshot value.
 *
 *	Deleted entries are omitted.  Entries flagged as added or updated have
 *	their raw cookie converted to hex first, and added entries are given an
 *	order one larger than the largest order seen so far in the walk.
 */
int
__wt_meta_snaplist_set(
    WT_SESSION_IMPL *session, const char *name, WT_SNAPSHOT *snapbase)
{
    WT_DECL_RET;
    WT_ITEM *scratch;
    WT_SNAPSHOT *sp;
    int64_t maxorder;
    const char *delim;

    scratch = NULL;
    maxorder = 0;
    delim = "";

    WT_ERR(__wt_scr_alloc(session, 0, &scratch));
    WT_ERR(__wt_buf_fmt(session, scratch, "snapshot=("));

    WT_SNAPSHOT_FOREACH(snapbase, sp) {
        /* Deleted snapshots never make it into the metadata. */
        if (F_ISSET(sp, WT_SNAP_DELETE))
            continue;

        /*
         * Remember the largest order among the live snapshots.  It is
         * not a true generation or ID (it restarts at 1 when the
         * snapshot being written is the file's only one); it exists
         * because timestamps of snapshots taken in quick succession
         * may collide or even appear to go backwards, and all we need
         * is the real ordering so the correct "last" snapshot is
         * chosen.
         */
        if (maxorder < sp->order)
            maxorder = sp->order;

        if (F_ISSET(sp, WT_SNAP_ADD | WT_SNAP_UPDATE)) {
            /* The address cookie is stored as a hex string. */
            WT_ERR(__wt_raw_to_hex(session,
                sp->raw.data, sp->raw.size, &sp->addr));

            if (F_ISSET(sp, WT_SNAP_ADD))
                sp->order = maxorder + 1;
        }

        WT_ERR(__wt_buf_catfmt(session, scratch,
            "%s%s=(addr=\"%.*s\",order=%" PRIu64
            ",time=%" PRIuMAX ",size=%" PRIu64 ")",
            delim, sp->name,
            (int)sp->addr.size, (char *)sp->addr.data,
            sp->order, sp->sec, sp->snapshot_size));

        /* Every entry after the first is comma-separated. */
        delim = ",";
    }
    WT_ERR(__wt_buf_catfmt(session, scratch, ")"));

    WT_ERR(__snap_set(session, name, scratch->mem));

err:
    __wt_scr_free(&scratch);
    return (ret);
}
/*
 * __ckpt_string --
 *	Return a printable string representation of a checkpoint address cookie.
 *
 *	The cookie in "addr" is unpacked into a temporary WT_BLOCK_CKPT and its
 *	version, root address, the alloc/avail/discard extent lists, file size
 *	and write generation are formatted into "buf" (replacing its previous
 *	contents).
 *
 *	Returns 0 on success or the first non-zero result from the unpack or
 *	formatting calls.  The temporary checkpoint is destroyed on every path
 *	once it has been initialized.
 */
static int
__ckpt_string(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, WT_ITEM *buf)
{
    WT_BLOCK_CKPT *ci, _ci;
    WT_DECL_RET;

    /*
     * Initialize the checkpoint, crack the cookie.  If initialization
     * itself fails there is nothing to tear down, so return directly;
     * after that, all failures go through the cleanup label.
     */
    ci = &_ci;
    WT_RET(__wt_block_ckpt_init(session, block, ci, "string", 0));
    WT_ERR(__wt_block_buffer_to_ckpt(session, block, addr, ci));

    WT_ERR(__wt_buf_fmt(session, buf,
        "version=%d",
        ci->version));

    /* Root page: [start-end, size, checksum], or a marker if there is none. */
    if (ci->root_offset == WT_BLOCK_INVALID_OFFSET)
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", root=[Empty]"));
    else
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", root=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)ci->root_offset,
            (uintmax_t)(ci->root_offset + ci->root_size),
            ci->root_size, ci->root_cksum));

    /* The three extent lists use the same layout as the root. */
    if (ci->alloc.offset == WT_BLOCK_INVALID_OFFSET)
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", alloc=[Empty]"));
    else
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", alloc=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)ci->alloc.offset,
            (uintmax_t)(ci->alloc.offset + ci->alloc.size),
            ci->alloc.size, ci->alloc.cksum));
    if (ci->avail.offset == WT_BLOCK_INVALID_OFFSET)
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", avail=[Empty]"));
    else
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", avail=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)ci->avail.offset,
            (uintmax_t)(ci->avail.offset + ci->avail.size),
            ci->avail.size, ci->avail.cksum));
    if (ci->discard.offset == WT_BLOCK_INVALID_OFFSET)
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", discard=[Empty]"));
    else
        WT_ERR(__wt_buf_catfmt(session, buf,
            ", discard=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)ci->discard.offset,
            (uintmax_t)(ci->discard.offset + ci->discard.size),
            ci->discard.size, ci->discard.cksum));

    WT_ERR(__wt_buf_catfmt(session, buf,
        ", file size=%" PRIuMAX
        ", write generation=%" PRIu64,
        (uintmax_t)ci->file_size,
        ci->write_gen));

err:
    /* Release the temporary checkpoint on success and failure alike. */
    __wt_block_ckpt_destroy(session, ci);

    return (ret);
}
/*
 * __snapshot_string --
 *	Build a human-readable description of a snapshot address cookie.
 *
 *	The cookie in "addr" is unpacked into a local WT_BLOCK_SNAPSHOT, then
 *	its version, root address, alloc/avail/discard extent lists, file size
 *	and write generation are formatted into "buf".
 */
static int
__snapshot_string(WT_SESSION_IMPL *session,
    WT_BLOCK *block, const uint8_t *addr, WT_ITEM *buf)
{
    WT_BLOCK_SNAPSHOT unpacked, *snap;

    /* Set up an empty snapshot and decode the cookie into it. */
    snap = &unpacked;
    WT_RET(__wt_block_snap_init(session, block, snap, "string", 0));
    WT_RET(__wt_block_buffer_to_snapshot(session, block, addr, snap));

    WT_RET(__wt_buf_fmt(session, buf, "version=%d", snap->version));

    /* Root page. */
    if (snap->root_offset != WT_BLOCK_INVALID_OFFSET)
        WT_RET(__wt_buf_catfmt(session, buf,
            ", root=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)snap->root_offset,
            (uintmax_t)(snap->root_offset + snap->root_size),
            snap->root_size, snap->root_cksum));
    else
        WT_RET(__wt_buf_catfmt(session, buf, ", root=[Empty]"));

    /* Allocation extent list. */
    if (snap->alloc.offset != WT_BLOCK_INVALID_OFFSET)
        WT_RET(__wt_buf_catfmt(session, buf,
            ", alloc=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)snap->alloc.offset,
            (uintmax_t)(snap->alloc.offset + snap->alloc.size),
            snap->alloc.size, snap->alloc.cksum));
    else
        WT_RET(__wt_buf_catfmt(session, buf, ", alloc=[Empty]"));

    /* Available extent list. */
    if (snap->avail.offset != WT_BLOCK_INVALID_OFFSET)
        WT_RET(__wt_buf_catfmt(session, buf,
            ", avail=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)snap->avail.offset,
            (uintmax_t)(snap->avail.offset + snap->avail.size),
            snap->avail.size, snap->avail.cksum));
    else
        WT_RET(__wt_buf_catfmt(session, buf, ", avail=[Empty]"));

    /* Discard extent list. */
    if (snap->discard.offset != WT_BLOCK_INVALID_OFFSET)
        WT_RET(__wt_buf_catfmt(session, buf,
            ", discard=[%"
            PRIuMAX "-%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "]",
            (uintmax_t)snap->discard.offset,
            (uintmax_t)(snap->discard.offset + snap->discard.size),
            snap->discard.size, snap->discard.cksum));
    else
        WT_RET(__wt_buf_catfmt(session, buf, ", discard=[Empty]"));

    /* File-level information comes last. */
    WT_RET(__wt_buf_catfmt(session, buf,
        ", file size=%" PRIuMAX
        ", write generation=%" PRIu64,
        (uintmax_t)snap->file_size,
        snap->write_gen));

    return (0);
}
/*
 * __wt_config_concat --
 *	Given a NULL-terminated list of configuration strings, concatenate them
 *	into a newly allocated buffer.  Nothing special is assumed about any
 *	of the config strings, they are simply combined in order.
 *
 *	This code deals with the case where some of the config strings are
 *	wrapped in brackets but others aren't: the resulting string does not
 *	have brackets.
 *
 *	On success, returns 0 and stores the NUL-terminated result in
 *	*config_ret; the caller takes over the buffer's memory.  On failure,
 *	returns a non-zero error, frees the partial buffer and leaves
 *	*config_ret untouched.  A key that is neither a string nor an
 *	identifier is rejected with EINVAL.
 */
int
__wt_config_concat(
    WT_SESSION_IMPL *session, const char **cfg, const char **config_ret)
{
    WT_CONFIG cparser;
    WT_CONFIG_ITEM k, v;
    WT_ITEM buf;
    int ret;
    const char **cp;

    WT_CLEAR(buf);
    ret = 0;

    for (cp = cfg; *cp != NULL; ++cp) {
        WT_ERR(__wt_config_init(session, &cparser, *cp));
        while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
            /*
             * Keys point into the configuration string and are
             * delimited by their length, so bound the output by
             * k.len rather than printing everything after it.
             */
            if (k.type != ITEM_STRING && k.type != ITEM_ID)
                WT_ERR_MSG(session, EINVAL,
                    "Invalid configuration key found: "
                    "'%.*s'\n", (int)k.len, k.str);

            /* Include the quotes around string keys/values. */
            if (k.type == ITEM_STRING) {
                --k.str;
                k.len += 2;
            }
            if (v.type == ITEM_STRING) {
                --v.str;
                v.len += 2;
            }

            /* Keys without a value are emitted without the '='. */
            WT_ERR(__wt_buf_catfmt(session, &buf, "%.*s%s%.*s,",
                (int)k.len, k.str,
                (v.len > 0) ? "=" : "",
                (int)v.len, v.str));
        }

        /* Running off the end of a string is the expected exit. */
        if (ret != WT_NOTFOUND)
            goto err;
    }

    /*
     * If the caller passes us no valid configuration strings, we end up
     * here with no allocated memory to return.  Check the final buffer
     * size: empty configuration strings are possible, and paranoia is
     * good.  Failures go through the cleanup label like every other
     * error in this function.
     */
    if (buf.size == 0)
        WT_ERR(__wt_buf_initsize(session, &buf, 1));

    /* Strip the trailing comma and NUL-terminate */
    ((char *)buf.data)[buf.size - 1] = '\0';

    *config_ret = buf.data;
    return (0);

err:
    __wt_buf_free(session, &buf);
    return (ret);
}
/*
 * __wt_struct_truncate --
 *	Append to "format" the packing string covering only the first "ncols"
 *	columns of "input_fmt".
 */
int
__wt_struct_truncate(WT_SESSION_IMPL *session,
    const char *input_fmt, u_int ncols, WT_ITEM *format)
{
    WT_DECL_PACK_VALUE(pv);
    WT_PACK pack;

    WT_RET(__pack_init(session, &pack, input_fmt));

    for (; ncols > 0; --ncols) {
        WT_RET(__pack_next(&pack, &pv));

        /* Unsized columns are just their type character. */
        if (!pv.havesize) {
            WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
            continue;
        }

        /* Sized columns carry their size in front of the type. */
        WT_RET(__wt_buf_catfmt(session,
            format, "%" PRIu32 "%c", pv.size, pv.type));
    }

    return (0);
}
/*
 * __wt_struct_truncate --
 *	Copy the packing descriptors of the leading "ncols" columns of
 *	"input_fmt" onto the end of "format".
 */
int
__wt_struct_truncate(WT_SESSION_IMPL *session,
    const char *input_fmt, u_int ncols, WT_ITEM *format)
{
    WT_PACK pack;
    WT_PACK_VALUE pv;
    u_int done;

    WT_CLEAR(pv);                           /* -Wuninitialized */

    WT_RET(__pack_init(session, &pack, input_fmt));

    for (done = 0; done < ncols; ++done) {
        WT_RET(__pack_next(&pack, &pv));

        /* A size, when present, is written ahead of the type. */
        if (!pv.havesize)
            WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
        else
            WT_RET(__wt_buf_catfmt(
                session, format, "%d%c", (int)pv.size, pv.type));
    }

    return (0);
}
/*
 * __wt_config_collapse --
 *	Collapse a set of configuration strings into newly allocated memory.
 *
 * This function takes a NULL-terminated list of configuration strings (where
 * the first one contains all the defaults and the values are in order from
 * least to most preferred, that is, the default values are least preferred),
 * and collapses them into newly allocated memory.  The algorithm is to walk
 * the first of the configuration strings, and for each entry, search all of
 * the configuration strings for a final value, keeping the last value found.
 *
 * Notes:
 *	Any key not appearing in the first configuration string is discarded
 *	from the final result, because we'll never search for it.
 *
 *	Nested structures aren't parsed.  For example, imagine a configuration
 *	string contains "key=(k2=v2,k3=v3)", and a subsequent string has
 *	"key=(k4=v4)", the result will be "key=(k4=v4)", as we search for and
 *	use the final value of "key", regardless of field overlap or missing
 *	fields in the nested value.
 *
 * On success, returns 0 and stores the new string in *config_ret.  On failure,
 * returns a non-zero error and *config_ret is left NULL.  A key that is
 * neither a string nor an identifier is rejected with EINVAL.
 */
int
__wt_config_collapse(
    WT_SESSION_IMPL *session, const char **cfg, char **config_ret)
{
    WT_CONFIG cparser;
    WT_CONFIG_ITEM k, v;
    WT_DECL_ITEM(tmp);
    WT_DECL_RET;

    *config_ret = NULL;

    WT_RET(__wt_scr_alloc(session, 0, &tmp));

    __wt_config_init(session, &cparser, cfg[0]);
    while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) {
        /*
         * Keys point into the configuration string and are delimited
         * by their length, so bound the output by k.len rather than
         * printing everything that follows the key.
         */
        if (k.type != WT_CONFIG_ITEM_STRING &&
            k.type != WT_CONFIG_ITEM_ID)
            WT_ERR_MSG(session, EINVAL,
                "Invalid configuration key found: '%.*s'",
                (int)k.len, k.str);

        /* Replace the default with the most preferred value. */
        WT_ERR(__wt_config_get(session, cfg, &k, &v));

        /* Include the quotes around string keys/values. */
        if (k.type == WT_CONFIG_ITEM_STRING) {
            --k.str;
            k.len += 2;
        }
        if (v.type == WT_CONFIG_ITEM_STRING) {
            --v.str;
            v.len += 2;
        }
        WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,",
            (int)k.len, k.str, (int)v.len, v.str));
    }

    /* We loop until error, and the expected error is WT_NOTFOUND. */
    if (ret != WT_NOTFOUND)
        goto err;

    /*
     * If the caller passes us no valid configuration strings, we get here
     * with no bytes to copy -- that's OK, the underlying string copy can
     * handle empty strings.
     *
     * Strip any trailing comma.
     */
    if (tmp->size != 0)
        --tmp->size;
    ret = __wt_strndup(session, tmp->data, tmp->size, config_ret);

err:
    __wt_scr_free(session, &tmp);
    return (ret);
}
/*
 * __wt_config_upgrade --
 *	Upgrade a configuration string by appending the replacement version.
 *
 *	"buf" holds the configuration string and is extended in place when an
 *	obsolete key is present.  Returns 0 if nothing needed upgrading or the
 *	replacement was appended; otherwise returns the error from the lookup
 *	or from growing the buffer.
 */
int
__wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf)
{
    WT_CONFIG_ITEM v;
    WT_DECL_RET;
    const char *config;

    config = buf->data;

    /*
     * wiredtiger_open:
     *	lsm_merge=boolean -> lsm_manager=(merge=boolean)
     *
     * Only a successful lookup fills in "v".  A missing key means there is
     * nothing to upgrade; any other failure is passed back to the caller
     * rather than being treated as a match.
     */
    ret = __wt_config_getones(session, config, "lsm_merge", &v);
    if (ret == WT_NOTFOUND)
        return (0);
    if (ret != 0)
        return (ret);

    WT_RET(__wt_buf_catfmt(session, buf,
        ",lsm_manager=(merge=%s)", v.val ? "true" : "false"));

    return (0);
}
/*
 * __open_index --
 *	Open an index.
 *
 *	Fills in the WT_INDEX from its metadata configuration (idx->config):
 *	source, immutable flag, optional collator, extractor, key format and
 *	column list.  If the table's column groups are complete, it also builds
 *	the key plan, the cursor-visible key format (idxkey_format), the padded
 *	key format (exkey_format) and the value plan.
 *
 *	Returns 0 on success or a non-zero error; the scratch buffers are
 *	released on every path.
 */
static int
__open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
{
    WT_CONFIG colconf;
    WT_CONFIG_ITEM ckey, cval, metadata;
    WT_DECL_ITEM(buf);
    WT_DECL_ITEM(plan);
    WT_DECL_RET;
    u_int npublic_cols, i;

    WT_ERR(__wt_scr_alloc(session, 0, &buf));

    /* Get the data source from the index config. */
    WT_ERR(__wt_config_getones(session, idx->config, "source", &cval));
    WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->source));

    WT_ERR(__wt_config_getones(session, idx->config, "immutable", &cval));
    if (cval.val)
        F_SET(idx, WT_INDEX_IMMUTABLE);

    /*
     * Compatibility: we didn't always maintain collator information in
     * index metadata, cope when it isn't found.
     */
    WT_CLEAR(cval);
    WT_ERR_NOTFOUND_OK(__wt_config_getones(
        session, idx->config, "collator", &cval));
    if (cval.len != 0) {
        /* The application metadata is optional as well. */
        WT_CLEAR(metadata);
        WT_ERR_NOTFOUND_OK(__wt_config_getones(
            session, idx->config, "app_metadata", &metadata));
        WT_ERR(__wt_collator_config(
            session, idx->name, &cval, &metadata,
            &idx->collator, &idx->collator_owned));
    }

    WT_ERR(__wt_extractor_config(
        session, idx->name, idx->config, &idx->extractor,
        &idx->extractor_owned));

    WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval));
    WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->key_format));

    /*
     * The key format for an index is somewhat subtle: the application
     * specifies a set of columns that it will use for the key, but the
     * engine usually adds some hidden columns in order to derive the
     * primary key.  These hidden columns are part of the file's key.
     *
     * The file's key_format is stored persistently, we need to calculate
     * the index cursor key format (which will usually omit some of those
     * keys).
     */
    WT_ERR(__wt_buf_init(session, buf, 0));
    WT_ERR(__wt_config_getones(
        session, idx->config, "columns", &idx->colconf));

    /*
     * Start with the declared index columns.  "buf" collects a
     * comma-separated column list; npublic_cols counts the declared ones.
     */
    WT_ERR(__wt_config_subinit(session, &colconf, &idx->colconf));
    for (npublic_cols = 0;
        (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
        ++npublic_cols)
        WT_ERR(__wt_buf_catfmt(
            session, buf, "%.*s,", (int)ckey.len, ckey.str));
    if (ret != WT_NOTFOUND)
        goto err;

    /*
     * If we didn't find any columns, the index must have an extractor.
     * We don't rely on this unconditionally because it was only added to
     * the metadata after version 2.3.1.
     */
    if (npublic_cols == 0) {
        WT_ERR(__wt_config_getones(
            session, idx->config, "index_key_columns", &cval));
        npublic_cols = (u_int)cval.val;
        WT_ASSERT(session, npublic_cols != 0);
        /* Add one placeholder name per extractor-generated column. */
        for (i = 0; i < npublic_cols; i++)
            WT_ERR(__wt_buf_catfmt(session, buf, "\"bad col\","));
    }

    /*
     * Now add any primary key columns from the table that are not
     * already part of the index key.
     */
    WT_ERR(__wt_config_subinit(session, &colconf, &table->colconf));
    for (i = 0; i < table->nkey_columns &&
        (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
        i++) {
        /*
         * If the primary key column is already in the secondary key,
         * don't add it again.
         */
        if (__wt_config_subgetraw(
            session, &idx->colconf, &ckey, &cval) == 0)
            continue;
        WT_ERR(__wt_buf_catfmt(
            session, buf, "%.*s,", (int)ckey.len, ckey.str));
    }
    WT_ERR_NOTFOUND_OK(ret);

    /*
     * If the table doesn't yet have its column groups, don't try to
     * calculate a plan: we are just checking that the index creation is
     * sane.
     */
    if (!table->cg_complete)
        goto err;

    WT_ERR(__wt_scr_alloc(session, 0, &plan));
    WT_ERR(__wt_struct_plan(
        session, table, buf->data, buf->size, false, plan));
    WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->key_plan));

    /* Set up the cursor key format (the visible columns). */
    WT_ERR(__wt_buf_init(session, buf, 0));
    WT_ERR(__wt_struct_truncate(session,
        idx->key_format, npublic_cols, buf));
    WT_ERR(__wt_strndup(
        session, buf->data, buf->size, &idx->idxkey_format));

    /*
     * Add a trailing padding byte to the format.  This ensures that there
     * will be no special optimization of the last column, so the primary
     * key columns can be simply appended.
     */
    WT_ERR(__wt_buf_catfmt(session, buf, "x"));
    WT_ERR(__wt_strndup(session, buf->data, buf->size, &idx->exkey_format));

    /* By default, index cursor values are the table value columns. */
    /* TODO Optimize to use index columns in preference to table lookups. */
    WT_ERR(__wt_buf_init(session, plan, 0));
    WT_ERR(__wt_struct_plan(session,
        table, table->colconf.str, table->colconf.len, true, plan));
    WT_ERR(__wt_strndup(session, plan->data, plan->size, &idx->value_plan));

err:
    __wt_scr_free(session, &buf);
    __wt_scr_free(session, &plan);

    return (ret);
}
/*
 * __wt_meta_ckptlist_set --
 *	Set a file's checkpoint value from the WT_CKPT list.
 *
 *	Builds a "checkpoint=(...)" string from every non-deleted entry in
 *	"ckptbase", optionally followed by a "checkpoint_lsn=(...)" entry when
 *	"ckptlsn" is non-NULL, and passes it to __ckpt_set for "fname".
 *	Entries flagged as added or updated get a fresh hex address and
 *	timestamp; added entries also get the next order number.
 *
 *	Returns 0 on success or a non-zero error; the scratch buffer is
 *	released on every path.
 */
int
__wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
    const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn)
{
    WT_CKPT *ckpt;
    WT_DECL_ITEM(buf);
    WT_DECL_RET;
    time_t secs;
    int64_t maxorder;
    const char *sep;

    WT_ERR(__wt_scr_alloc(session, 0, &buf));
    maxorder = 0;
    sep = "";
    WT_ERR(__wt_buf_fmt(session, buf, "checkpoint=("));
    WT_CKPT_FOREACH(ckptbase, ckpt) {
        /*
         * Each internal checkpoint name is appended with a generation
         * to make it a unique name.  We're solving two problems: when
         * two checkpoints are taken quickly, the timer may not be
         * unique and/or we can even see time travel on the second
         * checkpoint if we snapshot the time in-between nanoseconds
         * rolling over.  Second, if we reset the generational counter
         * when new checkpoints arrive, we could logically re-create
         * specific checkpoints, racing with cursors open on those
         * checkpoints.  I can't think of any way to return incorrect
         * results by racing with those cursors, but it's simpler not
         * to worry about it.
         *
         * Note the maximum is tracked before the deleted-checkpoint
         * test below, so deleted entries still count toward it.
         */
        if (ckpt->order > maxorder)
            maxorder = ckpt->order;

        /* Skip deleted checkpoints. */
        if (F_ISSET(ckpt, WT_CKPT_DELETE))
            continue;

        if (F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_UPDATE)) {
            /*
             * We fake checkpoints for handles in the middle of a
             * bulk load.  If there is a checkpoint, convert the
             * raw cookie to a hex string.
             */
            if (ckpt->raw.size == 0)
                ckpt->addr.size = 0;
            else
                WT_ERR(__wt_raw_to_hex(session,
                    ckpt->raw.data, ckpt->raw.size,
                    &ckpt->addr));

            /* Set the order and timestamp. */
            if (F_ISSET(ckpt, WT_CKPT_ADD))
                ckpt->order = ++maxorder;

            /*
             * XXX
             * Assumes a time_t fits into a uintmax_t, which isn't
             * guaranteed, a time_t has to be an arithmetic type,
             * but not an integral type.
             */
            WT_ERR(__wt_seconds(session, &secs));
            ckpt->sec = (uintmax_t)secs;
        }

        /*
         * Checkpoints with the internal WT_CHECKPOINT name are written
         * as "<name>.<order>"; all others are written under their own
         * name.
         *
         * NOTE(review): ckpt->order is formatted with PRId64 in the
         * name suffix and with PRIu64 in the "order=" field -- confirm
         * which matches the field's declared type.
         */
        if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
            WT_ERR(__wt_buf_catfmt(session, buf,
                "%s%s.%" PRId64 "=(addr=\"%.*s\",order=%" PRIu64
                ",time=%" PRIuMAX ",size=%" PRIu64
                ",write_gen=%" PRIu64 ")",
                sep, ckpt->name, ckpt->order,
                (int)ckpt->addr.size, (char *)ckpt->addr.data,
                ckpt->order, ckpt->sec, ckpt->ckpt_size,
                ckpt->write_gen));
        else
            WT_ERR(__wt_buf_catfmt(session, buf,
                "%s%s=(addr=\"%.*s\",order=%" PRIu64
                ",time=%" PRIuMAX ",size=%" PRIu64
                ",write_gen=%" PRIu64 ")",
                sep, ckpt->name,
                (int)ckpt->addr.size, (char *)ckpt->addr.data,
                ckpt->order, ckpt->sec, ckpt->ckpt_size,
                ckpt->write_gen));

        /* Every entry after the first is comma-separated. */
        sep = ",";
    }
    WT_ERR(__wt_buf_catfmt(session, buf, ")"));

    /* The checkpoint LSN, when supplied, is stored alongside the list. */
    if (ckptlsn != NULL)
        WT_ERR(__wt_buf_catfmt(session, buf,
            ",checkpoint_lsn=(%" PRIu32 ",%" PRIuMAX ")",
            ckptlsn->file, (uintmax_t)ckptlsn->offset));
    WT_ERR(__ckpt_set(session, fname, buf->mem));

err:
    __wt_scr_free(&buf);
    return (ret);
}
/*
 * __create_index --
 *	Create an index.
 *
 *	"name" must have the form "index:<table name>:<index name>" and the
 *	table must already exist.  The index's source object is created with a
 *	key format derived from the configured "columns" plus any primary key
 *	columns not already listed, and the collapsed index configuration is
 *	inserted into the metadata under "name".
 *
 *	Returns 0 on success.  Returns EINVAL for a malformed name, a missing
 *	"columns" configuration or a record-number index key.  If the metadata
 *	entry already exists, returns EEXIST when "exclusive" is set and 0
 *	otherwise.  Other errors from the called routines are passed back.
 */
static int
__create_index(WT_SESSION_IMPL *session,
    const char *name, int exclusive, const char *config)
{
    WT_CONFIG pkcols;
    WT_CONFIG_ITEM ckey, cval, icols;
    WT_DECL_RET;
    WT_ITEM confbuf, extra_cols, fmt, namebuf;
    WT_TABLE *table;
    const char *cfg[4] =
        { WT_CONFIG_BASE(session, index_meta), NULL, NULL, NULL };
    const char *sourcecfg[] = { config, NULL, NULL };
    const char *sourceconf, *source, *idxconf, *idxname;
    const char *tablename;
    size_t tlen;
    u_int i;

    idxconf = sourceconf = NULL;
    WT_CLEAR(confbuf);
    WT_CLEAR(fmt);
    WT_CLEAR(extra_cols);
    WT_CLEAR(namebuf);

    /* Split "index:<table>:<index>" into its table and index parts. */
    tablename = name;
    if (!WT_PREFIX_SKIP(tablename, "index:"))
        return (EINVAL);
    idxname = strchr(tablename, ':');
    if (idxname == NULL)
        WT_RET_MSG(session, EINVAL, "Invalid index name, "
            "should be <table name>:<index name>: %s", name);

    tlen = (size_t)(idxname++ - tablename);
    if ((ret =
        __wt_schema_get_table(session, tablename, tlen, 1, &table)) != 0)
        WT_RET_MSG(session, ret,
            "Can't create an index for a non-existent table: %.*s",
            (int)tlen, tablename);

    /* Use the configured source if there is one, else generate a name. */
    if (__wt_config_getones(session, config, "source", &cval) == 0) {
        WT_ERR(__wt_buf_fmt(session, &namebuf,
            "%.*s", (int)cval.len, cval.str));
        source = namebuf.data;
    } else {
        WT_ERR(__wt_schema_index_source(
            session, table, idxname, config, &namebuf));
        source = namebuf.data;

        /* Add the source name to the index config before collapsing. */
        WT_ERR(__wt_buf_catfmt(session, &confbuf,
            ",source=\"%s\"", source));
    }

    /* Calculate the key/value formats. */
    if (__wt_config_getones(session, config, "columns", &icols) != 0)
        WT_ERR_MSG(session, EINVAL,
            "No 'columns' configuration for '%s'", name);

    /*
     * The key format for an index is somewhat subtle: the application
     * specifies a set of columns that it will use for the key, but the
     * engine usually adds some hidden columns in order to derive the
     * primary key.  These hidden columns are part of the source's
     * key_format, which we are calculating now, but not part of an index
     * cursor's key_format.
     */
    WT_ERR(__wt_config_subinit(session, &pkcols, &table->colconf));
    for (i = 0; i < table->nkey_columns &&
        (ret = __wt_config_next(&pkcols, &ckey, &cval)) == 0;
        i++) {
        /*
         * If the primary key column is already in the secondary key,
         * don't add it again.
         */
        if (__wt_config_subgetraw(session, &icols, &ckey, &cval) == 0)
            continue;
        WT_ERR(__wt_buf_catfmt(
            session, &extra_cols, "%.*s,", (int)ckey.len, ckey.str));
    }
    if (ret != 0 && ret != WT_NOTFOUND)
        goto err;

    /*
     * Index values are normally empty: all columns are packed into the
     * index key.  The exception is LSM, which (currently) reserves empty
     * values as tombstones.  Use a single padding byte in that case.
     *
     * The key format must be appended to the value format chosen here:
     * formatting it with __wt_buf_fmt would replace the buffer and throw
     * away the LSM padding byte.
     */
    if (WT_PREFIX_MATCH(source, "lsm:"))
        WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=x,"));
    else
        WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=,"));
    WT_ERR(__wt_buf_catfmt(session, &fmt, "key_format="));
    WT_ERR(__wt_struct_reformat(session, table,
        icols.str, icols.len, (const char *)extra_cols.data, 0, &fmt));

    /* Check for a record number index key, which makes no sense. */
    WT_ERR(__wt_config_getones(session, fmt.data, "key_format", &cval));
    if (cval.len == 1 && cval.str[0] == 'r')
        WT_ERR_MSG(session, EINVAL,
            "column-store index may not use the record number as its "
            "index key");

    /* Create the underlying source object with the computed formats. */
    sourcecfg[1] = fmt.data;
    WT_ERR(__wt_config_concat(session, sourcecfg, &sourceconf));

    WT_ERR(__wt_schema_create(session, source, sourceconf));

    /* Layer the source and generated settings over the index defaults. */
    cfg[1] = sourceconf;
    cfg[2] = confbuf.data;
    WT_ERR(__wt_config_collapse(session, cfg, &idxconf));

    /*
     * If the entry already exists in the metadata, we're done.
     * This is an error for exclusive creates but okay otherwise.
     */
    if ((ret = __wt_metadata_insert(session, name, idxconf)) ==
        WT_DUPLICATE_KEY)
        ret = exclusive ? EEXIST : 0;

err:
    __wt_free(session, idxconf);
    __wt_free(session, sourceconf);
    __wt_buf_free(session, &confbuf);
    __wt_buf_free(session, &extra_cols);
    __wt_buf_free(session, &fmt);
    __wt_buf_free(session, &namebuf);

    __wt_schema_release_table(session, table);
    return (ret);
}
/*
 * __config_merge_format_next --
 *	Walk the array, building entries.
 *
 *	Entries are visited starting at index *enp, which is shared with (and
 *	advanced by) recursive calls.  "prefix"/"plen" identify the nesting
 *	level being emitted: only the part of each key after the first "plen"
 *	bytes is written.  Output is appended to "build" as "key=value," pairs,
 *	with nested levels wrapped as "key=(...),".
 *
 *	Returns 0 on success or the error from appending to the buffer.
 */
static int
__config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix,
    size_t plen, size_t *enp, WT_CONFIG_MERGE *cp, WT_ITEM *build)
{
    WT_CONFIG_MERGE_ENTRY *ep;
    size_t len1, len2, next;
    char *p;

    for (; *enp < cp->entries_next; ++*enp) {
        ep = &cp->entries[*enp];
        len1 = strlen(ep->k);

        /*
         * The entries are in sorted order, take the last entry for any
         * key.
         */
        if (*enp < (cp->entries_next - 1)) {
            len2 = strlen((ep + 1)->k);

            /* Choose the last of identical keys. */
            if (len1 == len2 &&
                memcmp(ep->k, (ep + 1)->k, len1) == 0)
                continue;

            /*
             * The test is complicated by matching empty entries
             * "foo=" against nested structures "foo,bar=", where
             * the latter is a replacement for the former.
             */
            if (len2 > len1 &&
                (ep + 1)->k[len1] == SEPC &&
                memcmp(ep->k, (ep + 1)->k, len1) == 0)
                continue;
        }

        /*
         * If we're skipping a prefix and this entry doesn't match it,
         * back off one entry and pop up a level.  The caller's loop
         * increment then lands on this entry again.
         */
        if (plen != 0 &&
            (plen > len1 || memcmp(ep->k, prefix, plen) != 0)) {
            --*enp;
            break;
        }

        /*
         * If the entry introduces a new level, recurse through that
         * new level.  The new prefix is this key up to and including
         * the separator.
         */
        if ((p = strchr(ep->k + plen, SEPC)) != NULL) {
            next = WT_PTRDIFF(p, ep->k);
            WT_RET(__wt_buf_catfmt(session,
                build, "%.*s=(", (int)(next - plen), ep->k + plen));
            WT_RET(__config_merge_format_next(
                session, ep->k, next + 1, enp, cp, build));
            __strip_comma(build);
            WT_RET(__wt_buf_catfmt(session, build, "),"));
            continue;
        }

        /* Append the entry to the buffer. */
        WT_RET(__wt_buf_catfmt(
            session, build, "%s=%s,", ep->k + plen, ep->v));
    }

    return (0);
}
/*
 * __open_index --
 *	Open an index.
 *
 *	Fills in the WT_INDEX from its metadata configuration (idx->config):
 *	source, key format and column list, then builds the key plan, the
 *	cursor-visible key format (idxkey_format) and the value plan.  The
 *	strings stored in the index are taken over from the scratch buffers
 *	with __wt_buf_steal.
 *
 *	Returns 0 on success or a non-zero error; the scratch buffers are
 *	released on every path.
 */
static int
__open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
{
    WT_CONFIG colconf;
    WT_CONFIG_ITEM ckey, cval;
    WT_DECL_ITEM(buf);
    WT_DECL_ITEM(plan);
    WT_DECL_RET;
    u_int cursor_key_cols, i;

    WT_ERR(__wt_scr_alloc(session, 0, &buf));

    /* Get the data source from the index config. */
    WT_ERR(__wt_config_getones(session, idx->config, "source", &cval));
    WT_ERR(__wt_buf_fmt(
        session, buf, "%.*s", (int)cval.len, cval.str));
    idx->source = __wt_buf_steal(session, buf, NULL);

    /* Sources whose name starts with "lsm:" are flagged as needing a value. */
    idx->need_value = WT_PREFIX_MATCH(idx->source, "lsm:");

    WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval));
    WT_ERR(__wt_buf_fmt(
        session, buf, "%.*s", (int)cval.len, cval.str));
    idx->key_format = __wt_buf_steal(session, buf, NULL);

    /*
     * The key format for an index is somewhat subtle: the application
     * specifies a set of columns that it will use for the key, but the
     * engine usually adds some hidden columns in order to derive the
     * primary key.  These hidden columns are part of the file's key.
     *
     * The file's key_format is stored persistently, we need to calculate
     * the index cursor key format (which will usually omit some of those
     * keys).
     */
    WT_ERR(__wt_config_getones(
        session, idx->config, "columns", &idx->colconf));

    /*
     * Start with the declared index columns.  "buf" collects a
     * comma-separated column list; cursor_key_cols counts the declared
     * ones.
     */
    WT_ERR(__wt_config_subinit(session, &colconf, &idx->colconf));
    cursor_key_cols = 0;
    while ((ret = __wt_config_next(&colconf, &ckey, &cval)) == 0) {
        WT_ERR(__wt_buf_catfmt(
            session, buf, "%.*s,", (int)ckey.len, ckey.str));
        ++cursor_key_cols;
    }
    if (ret != 0 && ret != WT_NOTFOUND)
        goto err;

    /*
     * Now add any primary key columns from the table that are not
     * already part of the index key.
     */
    WT_ERR(__wt_config_subinit(session, &colconf, &table->colconf));
    for (i = 0; i < table->nkey_columns &&
        (ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
        i++) {
        /*
         * If the primary key column is already in the secondary key,
         * don't add it again.
         */
        if (__wt_config_subgetraw(
            session, &idx->colconf, &ckey, &cval) == 0)
            continue;
        WT_ERR(__wt_buf_catfmt(
            session, buf, "%.*s,", (int)ckey.len, ckey.str));
    }
    if (ret != 0 && ret != WT_NOTFOUND)
        goto err;

    /* Build the key plan from the full (declared + hidden) column list. */
    WT_ERR(__wt_scr_alloc(session, 0, &plan));
    WT_ERR(__wt_struct_plan(session, table, buf->data, buf->size, 0, plan));
    idx->key_plan = __wt_buf_steal(session, plan, NULL);

    /* Set up the cursor key format (the visible columns). */
    WT_ERR(__wt_buf_init(session, buf, 0));
    WT_ERR(__wt_struct_truncate(session,
        idx->key_format, cursor_key_cols, buf));
    idx->idxkey_format = __wt_buf_steal(session, buf, NULL);

    /* By default, index cursor values are the table value columns. */
    /* TODO Optimize to use index columns in preference to table lookups. */
    WT_ERR(__wt_struct_plan(session,
        table, table->colconf.str, table->colconf.len, 1, plan));
    idx->value_plan = __wt_buf_steal(session, plan, NULL);

err:
    __wt_scr_free(&buf);
    __wt_scr_free(&plan);

    return (ret);
}
/*
 * __wt_struct_reformat --
 *	Given a table and a list of columns (which could be values in a column
 *	group or index keys), calculate the resulting new format string.
 *	The result will be appended to the format buffer.
 *
 *	The "columns" list (of "len" bytes) is walked first; when it runs out
 *	and "extra_cols" is non-NULL, parsing continues with "extra_cols" as if
 *	it were a continuation of the same list.  If both are empty and
 *	"format" is empty too, "format" is set to an empty string.
 *
 *	Returns 0 on success.  A failed column lookup is reported as EINVAL
 *	with a message naming the column; other non-zero results from the
 *	configuration parser or buffer routines are passed back unchanged.
 */
int
__wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, const char *extra_cols, bool value_only,
    WT_ITEM *format)
{
    WT_CONFIG config;
    WT_CONFIG_ITEM k, next_k, next_v;
    WT_DECL_PACK_VALUE(pv);
    WT_DECL_RET;
    bool have_next;

    __wt_config_initn(session, &config, columns, len);
    /*
     * If an empty column list is specified, this will fail with
     * WT_NOTFOUND, that's okay.
     */
    WT_RET_NOTFOUND_OK(ret = __wt_config_next(&config, &next_k, &next_v));
    if (ret == WT_NOTFOUND) {
        if (extra_cols != NULL) {
            /* No primary columns: start directly on the extras. */
            __wt_config_init(session, &config, extra_cols);
            WT_RET(__wt_config_next(&config, &next_k, &next_v));
            extra_cols = NULL;
        } else if (format->size == 0) {
            /* Nothing to add at all: hand back an empty string. */
            WT_RET(__wt_buf_set(session, format, "", 1));
            return (0);
        }
        /*
         * NOTE(review): with no columns, no extra columns and a
         * non-empty format we fall through to the loop below without
         * a successful read into next_k -- confirm callers never hit
         * this combination.
         */
    }
    do {
        /*
         * Process the column read on the previous pass and fetch the
         * following one, so we know whether the current column is the
         * last in the resulting format.
         */
        k = next_k;
        ret = __wt_config_next(&config, &next_k, &next_v);
        if (ret != 0 && ret != WT_NOTFOUND)
            return (ret);
        have_next = ret == 0;

        /*
         * The primary list is exhausted: switch the parser over to the
         * extra columns (once only, extra_cols is cleared afterwards).
         */
        if (!have_next && extra_cols != NULL) {
            __wt_config_init(session, &config, extra_cols);
            WT_RET(__wt_config_next(&config, &next_k, &next_v));
            have_next = true;
            extra_cols = NULL;
        }

        /* Any lookup failure is reported to the caller as EINVAL. */
        if ((ret = __find_column_format(session,
            table, &k, value_only, &pv)) != 0) {
            if (value_only && ret == EINVAL)
                WT_RET_MSG(session, EINVAL,
                    "A column group cannot store key column "
                    "'%.*s' in its value",
                    (int)k.len, k.str);
            WT_RET_MSG(session, EINVAL,
                "Column '%.*s' not found",
                (int)k.len, k.str);
        }

        /*
         * Check whether we're moving an unsized WT_ITEM from the end
         * to the middle, or vice-versa.  This determines whether the
         * size needs to be prepended.  This is the only case where the
         * destination size can be larger than the source size.
         */
        if (pv.type == 'u' && !pv.havesize && have_next)
            pv.type = 'U';
        else if (pv.type == 'U' && !have_next)
            pv.type = 'u';

        /* Emit "<size><type>" for sized columns, "<type>" otherwise. */
        if (pv.havesize)
            WT_RET(__wt_buf_catfmt(session,
                format, "%" PRIu32 "%c", pv.size, pv.type));
        else
            WT_RET(__wt_buf_catfmt(session, format, "%c", pv.type));
    } while (have_next);

    return (0);
}
/*
 * __wt_struct_plan --
 *	Given a table cursor containing a complete table, build the "projection
 *	plan" to distribute the columns to dependent stores.  A string
 *	representing the plan will be appended to the plan buffer.
 *
 *	"columns" (of "len" bytes) is the column list to plan for.  When
 *	"value_only" is set, the table's key columns at the front of the list
 *	are skipped.  If no columns are processed and "plan" is empty, "plan"
 *	is set to an empty string.
 *
 *	Returns 0 on success or a non-zero error from the configuration parser
 *	or buffer routines.
 */
int
__wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, bool value_only, WT_ITEM *plan)
{
    WT_CONFIG conf;
    WT_CONFIG_ITEM k, v;
    WT_DECL_RET;
    u_int cg, col, current_cg, current_col, i, start_cg, start_col;
    char coltype, current_coltype;
    bool have_it;

    start_cg = start_col = UINT_MAX;        /* -Wuninitialized */

    /* Work through the value columns by skipping over the key columns. */
    __wt_config_initn(session, &conf, columns, len);
    if (value_only)
        for (i = 0; i < table->nkey_columns; i++)
            WT_RET(__wt_config_next(&conf, &k, &v));

    /*
     * (cg, col, coltype) is the search position handed to
     * __find_next_col; the current_* variables track where the plan
     * emitted so far has left us.
     */
    current_cg = cg = 0;
    current_col = col = INT_MAX;
    current_coltype = coltype = WT_PROJ_KEY; /* Keep lint quiet. */
    for (i = 0; (ret = __wt_config_next(&conf, &k, &v)) == 0; i++) {
        have_it = false;

        /*
         * Visit every place the column is stored, stopping when the
         * search comes back around to the first match.
         */
        while ((ret = __find_next_col(session, table,
            &k, &cg, &col, &coltype)) == 0 &&
            (!have_it || cg != start_cg || col != start_col)) {
            /*
             * First we move to the column.  If that is in a
             * different column group to the last column we
             * accessed, or before the last column in the same
             * column group, or moving from the key to the value,
             * we need to switch column groups or rewind.
             */
            if (current_cg != cg || current_col > col ||
                current_coltype != coltype) {
                WT_ASSERT(session, !value_only ||
                    coltype == WT_PROJ_VALUE);
                WT_RET(__wt_buf_catfmt(
                    session, plan, "%u%c", cg, coltype));

                /*
                 * Set the current column group and column
                 * within the table.
                 */
                current_cg = cg;
                current_col = 0;
                current_coltype = coltype;
            }
            /*
             * Now move to the column we want.  A skip of more than
             * one column is written with a leading count.
             */
            if (current_col < col) {
                if (col - current_col > 1)
                    WT_RET(__wt_buf_catfmt(session,
                        plan, "%u", col - current_col));
                WT_RET(__wt_buf_catfmt(session,
                    plan, "%c", WT_PROJ_SKIP));
            }
            /*
             * Now copy the value in / out.  In the common case,
             * where each value is used in one column, we do a
             * "next" operation.  If the value is used again, we do
             * a "reuse" operation to avoid making another copy.
             */
            if (!have_it) {
                WT_RET(__wt_buf_catfmt(session,
                    plan, "%c", WT_PROJ_NEXT));

                /* Remember where the search started. */
                start_cg = cg;
                start_col = col;
                have_it = true;
            } else
                WT_RET(__wt_buf_catfmt(session,
                    plan, "%c", WT_PROJ_REUSE));
            current_col = col + 1;
        }
        /*
         * We may fail to find a column if it is a custom extractor.
         * In that case, treat it as the first value column: we only
         * ever use such plans to extract the primary key from the
         * index.
         */
        if (ret == WT_NOTFOUND)
            WT_RET(__wt_buf_catfmt(session, plan,
                "0%c%c", WT_PROJ_VALUE, WT_PROJ_NEXT));
    }
    /* Running off the end of the column list is the expected exit. */
    WT_RET_TEST(ret != WT_NOTFOUND, ret);

    /* Special case empty plans. */
    if (i == 0 && plan->size == 0)
        WT_RET(__wt_buf_set(session, plan, "", 1));

    return (0);
}
/*
 * __open_verbose --
 *	Emit a verbose message describing a handle open, if file-operation
 *	verbose messages are enabled.
 */
static inline int
__open_verbose(
    WT_SESSION_IMPL *session, const char *name, int file_type, u_int flags)
{
#ifdef HAVE_VERBOSE
	/* Open flags that are reported, in the order they are listed. */
	static const struct {
		u_int flag;
		const char *tag;
	} open_flags[] = {
		{ WT_FS_OPEN_CREATE, "create" },
		{ WT_FS_OPEN_DIRECTIO, "direct-IO" },
		{ WT_FS_OPEN_EXCLUSIVE, "exclusive" },
		{ WT_FS_OPEN_FIXED, "fixed" },
		{ WT_FS_OPEN_READONLY, "readonly" }
	};
	WT_DECL_RET;
	WT_DECL_ITEM(tmp);
	size_t i;
	const char *file_type_tag, *flag_list, *sep;

	if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS))
		return (0);

	/*
	 * Tracking file opens is useful when debugging platforms, so make the
	 * effort to describe the open well.
	 */
	switch (file_type) {
	case WT_FS_OPEN_FILE_TYPE_CHECKPOINT:
		file_type_tag = "checkpoint";
		break;
	case WT_FS_OPEN_FILE_TYPE_DATA:
		file_type_tag = "data";
		break;
	case WT_FS_OPEN_FILE_TYPE_DIRECTORY:
		file_type_tag = "directory";
		break;
	case WT_FS_OPEN_FILE_TYPE_LOG:
		file_type_tag = "log";
		break;
	case WT_FS_OPEN_FILE_TYPE_REGULAR:
		file_type_tag = "regular";
		break;
	default:
		file_type_tag = "unknown open type";
		break;
	}

	WT_RET(__wt_scr_alloc(session, 0, &tmp));

	/* Build " (flag, flag, ...)"; the buffer stays empty with no flags. */
	sep = " (";
	for (i = 0; i < sizeof(open_flags) / sizeof(open_flags[0]); ++i) {
		if (!LF_ISSET(open_flags[i].flag))
			continue;
		WT_ERR(__wt_buf_catfmt(
		    session, tmp, "%s%s", sep, open_flags[i].tag));
		sep = ", ";
	}
	if (tmp->size != 0)
		WT_ERR(__wt_buf_catfmt(session, tmp, ")"));

	flag_list = tmp->size == 0 ? "" : (char *)tmp->data;
	__wt_verbose(session, WT_VERB_FILEOPS,
	    "%s: file-open: type %s%s", name, file_type_tag, flag_list);

err:	__wt_scr_free(session, &tmp);
	return (ret);
#else
	WT_UNUSED(session);
	WT_UNUSED(name);
	WT_UNUSED(file_type);
	WT_UNUSED(flags);
	return (0);
#endif
}
/*
 * __create_index --
 *	Create an index.
 *
 *	name is expected in the form "index:<table name>:<index name>"; config
 *	is the index configuration string.  When exclusive is non-zero, an
 *	already-existing metadata entry is reported as EEXIST, otherwise it is
 *	treated as success.
 *
 *	Returns 0 on success and an error code otherwise.  Once the table has
 *	been acquired, every exit goes through the err label so the table is
 *	released and the scratch buffers are freed.
 */
static int
__create_index(WT_SESSION_IMPL *session,
    const char *name, int exclusive, const char *config)
{
	WT_CONFIG kcols, pkcols;
	WT_CONFIG_ITEM ckey, cval, icols, kval;
	WT_DECL_PACK_VALUE(pv);
	WT_DECL_RET;
	WT_ITEM confbuf, extra_cols, fmt, namebuf;
	WT_PACK pack;
	WT_TABLE *table;
	const char *cfg[4] =
	    { WT_CONFIG_BASE(session, index_meta), NULL, NULL, NULL };
	const char *sourcecfg[] = { config, NULL, NULL };
	const char *source, *sourceconf, *idxname, *tablename;
	char *idxconf;
	size_t tlen;
	int have_extractor;
	u_int i, npublic_cols;

	/* Initialize everything the err label cleans up. */
	sourceconf = NULL;
	idxconf = NULL;
	WT_CLEAR(confbuf);
	WT_CLEAR(fmt);
	WT_CLEAR(extra_cols);
	WT_CLEAR(namebuf);
	have_extractor = 0;

	/* Split "index:<table>:<index>" into its table and index names. */
	tablename = name;
	if (!WT_PREFIX_SKIP(tablename, "index:"))
		return (EINVAL);
	idxname = strchr(tablename, ':');
	if (idxname == NULL)
		WT_RET_MSG(session, EINVAL, "Invalid index name, "
		    "should be <table name>:<index name>: %s", name);

	tlen = (size_t)(idxname++ - tablename);
	if ((ret =
	    __wt_schema_get_table(session, tablename, tlen, 1, &table)) != 0)
		WT_RET_MSG(session, ret,
		    "Can't create an index for a non-existent table: %.*s",
		    (int)tlen, tablename);

	/*
	 * The table is held from here on: fail through the err label so it is
	 * released, rather than returning directly.
	 */
	if (table->is_simple)
		WT_ERR_MSG(session, EINVAL,
		    "%s requires a table with named columns", name);

	if (__wt_config_getones(session, config, "source", &cval) == 0) {
		WT_ERR(__wt_buf_fmt(session, &namebuf,
		    "%.*s", (int)cval.len, cval.str));
		source = namebuf.data;
	} else {
		WT_ERR(__wt_schema_index_source(
		    session, table, idxname, config, &namebuf));
		source = namebuf.data;

		/* Add the source name to the index config before collapsing. */
		WT_ERR(__wt_buf_catfmt(session, &confbuf,
		    ",source=\"%s\"", source));
	}

	if (__wt_config_getones_none(
	    session, config, "extractor", &cval) == 0 && cval.len != 0) {
		have_extractor = 1;
		/* Custom extractors must supply a key format. */
		if ((ret = __wt_config_getones(
		    session, config, "key_format", &kval)) != 0)
			WT_ERR_MSG(session, EINVAL,
			    "%s: custom extractors require a key_format", name);
	}

	/* Calculate the key/value formats. */
	WT_CLEAR(icols);
	if (__wt_config_getones(session, config, "columns", &icols) != 0 &&
	    !have_extractor)
		WT_ERR_MSG(session, EINVAL,
		    "%s: requires 'columns' configuration", name);

	/*
	 * Count the public columns using the declared columns for normal
	 * indices or the key format for custom extractors.
	 */
	npublic_cols = 0;
	if (!have_extractor) {
		WT_ERR(__wt_config_subinit(session, &kcols, &icols));
		while ((ret = __wt_config_next(&kcols, &ckey, &cval)) == 0)
			++npublic_cols;
		WT_ERR_NOTFOUND_OK(ret);
	} else {
		WT_ERR(__pack_initn(session, &pack, kval.str, kval.len));
		while ((ret = __pack_next(&pack, &pv)) == 0)
			++npublic_cols;
		WT_ERR_NOTFOUND_OK(ret);
	}

	/*
	 * The key format for an index is somewhat subtle: the application
	 * specifies a set of columns that it will use for the key, but the
	 * engine usually adds some hidden columns in order to derive the
	 * primary key.  These hidden columns are part of the source's
	 * key_format, which we are calculating now, but not part of an index
	 * cursor's key_format.
	 */
	WT_ERR(__wt_config_subinit(session, &pkcols, &table->colconf));
	for (i = 0; i < table->nkey_columns &&
	    (ret = __wt_config_next(&pkcols, &ckey, &cval)) == 0;
	    i++) {
		/*
		 * If the primary key column is already in the secondary key,
		 * don't add it again.
		 */
		if (__wt_config_subgetraw(
		    session, &icols, &ckey, &cval) == 0) {
			if (have_extractor)
				WT_ERR_MSG(session, EINVAL,
				    "an index with a custom extractor may not "
				    "include primary key columns");
			continue;
		}
		WT_ERR(__wt_buf_catfmt(
		    session, &extra_cols, "%.*s,", (int)ckey.len, ckey.str));
	}
	/* Running out of primary key columns is not an error. */
	if (ret != 0 && ret != WT_NOTFOUND)
		goto err;

	/* Index values are empty: all columns are packed into the index key. */
	WT_ERR(__wt_buf_fmt(session, &fmt, "value_format=,key_format="));

	if (have_extractor) {
		WT_ERR(__wt_buf_catfmt(session, &fmt, "%.*s",
		    (int)kval.len, kval.str));
		WT_CLEAR(icols);
	}

	/*
	 * Construct the index key format, or append the primary key columns
	 * for custom extractors.
	 */
	WT_ERR(__wt_struct_reformat(session, table,
	    icols.str, icols.len, (const char *)extra_cols.data, 0, &fmt));

	/* Check for a record number index key, which makes no sense. */
	WT_ERR(__wt_config_getones(session, fmt.data, "key_format", &cval));
	if (cval.len == 1 && cval.str[0] == 'r')
		WT_ERR_MSG(session, EINVAL,
		    "column-store index may not use the record number as its "
		    "index key");

	WT_ERR(__wt_buf_catfmt(
	    session, &fmt, ",index_key_columns=%u", npublic_cols));

	/* Create the underlying source object with the computed formats. */
	sourcecfg[1] = fmt.data;
	WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));

	WT_ERR(__wt_schema_create(session, source, sourceconf));

	/* Collapse the index configuration and record it in the metadata. */
	cfg[1] = sourceconf;
	cfg[2] = confbuf.data;
	WT_ERR(__wt_config_collapse(session, cfg, &idxconf));
	if ((ret = __wt_metadata_insert(session, name, idxconf)) != 0) {
		/*
		 * If the entry already exists in the metadata, we're done.
		 * This is an error for exclusive creates but okay otherwise.
		 */
		if (ret == WT_DUPLICATE_KEY)
			ret = exclusive ? EEXIST : 0;
		goto err;
	}

	/* Make sure that the configuration is valid. */
	WT_ERR(__wt_schema_open_index(
	    session, table, idxname, strlen(idxname), NULL));

err:	__wt_free(session, idxconf);
	__wt_free(session, sourceconf);
	__wt_buf_free(session, &confbuf);
	__wt_buf_free(session, &extra_cols);
	__wt_buf_free(session, &fmt);
	__wt_buf_free(session, &namebuf);

	__wt_schema_release_table(session, table);
	return (ret);
}
/*
 * __create_colgroup --
 *	Create a column group from a "colgroup:<table>[:<group>]" name and its
 *	configuration string.
 */
static int
__create_colgroup(WT_SESSION_IMPL *session,
    const char *name, int exclusive, const char *config)
{
	WT_CONFIG_ITEM item;
	WT_DECL_RET;
	WT_ITEM confbuf, fmt, namebuf;
	WT_TABLE *table;
	size_t tlen;
	const char **next_cfg, *cfg[4] =
	    { WT_CONFIG_BASE(session, colgroup_meta), config, NULL, NULL };
	const char *sourcecfg[] = { config, NULL, NULL };
	const char *cgname, *source, *sourceconf, *tablename;
	char *cgconf, *oldconf;
	int explicit_source;

	cgconf = oldconf = NULL;
	sourceconf = NULL;
	WT_CLEAR(confbuf);
	WT_CLEAR(fmt);
	WT_CLEAR(namebuf);

	tablename = name;
	if (!WT_PREFIX_SKIP(tablename, "colgroup:"))
		return (EINVAL);

	/* The group name is optional: without one the name is the table. */
	cgname = strchr(tablename, ':');
	if (cgname == NULL)
		tlen = strlen(tablename);
	else {
		tlen = (size_t)(cgname - tablename);
		++cgname;
	}

	ret = __wt_schema_get_table(session, tablename, tlen, 1, &table);
	if (ret != 0)
		WT_RET_MSG(session, (ret == WT_NOTFOUND) ? ENOENT : ret,
		    "Can't create '%s' for non-existent table '%.*s'",
		    name, (int)tlen, tablename);

	/* A named column group has to be listed by the table. */
	if (cgname != NULL) {
		ret = __wt_config_subgets(
		    session, &table->cgconf, cgname, &item);
		if (ret != 0)
			WT_ERR_MSG(session, EINVAL,
			    "Column group '%s' not found in table '%.*s'",
			    cgname, (int)tlen, tablename);
	}

	/* Locate the first unused slot in the cfg stack. */
	next_cfg = &cfg[1];
	while (*next_cfg != NULL)
		++next_cfg;

	/*
	 * Use the configured source if there is a non-empty one, otherwise
	 * generate a source name and push it onto the cfg stack so it is part
	 * of the collapsed column group configuration.
	 */
	explicit_source = __wt_config_getones(
	    session, config, "source", &item) == 0 && item.len != 0;
	if (explicit_source)
		WT_ERR(__wt_buf_fmt(
		    session, &namebuf, "%.*s", (int)item.len, item.str));
	else
		WT_ERR(__wt_schema_colgroup_source(
		    session, table, cgname, config, &namebuf));
	source = namebuf.data;
	if (!explicit_source) {
		WT_ERR(__wt_buf_fmt(
		    session, &confbuf, "source=\"%s\"", source));
		*next_cfg = confbuf.data;
		++next_cfg;
	}

	/* The key/value formats belong to the source's configuration. */
	WT_ERR(__wt_buf_fmt(session, &fmt, "key_format=%s", table->key_format));
	if (cgname != NULL) {
		if (__wt_config_getones(session, config, "columns", &item) != 0)
			WT_ERR_MSG(session, EINVAL,
			    "No 'columns' configuration for '%s'", name);
		WT_ERR(__wt_buf_catfmt(session, &fmt, ",value_format="));
		WT_ERR(__wt_struct_reformat(session,
		    table, item.str, item.len, NULL, 1, &fmt));
	} else
		WT_ERR(__wt_buf_catfmt(
		    session, &fmt, ",value_format=%s", table->value_format));

	sourcecfg[1] = fmt.data;
	WT_ERR(__wt_config_merge(session, sourcecfg, NULL, &sourceconf));

	WT_ERR(__wt_schema_create(session, source, sourceconf));

	WT_ERR(__wt_config_collapse(session, cfg, &cgconf));
	ret = __wt_metadata_insert(session, name, cgconf);
	if (ret != 0) {
		/*
		 * An existing metadata entry means there is nothing more to
		 * do: exclusive creates fail, anything else succeeds.
		 */
		if (ret == WT_DUPLICATE_KEY)
			ret = exclusive ? EEXIST : 0;
		goto err;
	}

	WT_ERR(__wt_schema_open_colgroups(session, table));

err:	__wt_free(session, cgconf);
	__wt_free(session, sourceconf);
	__wt_free(session, oldconf);
	__wt_buf_free(session, &confbuf);
	__wt_buf_free(session, &fmt);
	__wt_buf_free(session, &namebuf);

	__wt_schema_release_table(session, table);
	return (ret);
}
/*
 * __wt_lsm_meta_write --
 *	Format an LSM tree's settings, chunk list and old-chunk list into one
 *	string and store it as the tree's metadata entry.
 */
int
__wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_ITEM(meta);
	WT_DECL_RET;
	WT_LSM_CHUNK *entry;
	u_int slot;

	WT_RET(__wt_scr_alloc(session, 0, &meta));

	/* Formats and the nested bloom/file configurations come first. */
	WT_ERR(__wt_buf_fmt(session, meta,
	    "key_format=%s,value_format=%s,bloom_config=(%s),file_config=(%s)",
	    lsm_tree->key_format, lsm_tree->value_format,
	    lsm_tree->bloom_config, lsm_tree->file_config));

	/* The collator is only written when one is named. */
	if (lsm_tree->collator_name != NULL)
		WT_ERR(__wt_buf_catfmt(
		    session, meta, ",collator=%s", lsm_tree->collator_name));

	/* Numeric settings; the throttle flag is written as 0 or 1. */
	WT_ERR(__wt_buf_catfmt(session, meta,
	    ",last=%" PRIu32
	    ",chunk_count_limit=%" PRIu32
	    ",chunk_max=%" PRIu64
	    ",chunk_size=%" PRIu64
	    ",auto_throttle=%" PRIu32
	    ",merge_max=%" PRIu32
	    ",merge_min=%" PRIu32
	    ",bloom=%" PRIu32
	    ",bloom_bit_count=%" PRIu32
	    ",bloom_hash_count=%" PRIu32,
	    lsm_tree->last, lsm_tree->chunk_count_limit,
	    lsm_tree->chunk_max, lsm_tree->chunk_size,
	    F_ISSET(lsm_tree, WT_LSM_TREE_THROTTLE) ? 1 : 0,
	    lsm_tree->merge_max, lsm_tree->merge_min, lsm_tree->bloom,
	    lsm_tree->bloom_bit_count, lsm_tree->bloom_hash_count));

	/* Active chunks: optional fields are left out when unset or zero. */
	WT_ERR(__wt_buf_catfmt(session, meta, ",chunks=["));
	for (slot = 0; slot < lsm_tree->nchunks; ++slot) {
		entry = lsm_tree->chunk[slot];

		if (slot != 0)
			WT_ERR(__wt_buf_catfmt(session, meta, ","));
		WT_ERR(__wt_buf_catfmt(
		    session, meta, "id=%" PRIu32, entry->id));
		if (F_ISSET(entry, WT_LSM_CHUNK_BLOOM))
			WT_ERR(__wt_buf_catfmt(session, meta, ",bloom"));
		if (entry->size != 0)
			WT_ERR(__wt_buf_catfmt(session, meta,
			    ",chunk_size=%" PRIu64, entry->size));
		if (entry->count != 0)
			WT_ERR(__wt_buf_catfmt(session, meta,
			    ",count=%" PRIu64, entry->count));
		WT_ERR(__wt_buf_catfmt(session, meta,
		    ",generation=%" PRIu32, entry->generation));
	}
	WT_ERR(__wt_buf_catfmt(session, meta, "]"));

	/* Old chunks are listed by URI, plus the bloom URI if present. */
	WT_ERR(__wt_buf_catfmt(session, meta, ",old_chunks=["));
	for (slot = 0; slot < lsm_tree->nold_chunks; ++slot) {
		entry = lsm_tree->old_chunks[slot];
		WT_ASSERT(session, entry != NULL);

		if (slot != 0)
			WT_ERR(__wt_buf_catfmt(session, meta, ","));
		WT_ERR(__wt_buf_catfmt(session, meta, "\"%s\"", entry->uri));
		if (F_ISSET(entry, WT_LSM_CHUNK_BLOOM))
			WT_ERR(__wt_buf_catfmt(session, meta,
			    ",bloom=\"%s\"", entry->bloom_uri));
	}
	WT_ERR(__wt_buf_catfmt(session, meta, "]"));

	WT_ERR(__wt_metadata_update(session, lsm_tree->name, meta->data));

err:	__wt_scr_free(session, &meta);
	return (ret);
}
/*
 * __wt_struct_plan --
 *	Given a table cursor containing a complete table, build the "projection
 *	plan" to distribute the columns to dependent stores.  A string
 *	representing the plan will be appended to the plan buffer.
 *
 *	columns/len give the column list to plan for.  When value_only is
 *	non-zero, the first table->nkey_columns entries of the list are skipped
 *	before planning starts.
 *
 *	Returns 0 once the whole column list has been processed, or the error
 *	from the configuration parser or the buffer formatting calls.  The
 *	session's btree handle is saved on entry and restored on every exit.
 */
int
__wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table,
    const char *columns, size_t len, int value_only, WT_ITEM *plan)
{
	WT_BTREE *saved_btree;
	WT_CONFIG conf;
	WT_CONFIG_ITEM k, v;
	WT_DECL_RET;
	int cg, col, current_cg, current_col, start_cg, start_col;
	int i, have_it;
	char coltype, current_coltype;

	/*
	 * NOTE(review): presumably the column lookup can switch the session's
	 * btree handle; it is restored at the err label either way.
	 */
	saved_btree = session->btree;
	start_cg = start_col = -1;		/* -Wuninitialized */

	/* Work through the value columns by skipping over the key columns. */
	WT_ERR(__wt_config_initn(session, &conf, columns, len));

	if (value_only)
		for (i = 0; i < table->nkey_columns; i++)
			WT_ERR(__wt_config_next(&conf, &k, &v));

	current_cg = cg = 0;
	current_col = col = INT_MAX;
	current_coltype = coltype = WT_PROJ_KEY; /* Keep lint quiet. */

	/*
	 * Keep the parser's return value: only WT_NOTFOUND means the column
	 * list is exhausted, anything else is a real error that must not be
	 * reported as a successfully built (but truncated) plan.
	 */
	while ((ret = __wt_config_next(&conf, &k, &v)) == 0) {
		have_it = 0;

		while (__find_next_col(session, table,
		    &k, &cg, &col, &coltype) == 0 &&
		    (!have_it || cg != start_cg || col != start_col)) {
			/*
			 * First we move to the column.  If that is in a
			 * different column group to the last column we
			 * accessed, or before the last column in the same
			 * column group, or moving from the key to the value,
			 * we need to switch column groups or rewind.
			 */
			if (current_cg != cg || current_col > col ||
			    current_coltype != coltype) {
				WT_ASSERT(session, !value_only ||
				    coltype == WT_PROJ_VALUE);
				WT_ERR(__wt_buf_catfmt(
				    session, plan, "%d%c", cg, coltype));

				/*
				 * Set the current column group and column
				 * within the table.
				 */
				current_cg = cg;
				current_col = 0;
				current_coltype = coltype;
			}
			/* Now move to the column we want. */
			if (current_col < col) {
				if (col - current_col > 1)
					WT_ERR(__wt_buf_catfmt(session,
					    plan, "%d", col - current_col));
				WT_ERR(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_SKIP));
			}
			/*
			 * Now copy the value in / out.  In the common case,
			 * where each value is used in one column, we do a
			 * "next" operation.  If the value is used again, we do
			 * a "reuse" operation to avoid making another copy.
			 */
			if (!have_it) {
				WT_ERR(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_NEXT));

				start_cg = cg;
				start_col = col;
				have_it = 1;
			} else
				WT_ERR(__wt_buf_catfmt(session,
				    plan, "%c", WT_PROJ_REUSE));
			current_col = col + 1;
		}
	}

	/* Running off the end of the column list is the normal way out. */
	if (ret == WT_NOTFOUND)
		ret = 0;

err:	session->btree = saved_btree;
	return (ret);
}