/*
 * __logmgr_sync_cfg --
 *	Apply the transaction_sync configuration to the connection's
 *	txn_logsync flags.
 */
static int
__logmgr_sync_cfg(WT_SESSION_IMPL *session, const char **cfg)
{
	WT_CONFIG_ITEM item;
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* Switch the "sync enabled" bit on or off to match the setting. */
	WT_RET(__wt_config_gets(
	    session, cfg, "transaction_sync.enabled", &item));
	if (item.val == 0) {
		FLD_CLR(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
	} else {
		FLD_SET(conn->txn_logsync, WT_LOG_SYNC_ENABLED);
	}

	/*
	 * Drop every method bit before selecting the configured method; a
	 * method string matching none of the known names leaves all of
	 * them clear.
	 */
	WT_RET(__wt_config_gets(
	    session, cfg, "transaction_sync.method", &item));
	FLD_CLR(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH | WT_LOG_FSYNC);

	if (WT_STRING_MATCH("dsync", item.str, item.len)) {
		FLD_SET(conn->txn_logsync, WT_LOG_DSYNC | WT_LOG_FLUSH);
	} else if (WT_STRING_MATCH("fsync", item.str, item.len)) {
		FLD_SET(conn->txn_logsync, WT_LOG_FSYNC);
	} else if (WT_STRING_MATCH("none", item.str, item.len)) {
		FLD_SET(conn->txn_logsync, WT_LOG_FLUSH);
	}

	return (0);
}
/*
 * __wt_txn_begin --
 *	Start a transaction in the session.
 */
int
__wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM iso;
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *txn_state;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = &txn_global->states[session->id];

	WT_ASSERT(session, txn_state->id == WT_TXN_NONE);

	/*
	 * Pick the isolation level: an empty setting inherits the session's
	 * level, and any unrecognized name falls through to read-uncommitted.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &iso));
	if (iso.len == 0)
		txn->isolation = session->isolation;
	else if (WT_STRING_MATCH("snapshot", iso.str, iso.len))
		txn->isolation = TXN_ISO_SNAPSHOT;
	else if (WT_STRING_MATCH("read-committed", iso.str, iso.len))
		txn->isolation = TXN_ISO_READ_COMMITTED;
	else
		txn->isolation = TXN_ISO_READ_UNCOMMITTED;

	/*
	 * Allocate a transaction ID.
	 *
	 * The ID is written into this session's state slot first, and only
	 * then is the global counter advanced with a compare-and-swap, so
	 * the ID is published before the counter moves.
	 *
	 * When threads race, the loser's swap fails and it retries with a
	 * fresh counter value.  Racing threads may therefore burn IDs; that
	 * is the cost of keeping this path free of latches.
	 */
	do {
		txn_state->id = txn->id = txn_global->current;
	} while (!WT_ATOMIC_CAS(txn_global->current, txn->id, txn->id + 1));

	/* Once the ID space is exhausted there is nothing left to hand out. */
	if (txn->id == WT_TXN_ABORTED) {
		WT_RET_MSG(session, ENOMEM, "Out of transaction IDs");
	}

	F_SET(txn, TXN_RUNNING);

	/* Snapshot isolation needs its snapshot before any reads happen. */
	if (txn->isolation == TXN_ISO_SNAPSHOT)
		__wt_txn_refresh(session, WT_TXN_NONE, 1);

	return (0);
}
/*
 * __session_reconfigure --
 *	WT_SESSION->reconfigure method.
 */
static int
__session_reconfigure(WT_SESSION *wt_session, const char *config)
{
	WT_CONFIG_ITEM iso;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = (WT_SESSION_IMPL *)wt_session;
	SESSION_API_CALL(session, reconfigure, config, cfg);

	/* A session can't be reconfigured in the middle of a transaction. */
	if (F_ISSET(&session->txn, TXN_RUNNING)) {
		WT_ERR_MSG(session, EINVAL, "transaction in progress");
	}

	WT_TRET(__session_reset_cursors(session));

	WT_ERR(__wt_config_gets_defno(session, cfg, "isolation", &iso));
	if (iso.len != 0) {
		if (!F_ISSET(S2C(session), WT_CONN_TRANSACTIONAL)) {
			WT_ERR_MSG(session, EINVAL,
			    "Database not configured for transactions");
		}

		/*
		 * Any name other than "snapshot" or "read-uncommitted" is
		 * treated as read-committed.
		 */
		if (WT_STRING_MATCH("snapshot", iso.str, iso.len))
			session->txn.isolation = TXN_ISO_SNAPSHOT;
		else if (WT_STRING_MATCH(
		    "read-uncommitted", iso.str, iso.len))
			session->txn.isolation = TXN_ISO_READ_UNCOMMITTED;
		else
			session->txn.isolation = TXN_ISO_READ_COMMITTED;

		/* The session default tracks the transaction's level. */
		session->isolation = session->txn.isolation;
	}

err:	API_END_NOTFOUND_MAP(session, ret);
}
/*
 * __wt_txn_config --
 *	Apply the isolation, sync and named-snapshot settings to the
 *	session's transaction.
 */
int
__wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM item;
	WT_TXN *txn;

	txn = &session->txn;

	/*
	 * An empty isolation setting leaves the current level alone; an
	 * unrecognized name is treated as read-uncommitted.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &item));
	if (item.len != 0) {
		if (WT_STRING_MATCH("snapshot", item.str, item.len))
			txn->isolation = WT_ISO_SNAPSHOT;
		else if (WT_STRING_MATCH(
		    "read-committed", item.str, item.len))
			txn->isolation = WT_ISO_READ_COMMITTED;
		else
			txn->isolation = WT_ISO_READ_UNCOMMITTED;
	}

	/*
	 * Sync is inherited from the connection unless this transaction
	 * names it explicitly.  The lookup default is a value that is
	 * neither 0 nor 1, which lets us tell "not specified" apart from
	 * an explicit on/off.
	 */
	F_CLR(txn, WT_TXN_SYNC_SET);
	WT_RET(__wt_config_gets_def(
	    session, cfg, "sync", (int)UINT_MAX, &item));
	if (item.val == 0) {
		/*
		 * Explicitly off: remember the explicit setting so commit
		 * doesn't overwrite it, and clear the sync field.
		 */
		F_SET(txn, WT_TXN_SYNC_SET);
		txn->txn_logsync = 0;
	} else if (item.val == 1) {
		/* Explicitly on: only the "explicit" marker is needed. */
		F_SET(txn, WT_TXN_SYNC_SET);
	}

	/*
	 * Looking up a named snapshot both validates the name and installs
	 * the snapshot, which avoids walking the snapshot list twice.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &item));
	if (item.len > 0) {
		WT_RET(__wt_txn_named_snapshot_get(session, &item));
	}

	return (0);
}
/*
 * __wt_huffman_confchk --
 *	Validate a Huffman configuration value.
 */
int
__wt_huffman_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v)
{
	/*
	 * An empty value and the built-in names ("english", "none") need no
	 * further checking; anything else is handed to the file check.
	 */
	if (v->len == 0 ||
	    WT_STRING_MATCH("english", v->str, v->len) ||
	    WT_STRING_MATCH("none", v->str, v->len))
		return (0);

	return (__huffman_confchk_file(session, v, NULL, NULL));
}
/*
 * __wt_schema_index_source --
 *	Build the URI of the data source backing an index.
 */
int
__wt_schema_index_source(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf)
{
	WT_CONFIG_ITEM type;
	WT_DECL_RET;
	size_t prefix_len;
	const char *prefix, *suffix, *tablename;

	/* Skip over the "table:" scheme to get the bare table name. */
	tablename = table->name + strlen("table:");

	/* A missing "type" key is fine, any other lookup failure is not. */
	ret = __wt_config_getones(session, config, "type", &type);
	WT_RET_NOTFOUND_OK(ret);

	if (ret == 0 && !WT_STRING_MATCH("file", type.str, type.len)) {
		/* A non-file type supplies the URI prefix itself. */
		prefix = type.str;
		prefix_len = type.len;
		suffix = "_idx";
	} else {
		/* No type, or an explicit "file" type: use a .wti file. */
		prefix = "file";
		prefix_len = strlen(prefix);
		suffix = ".wti";
	}

	return (__wt_buf_fmt(session, buf, "%.*s:%s_%s%s",
	    (int)prefix_len, prefix, tablename, idxname, suffix));
}
/*
 * __curmetadata_search_near --
 *	WT_CURSOR->search_near method for the metadata cursor type.
 */
static int
__curmetadata_search_near(WT_CURSOR *cursor, int *exact)
{
	WT_CURSOR *file_cursor;
	WT_CURSOR_METADATA *mdc;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	mdc = (WT_CURSOR_METADATA *)cursor;
	file_cursor = mdc->file_cursor;
	CURSOR_API_CALL(cursor, session,
	    search_near, ((WT_CURSOR_BTREE *)file_cursor)->btree);

	WT_MD_CURSOR_NEEDKEY(cursor);

	if (!WT_STRING_MATCH(
	    (char *)cursor->key.data, "metadata:", cursor->key.size - 1)) {
		/* Ordinary key: search the underlying file cursor. */
		WT_ERR(file_cursor->search_near(file_cursor, exact));
		WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor);
	} else {
		/* The "metadata:" key is handled by its own search routine. */
		WT_ERR(__curmetadata_metadata_search(session, cursor));
		*exact = 1;
	}

err:	/* On any failure the cursor is left unpositioned. */
	if (ret != 0) {
		F_CLR(mdc, WT_MDC_POSITIONED | WT_MDC_ONMETADATA);
		F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
	}
	API_END(session, ret);
	return (ret);
}
/*
 * __wt_config_getones_none --
 *	Look up a string key in a single config string, reporting a value
 *	of "none" as an empty value.
 */
int
__wt_config_getones_none(WT_SESSION_IMPL *session,
    const char *config, const char *key, WT_CONFIG_ITEM *value)
{
	WT_RET(__wt_config_getones(session, config, key, value));

	/* Anything but "none" is handed back untouched. */
	if (!WT_STRING_MATCH("none", value->str, value->len))
		return (0);

	value->len = 0;
	return (0);
}
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM bulk_cfg;
	WT_DECL_RET;
	uint32_t flags;
	bool bitmap, bulk, is_flag;

	bitmap = bulk = false;
	flags = 0;

	/* "bulk" is either a boolean (0/1 included) or the word "bitmap". */
	WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &bulk_cfg));
	is_flag = bulk_cfg.type == WT_CONFIG_ITEM_BOOL ||
	    (bulk_cfg.type == WT_CONFIG_ITEM_NUM &&
	    (bulk_cfg.val == 0 || bulk_cfg.val == 1));
	if (is_flag) {
		bulk = bulk_cfg.val != 0;
	} else if (WT_STRING_MATCH("bitmap", bulk_cfg.str, bulk_cfg.len)) {
		bitmap = bulk = true;
	} else {
		WT_RET_MSG(session, EINVAL,
		    "Value for 'bulk' must be a boolean or 'bitmap'");
	}

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

	/* Only "file:" objects can be opened here. */
	if (!WT_PREFIX_MATCH(uri, "file:")) {
		WT_RET(__wt_bad_object_type(session, uri));
	} else {
		/*
		 * Get the handle and keep it locked while the cursor uses
		 * it.  For bulk opens the checkpoint lock is held around
		 * the lookup so a database-wide checkpoint can't make the
		 * open fail with EBUSY.
		 */
		if (bulk)
			__wt_spin_lock(
			    session, &S2C(session)->checkpoint_lock);
		ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags);
		if (bulk)
			__wt_spin_unlock(
			    session, &S2C(session)->checkpoint_lock);
		WT_RET(ret);
	}

	WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));

	/* Increment the data-source's in-use counter. */
	__wt_cursor_dhandle_incr_use(session);
	return (0);

err:	/* The cursor wasn't created: give the handle back. */
	WT_TRET(__wt_session_release_btree(session));
	return (ret);
}
/*
 * __wt_txn_reconfigure --
 *	WT_SESSION::reconfigure for transactions.
 */
int
__wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config)
{
	WT_CONFIG_ITEM iso;
	WT_DECL_RET;
	WT_TXN *txn;

	txn = &session->txn;

	/* A missing "isolation" key is not an error. */
	ret = __wt_config_getones(session, config, "isolation", &iso);
	WT_RET_NOTFOUND_OK(ret);

	/* Nothing to change when the key is absent or empty. */
	if (ret != 0 || iso.len == 0)
		return (0);

	/*
	 * Names other than "snapshot" and "read-uncommitted" select
	 * read-committed.
	 */
	if (WT_STRING_MATCH("snapshot", iso.str, iso.len))
		txn->isolation = WT_ISO_SNAPSHOT;
	else if (WT_STRING_MATCH("read-uncommitted", iso.str, iso.len))
		txn->isolation = WT_ISO_READ_UNCOMMITTED;
	else
		txn->isolation = WT_ISO_READ_COMMITTED;
	session->isolation = txn->isolation;

	return (0);
}
/*
 * __logmgr_config --
 *	Read the logging configuration into the connection and report
 *	whether logging should run.
 */
static int
__logmgr_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
{
	WT_CONFIG_ITEM item;
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* Logging only runs when explicitly enabled. */
	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &item));
	*runp = item.val != 0;

	/*
	 * The compressor and log path are configured even when logging is
	 * off, in case a log is going to be printed.
	 */
	conn->log_compressor = NULL;
	WT_RET(__wt_config_gets_none(session, cfg, "log.compressor", &item));
	WT_RET(__wt_compressor_config(session, &item, &conn->log_compressor));

	WT_RET(__wt_config_gets(session, cfg, "log.path", &item));
	WT_RET(__wt_strndup(session, item.str, item.len, &conn->log_path));

	/* Everything below only matters when logging is enabled. */
	if (!*runp)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &item));
	if (item.val != 0) {
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);
	}

	WT_RET(__wt_config_gets(session, cfg, "log.file_max", &item));
	conn->log_file_max = (wt_off_t)item.val;
	WT_STAT_FAST_CONN_SET(session, log_max_filesize, conn->log_file_max);

	/*
	 * With pre-allocation configured, start from a single file; the
	 * count adapts as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &item));
	if (item.val != 0) {
		FLD_SET(conn->log_flags, WT_CONN_LOG_PREALLOC);
		conn->log_prealloc = 1;
	}

	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &item));
	if (item.len != 0 && WT_STRING_MATCH("error", item.str, item.len)) {
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));
	return (0);
}
/*
 * __ckpt_server_config --
 *	Read the checkpoint server configuration and decide whether the
 *	server should be started.
 */
static int
__ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp)
{
	WT_CONFIG_ITEM item;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	char *name_cfg;

	conn = S2C(session);

	/* The wait time is configured in seconds and kept in microseconds. */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &item));
	conn->ckpt_usecs = (long)item.val * 1000000;

	WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &item));
	conn->ckpt_logsize = (wt_off_t)item.val;
	__wt_log_written_reset(session);

	/*
	 * The server needs a wait time and/or a log size to run.  Without a
	 * wait time it only runs when a log size is set and logging is
	 * enabled.
	 */
	if (conn->ckpt_usecs == 0 && (conn->ckpt_logsize == 0 ||
	    !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
		*startp = 0;
		return (0);
	}
	*startp = 1;

	/*
	 * An application-supplied checkpoint name is ignored when it is
	 * empty or equal to the default name.
	 */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &item));
	if (item.len != 0 &&
	    !WT_STRING_MATCH(WT_CHECKPOINT, item.str, item.len)) {
		WT_RET(__wt_checkpoint_name_ok(session, item.str, item.len));

		/* Build "name=<value>" in scratch space, then keep a copy. */
		WT_RET(__wt_scr_alloc(session, item.len + 20, &tmp));
		WT_ERR(__wt_buf_fmt(
		    session, tmp, "name=%.*s", (int)item.len, item.str));
		WT_ERR(__wt_strdup(session, tmp->data, &name_cfg));

		/* Replace any previously stored configuration. */
		__wt_free(session, conn->ckpt_config);
		conn->ckpt_config = name_cfg;
	}

err:	__wt_scr_free(session, &tmp);
	return (ret);
}
/*
 * __ckpt_named --
 *	Load the information for a file's named checkpoint.
 */
static int
__ckpt_named(WT_SESSION_IMPL *session,
    const char *checkpoint, const char *config, WT_CKPT *ckpt)
{
	WT_CONFIG ckptconf;
	WT_CONFIG_ITEM list, name, info;

	/* Iterate over the entries of the "checkpoint" value. */
	WT_RET(__wt_config_getones(session, config, "checkpoint", &list));
	WT_RET(__wt_config_subinit(session, &ckptconf, &list));

	/*
	 * Checkpoint names are expected to be unique, so the first entry
	 * with a matching name is the one to load.
	 */
	while (__wt_config_next(&ckptconf, &name, &info) == 0) {
		if (!WT_STRING_MATCH(checkpoint, name.str, name.len))
			continue;
		return (__ckpt_load(session, &name, &info, ckpt));
	}

	return (WT_NOTFOUND);
}
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM bulk_cfg;
	WT_DECL_RET;
	int bitmap, bulk;
	uint32_t flags;

	flags = 0;

	/* "bulk" is either a 0/1 number or the word "bitmap". */
	WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &bulk_cfg));
	if (bulk_cfg.type == ITEM_NUM &&
	    (bulk_cfg.val == 0 || bulk_cfg.val == 1)) {
		bitmap = 0;
		bulk = (bulk_cfg.val != 0);
	} else if (WT_STRING_MATCH("bitmap", bulk_cfg.str, bulk_cfg.len)) {
		bitmap = 1;
		bulk = 1;
	} else {
		WT_RET_MSG(session, EINVAL,
		    "Value for 'bulk' must be a boolean or 'bitmap'");
	}

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_BTREE_EXCLUSIVE);

	/* TODO: handle projections. */

	/*
	 * Only "file:" objects can be opened here; get the handle and keep
	 * it locked while the cursor is using it.
	 */
	if (!WT_PREFIX_MATCH(uri, "file:")) {
		WT_RET(__wt_bad_object_type(session, uri));
	} else {
		WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, flags));
	}

	WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
	return (0);

err:	/* The cursor wasn't created: give the handle back. */
	WT_TRET(__wt_session_release_btree(session));
	return (ret);
}
/*
 * __ckpt_named_addr --
 *	Fetch the address cookie of a file's named checkpoint.
 */
static int
__ckpt_named_addr(WT_SESSION_IMPL *session,
    const char *checkpoint, const char *config, WT_ITEM *addr)
{
	WT_CONFIG ckptconf;
	WT_CONFIG_ITEM cookie, list, name, info;

	/* Iterate over the entries of the "checkpoint" value. */
	WT_RET(__wt_config_getones(session, config, "checkpoint", &list));
	WT_RET(__wt_config_subinit(session, &ckptconf, &list));

	/*
	 * Checkpoint names are expected to be unique, so only the first
	 * entry with a matching name is examined.
	 */
	while (__wt_config_next(&ckptconf, &name, &info) == 0) {
		if (!WT_STRING_MATCH(checkpoint, name.str, name.len))
			continue;

		/* An empty "addr" leaves the caller's buffer untouched. */
		WT_RET(__wt_config_subgets(session, &info, "addr", &cookie));
		if (cookie.len != 0) {
			WT_RET(__wt_nhex_to_raw(
			    session, cookie.str, cookie.len, addr));
		}
		return (0);
	}

	return (WT_NOTFOUND);
}
/*
 * __ckpt_server_config --
 *	Read the checkpoint server configuration and decide whether the
 *	server should be started.
 */
static int
__ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp)
{
	WT_CONFIG_ITEM item;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;

	conn = S2C(session);

	/* Without a wait time the server doesn't run at all. */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &item));
	if (item.val == 0) {
		*startp = 0;
		return (0);
	}

	/* The wait time is configured in seconds and kept in microseconds. */
	conn->ckpt_usecs = (long)item.val * 1000000;
	*startp = 1;

	/* The default checkpoint name needs no explicit configuration. */
	WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &item));
	if (WT_STRING_MATCH(WT_CHECKPOINT, item.str, item.len))
		return (0);

	/*
	 * Assemble "name=<value>" in scratch space (sized with room to
	 * spare for the prefix and terminator), then store a copy on the
	 * connection.
	 */
	WT_RET(__wt_scr_alloc(session, item.len + 20, &tmp));
	strcpy((char *)tmp->data, "name=");
	strncat((char *)tmp->data, item.str, item.len);
	ret = __wt_strndup(session,
	    tmp->data, strlen("name=") + item.len, &conn->ckpt_config);
	__wt_scr_free(&tmp);
	WT_RET(ret);

	return (0);
}
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 *
 *	Returns 0 if the tree was created, or if it already exists and
 *	"exclusive" is not set; EEXIST if it already exists and "exclusive"
 *	is set; EINVAL for an invalid configuration; otherwise the error from
 *	the failing call.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] =
	    { WT_CONFIG_BASE(session, session_create), config, NULL };
	const char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 *
	 * The search result has to be stored in ret: the check that follows
	 * would otherwise test the stale result of the handle lookup above
	 * and let real metadata errors go unnoticed.
	 */
	if ((ret = __wt_metadata_search(session, uri, &tmpconfig)) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/* Reject column-store key formats before allocating anything. */
	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	/* From here on, failures go through err to discard the handle. */
	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(session,
	    cval.str, cval.len, &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(session,
	    cval.str, cval.len, &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "collator", &cval));
	WT_ERR(__wt_strndup(session,
	    cval.str, cval.len, &lsm_tree->collator_name));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
	if (cval.val)
		F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
	else
		F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	/* Strip the enclosing brackets from a structured value. */
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
	if (cval.type == WT_CONFIG_ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(session,
	    cval.str, cval.len, &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
	lsm_tree->chunk_max = (uint64_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
	lsm_tree->chunk_size = (uint64_t)cval.val;
	if (lsm_tree->chunk_size > lsm_tree->chunk_max)
		WT_ERR_MSG(session, EINVAL,
		    "Chunk size (chunk_size) must be smaller than or equal to "
		    "the maximum chunk size (chunk_max)");
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	lsm_tree->merge_min = lsm_tree->merge_max / 2;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_threads", &cval));
	lsm_tree->merge_threads = (uint32_t)cval.val;
	/* Sanity check that api_data.py is in sync with lsm.h */
	WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS);

	/*
	 * Set up the config for each chunk.  If possible, avoid high latencies
	 * from fsync by flushing the cache every 8MB (will be overridden by
	 * any application setting).
	 */
	tmpconfig = "";
#ifdef HAVE_SYNC_FILE_RANGE
	if (!S2C(session)->direct_io)
		tmpconfig = "os_cache_dirty_max=8MB,";
#endif
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s%s,key_format=u,value_format=u", tmpconfig, config));
	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 *
 *	Returns 0 if the tree was created, or if it already exists and
 *	"exclusive" is not set; EEXIST if it already exists and "exclusive"
 *	is set; EINVAL for an invalid configuration; otherwise the error from
 *	the failing call.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] = API_CONF_DEFAULTS(session, create, config);
	const char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 *
	 * The read result has to be stored in ret: the check that follows
	 * would otherwise test the stale result of the handle lookup above
	 * and let real metadata errors go unnoticed.
	 */
	if ((ret = __wt_metadata_read(session, uri, &tmpconfig)) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/* Reject column-store key formats before allocating anything. */
	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	/*
	 * The handle is allocated: every failure from here on must go
	 * through err so the handle is discarded rather than leaked.
	 */
	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(session,
	    cval.str, cval.len, &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(session,
	    cval.str, cval.len, &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_newest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST);
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_NEWEST) ||
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST)))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	/* Strip the enclosing brackets from a structured value. */
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_config", &cval));
	if (cval.type == ITEM_STRUCT) {
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(session,
	    cval.str, cval.len, &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_chunk_size", &cval));
	lsm_tree->chunk_size = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm_merge_threads", &cval));
	lsm_tree->merge_threads = (uint32_t)cval.val;
	/* Sanity check that api_data.py is in sync with lsm.h */
	WT_ASSERT(session, lsm_tree->merge_threads <= WT_LSM_MAX_WORKERS);

	/* Chunk files take the caller's config with raw key/value formats. */
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s,key_format=u,value_format=u", config));
	lsm_tree->file_config = __wt_buf_steal(session, buf, NULL);

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache. Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
/*
 * config_check --
 *	Check the keys in an application-supplied config string match what is
 * specified in an array of check strings.
 *
 *	Each key must be a string or identifier and must appear in the
 *	"checks" array (terminated by an entry with a NULL name).  The value
 *	is then validated against the entry's type, its optional check
 *	function and its optional "min"/"max"/"choices" check string.
 *	Returns 0 when every key passes, EINVAL (with a message) on the first
 *	violation, or the error from a failing helper.
 */
static int
config_check(WT_SESSION_IMPL *session, const WT_CONFIG_CHECK *checks,
    const char *config, size_t config_len)
{
	WT_CONFIG parser, cparser, sparser;
	WT_CONFIG_ITEM k, v, ck, cv, dummy;
	WT_DECL_RET;
	int i;
	bool badtype, found;

	/*
	 * The config_len parameter is optional, and allows passing in strings
	 * that are not nul-terminated.
	 */
	if (config_len == 0)
		WT_RET(__wt_config_init(session, &parser, config));
	else
		WT_RET(__wt_config_initn(session, &parser, config, config_len));
	while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
		if (k.type != WT_CONFIG_ITEM_STRING &&
		    k.type != WT_CONFIG_ITEM_ID)
			WT_RET_MSG(session, EINVAL,
			    "Invalid configuration key found: '%.*s'",
			    (int)k.len, k.str);

		/* Search for a matching entry. */
		for (i = 0; checks[i].name != NULL; i++)
			if (WT_STRING_MATCH(checks[i].name, k.str, k.len))
				break;
		/* Running off the end of the array means the key is unknown. */
		if (checks[i].name == NULL)
			WT_RET_MSG(session, EINVAL,
			    "unknown configuration key: '%.*s'",
			    (int)k.len, k.str);

		/* Validate the value against the entry's declared type. */
		if (strcmp(checks[i].type, "boolean") == 0) {
			/* Booleans may also be spelled as the numbers 0 or 1. */
			badtype = v.type != WT_CONFIG_ITEM_BOOL &&
			    (v.type != WT_CONFIG_ITEM_NUM ||
			    (v.val != 0 && v.val != 1));
		} else if (strcmp(checks[i].type, "category") == 0) {
			/*
			 * Deal with categories of the form: XXX=(XXX=blah).
			 *
			 * The nested string starts one byte past the key and
			 * is checked recursively against the sub-entries.
			 * Only an EINVAL result marks the value as bad; any
			 * other result is not acted on here.
			 */
			ret = config_check(session,
			    checks[i].subconfigs,
			    k.str + strlen(checks[i].name) + 1, v.len);
			if (ret != EINVAL)
				badtype = false;
			else
				badtype = true;
		} else if (strcmp(checks[i].type, "format") == 0) {
			/* No type validation is done for formats. */
			badtype = false;
		} else if (strcmp(checks[i].type, "int") == 0) {
			badtype = v.type != WT_CONFIG_ITEM_NUM;
		} else if (strcmp(checks[i].type, "list") == 0) {
			/* An empty value is accepted as a list. */
			badtype = v.len > 0 &&
			    v.type != WT_CONFIG_ITEM_STRUCT;
		} else if (strcmp(checks[i].type, "string") == 0) {
			/* No type validation is done for strings. */
			badtype = false;
		} else
			WT_RET_MSG(session, EINVAL,
			    "unknown configuration type: '%s'",
			    checks[i].type);

		if (badtype)
			WT_RET_MSG(session, EINVAL,
			    "Invalid value for key '%.*s': expected a %s",
			    (int)k.len, k.str, checks[i].type);

		/* Run the entry's custom check function, if it has one. */
		if (checks[i].checkf != NULL)
			WT_RET(checks[i].checkf(session, &v));

		/* Entries without a check string are done. */
		if (checks[i].checks == NULL)
			continue;

		/* Setup an iterator for the check string. */
		WT_RET(__wt_config_init(session, &cparser, checks[i].checks));
		while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
			if (WT_STRING_MATCH("min", ck.str, ck.len)) {
				if (v.val < cv.val)
					WT_RET_MSG(session, EINVAL,
					    "Value too small for key '%.*s' "
					    "the minimum is %.*s",
					    (int)k.len, k.str,
					    (int)cv.len, cv.str);
			} else if (WT_STRING_MATCH("max", ck.str, ck.len)) {
				if (v.val > cv.val)
					WT_RET_MSG(session, EINVAL,
					    "Value too large for key '%.*s' "
					    "the maximum is %.*s",
					    (int)k.len, k.str,
					    (int)cv.len, cv.str);
			} else if (WT_STRING_MATCH("choices", ck.str, ck.len)) {
				if (v.len == 0)
					WT_RET_MSG(session, EINVAL,
					    "Key '%.*s' requires a value",
					    (int)k.len, k.str);
				if (v.type == WT_CONFIG_ITEM_STRUCT) {
					/*
					 * Handle the 'verbose' case of a list
					 * containing restricted choices.
					 *
					 * Each list element is looked up in
					 * the choices; the loop stops at the
					 * first element that isn't found.
					 * Note v is overwritten with the
					 * element being examined.
					 */
					WT_RET(__wt_config_subinit(session,
					    &sparser, &v));
					found = true;
					while (found &&
					    (ret = __wt_config_next(&sparser,
					    &v, &dummy)) == 0) {
						ret = __wt_config_subgetraw(
						    session, &cv, &v, &dummy);
						found = ret == 0;
					}
				} else {
					/* A single value: look it up directly. */
					ret = __wt_config_subgetraw(session,
					    &cv, &v, &dummy);
					found = ret == 0;
				}

				/*
				 * WT_NOTFOUND is expected here (end of list or
				 * a failed lookup); anything else is an error.
				 */
				if (ret != 0 && ret != WT_NOTFOUND)
					return (ret);
				if (!found)
					WT_RET_MSG(session, EINVAL,
					    "Value '%.*s' not a "
					    "permitted choice for key '%.*s'",
					    (int)v.len, v.str,
					    (int)k.len, k.str);
			} else
				WT_RET_MSG(session, EINVAL,
				    "unexpected configuration description "
				    "keyword %.*s", (int)ck.len, ck.str);
		}
	}

	/* WT_NOTFOUND from the parser means the whole string was consumed. */
	if (ret == WT_NOTFOUND)
		ret = 0;

	return (ret);
}
/*
 * __logmgr_config --
 *	Parse and setup the logging server options.
 *
 *	On success *runp reports whether logging is enabled.  When "reconfig"
 *	is set, settings that can only be chosen at creation (compressor,
 *	log path, maximum file size) are left untouched.  Returns EINVAL with
 *	a message for incompatible settings, otherwise 0 or the error from a
 *	failing helper.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	bool enabled;

	conn = S2C(session);

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	enabled = cval.val != 0;

	/*
	 * If we're reconfiguring, enabled must match the already
	 * existing setting.
	 *
	 * If it is off and the user is turning it on, or it is on
	 * and the user is turning it off, return an error.  Report a
	 * message like every other EINVAL path in this function, so the
	 * caller can tell what was rejected.
	 */
	if (reconfig &&
	    ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
	    (!enabled && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))))
		WT_RET_MSG(session, EINVAL,
		    "log manager reconfigure: enabled mismatch with existing "
		    "setting");

	/* Logging is incompatible with in-memory */
	if (enabled) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = enabled;

	/*
	 * Setup a log path and compression even if logging is disabled in case
	 * we are going to print a log.  Only do this on creation.  Once a
	 * compressor or log path are set they cannot be changed.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &cval));
		WT_RET(__wt_compressor_config(
		    session, &cval, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
		WT_RET(__wt_strndup(
		    session, cval.str, cval.len, &conn->log_path));
	}

	/* We are done if logging isn't enabled. */
	if (!*runp)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	if (!reconfig) {
		/*
		 * Ignore if the user tries to change the file size.  The
		 * amount of memory allocated to the log slots may be based
		 * on the log file size at creation and we don't want to
		 * re-allocate that memory while running.
		 */
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
		conn->log_file_max = (wt_off_t)cval.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * If pre-allocation is configured, set the initial number to one.
	 * We'll adapt as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Note that it is meaningless to reconfigure this value during
	 * runtime.  It only matters on create before recovery runs.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
	if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len))
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);

	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
	if (cval.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));

	/* Signal the log condition, when it exists, after a change. */
	if (conn->log_cond != NULL)
		WT_RET(__wt_cond_auto_signal(session, conn->log_cond));

	return (0);
}
/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 *
 *	session: the session whose transaction (session->txn) is committed; the
 *	    transaction must be running (asserted).
 *	cfg: configuration stack; "commit_timestamp" and "sync" are read here,
 *	    and cfg is also passed on to the log-commit and rollback calls.
 *
 *	Returns 0 on success.  Any failure before the updates are processed
 *	jumps to the err label, which rolls the transaction back and returns
 *	the error.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_OP *op;
	u_int i;
	bool locked, readonly;
#ifdef HAVE_TIMESTAMPS
	wt_timestamp_t prev_commit_timestamp, ts;
	bool update_timestamp;
#endif

	txn = &session->txn;
	conn = S2C(session);
	txn_global = &conn->txn_global;
	/* Tracks whether we hold the visibility lock, for the exit paths. */
	locked = false;

	WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) ||
	    txn->mod_count == 0);

	/*
	 * Remember whether there were modifications: mod_count is reset
	 * below, and the value is needed for the final eviction check.
	 */
	readonly = txn->mod_count == 0;
	/*
	 * Look for a commit timestamp.  Supplying one is an error in builds
	 * without timestamp support.
	 */
	WT_ERR(
	    __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval));
	if (cval.len != 0) {
#ifdef HAVE_TIMESTAMPS
		WT_ERR(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
		WT_ERR(__wt_timestamp_validate(session,
		    "commit", &ts, &cval, true, true, true));
		__wt_timestamp_set(&txn->commit_timestamp, &ts);
		__wt_txn_set_commit_timestamp(session);
#else
		WT_ERR_MSG(session, EINVAL, "commit_timestamp requires a "
		    "version of WiredTiger built with timestamp support");
#endif
	}

#ifdef HAVE_TIMESTAMPS
	/*
	 * Debugging checks on timestamps, if user requested them.  Both
	 * checks only apply to transactions that made modifications.
	 */
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
	    !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "commit_timestamp required and "
		    "none set on this transaction");
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
	    F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "no commit_timestamp required and "
		    "timestamp set on this transaction");
#endif

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_SYNC_ENABLED) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_ERR_MSG(session, EINVAL,
			    "Sync already set during begin_transaction");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification: a failing callback aborts the commit. */
	if (txn->notify != NULL)
		WT_ERR(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/*
	 * We are about to release the snapshot: copy values into any
	 * positioned cursors so they don't point to updates that could be
	 * freed once we don't have a snapshot.
	 */
	if (session->ncursors > 0) {
		WT_DIAGNOSTIC_YIELD;
		WT_ERR(__wt_session_copy_values(session));
	}

	/* If we are logging, write a commit log record. */
	if (txn->logrec != NULL &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		/*
		 * We hold the visibility lock for reading from the time
		 * we write our log record until the time we release our
		 * transaction so that the LSN any checkpoint gets will
		 * always reflect visible data.
		 */
		__wt_readlock(session, &txn_global->visibility_rwlock);
		locked = true;
		WT_ERR(__wt_txn_log_commit(session, cfg));
	}

	/* Note: we're going to commit: nothing can fail after this point. */

	/* Process and free updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
		switch (op->type) {
		case WT_TXN_OP_BASIC:
		case WT_TXN_OP_BASIC_TS:
		case WT_TXN_OP_INMEM:
			/*
			 * Switch reserved operations to abort to
			 * simplify obsolete update list truncation.
			 */
			if (op->u.upd->type == WT_UPDATE_RESERVED) {
				op->u.upd->txnid = WT_TXN_ABORTED;
				break;
			}

			/*
			 * Writes to the lookaside file can be evicted as soon
			 * as they commit.
			 */
			if (conn->cache->las_fileid != 0 &&
			    op->fileid == conn->cache->las_fileid) {
				op->u.upd->txnid = WT_TXN_NONE;
				break;
			}

#ifdef HAVE_TIMESTAMPS
			/*
			 * Stamp the update with the transaction's commit
			 * timestamp, except for WT_TXN_OP_BASIC_TS
			 * operations, which are skipped here.
			 */
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
			    op->type != WT_TXN_OP_BASIC_TS) {
				WT_ASSERT(session,
				    op->fileid != WT_METAFILE_ID);
				__wt_timestamp_set(&op->u.upd->timestamp,
				    &txn->commit_timestamp);
			}
#endif
			break;

		case WT_TXN_OP_REF:
#ifdef HAVE_TIMESTAMPS
			/* Stamp the page-delete structure of the reference. */
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
				__wt_timestamp_set(
				    &op->u.ref->page_del->timestamp,
				    &txn->commit_timestamp);
#endif
			break;

		case WT_TXN_OP_TRUNCATE_COL:
		case WT_TXN_OP_TRUNCATE_ROW:
			/* Other operations don't need timestamps. */
			break;
		}

		__wt_txn_op_free(session, op);
	}
	txn->mod_count = 0;

#ifdef HAVE_TIMESTAMPS
	/*
	 * Track the largest commit timestamp we have seen.
	 *
	 * We don't actually clear the local commit timestamp, just the flag.
	 * That said, we can't update the global commit timestamp until this
	 * transaction is visible, which happens when we release it.
	 *
	 * The flag is sampled before the release call below.
	 */
	update_timestamp = F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
#endif

	__wt_txn_release(session);
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);

#ifdef HAVE_TIMESTAMPS
	/* First check if we've already committed something in the future. */
	if (update_timestamp) {
		WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp));
		update_timestamp = __wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0;
	}

	/*
	 * If it looks like we need to move the global commit timestamp,
	 * write lock and re-check.
	 */
	if (update_timestamp) {
#if WT_TIMESTAMP_SIZE == 8
		/*
		 * 8-byte timestamps: advance the global value with a
		 * compare-and-swap, re-reading the global value and retrying
		 * while ours is still larger.
		 */
		while (__wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0) {
			if (__wt_atomic_cas64(
			    &txn_global->commit_timestamp.val,
			    prev_commit_timestamp.val,
			    txn->commit_timestamp.val)) {
				txn_global->has_commit_timestamp = true;
				break;
			}
			__wt_timestamp_set(
			    &prev_commit_timestamp,
			    &txn_global->commit_timestamp);
		}
#else
		/* Other sizes: re-check and update under the write lock. */
		__wt_writelock(session, &txn_global->rwlock);
		if (__wt_timestamp_cmp(&txn->commit_timestamp,
		    &txn_global->commit_timestamp) > 0) {
			__wt_timestamp_set(&txn_global->commit_timestamp,
			    &txn->commit_timestamp);
			txn_global->has_commit_timestamp = true;
		}
		__wt_writeunlock(session, &txn_global->rwlock);
#endif
	}
#endif

	/*
	 * We're between transactions, if we need to block for eviction, it's
	 * a good time to do so.  Note that we must ignore any error return
	 * because the user's data is committed.
	 */
	if (!readonly)
		(void)__wt_cache_eviction_check(session, false, false, NULL);
	return (0);

err:	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	/* Drop the visibility lock if the failure came after taking it. */
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);
	WT_TRET(__wt_txn_rollback(session, cfg));
	return (ret);
}
/*
 * __wt_curfile_create --
 *	Allocate and initialize a cursor on the session's current btree
 *	handle.
 *
 *	The "bulk", "no_cache" and "next_random" settings in cfg select the
 *	cursor flavor; the finished cursor is handed back through cursorp by
 *	__wt_cursor_init.  Returns 0 on success or an error code, in which
 *	case the cursor memory allocated here has been freed.
 */
int
__wt_curfile_create(WT_SESSION_IMPL *session,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	static WT_CURSOR iface = {
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		__curfile_compare,
		__curfile_next,
		__curfile_prev,
		__curfile_reset,
		__curfile_search,
		__curfile_search_near,
		__curfile_insert,
		__curfile_update,
		__curfile_remove,
		__curfile_close,
		{ NULL, NULL },		/* TAILQ_ENTRY q */
		0,			/* recno key */
		{ 0 },			/* recno raw buffer */
		{ NULL, 0, 0, NULL, 0 },/* WT_ITEM key */
		{ NULL, 0, 0, NULL, 0 },/* WT_ITEM value */
		0,			/* int saved_err */
		0			/* uint32_t flags */
	};
	WT_BTREE *btree;
	WT_CONFIG_ITEM cval;
	WT_CURSOR *cursor;
	WT_CURSOR_BTREE *cbt;
	WT_DECL_RET;
	size_t csize;
	int bitmap, bulk;

	cbt = NULL;

	btree = session->btree;
	WT_ASSERT(session, btree != NULL);

	/*
	 * bulk
	 *	The literal "bitmap" implies a bulk cursor; any other value is
	 * treated as a boolean.
	 */
	WT_RET(__wt_config_gets_defno(session, cfg, "bulk", &cval));
	bitmap = (cval.type == ITEM_ID || cval.type == ITEM_STRING) &&
	    WT_STRING_MATCH("bitmap", cval.str, cval.len);
	bulk = bitmap || cval.val != 0;

	/* Bulk cursors use the larger bulk structure. */
	if (bulk)
		csize = sizeof(WT_CURSOR_BULK);
	else
		csize = sizeof(WT_CURSOR_BTREE);
	WT_RET(__wt_calloc(session, 1, csize, &cbt));

	/* Start from the method template, then fill in per-handle fields. */
	cursor = &cbt->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	cursor->uri = btree->name;
	cursor->key_format = btree->key_format;
	cursor->value_format = btree->value_format;

	cbt->btree = btree;
	if (bulk)
		WT_ERR(__wt_curbulk_init((WT_CURSOR_BULK *)cbt, bitmap));

	/*
	 * no_cache
	 *	No cache cursors are read-only: disable every modifying method.
	 */
	WT_ERR(__wt_config_gets_defno(session, cfg, "no_cache", &cval));
	if (cval.val != 0)
		cursor->insert = cursor->update = cursor->remove =
		    __wt_cursor_notsup;

	/*
	 * next_random
	 *	Random retrieval cursors only support next, reset and close.
	 */
	WT_ERR(__wt_config_gets_defno(session, cfg, "next_random", &cval));
	if (cval.val != 0) {
		__wt_cursor_set_notsup(cursor);
		cursor->next = __curfile_next_random;
		cursor->reset = __curfile_reset;
	}

	/* __wt_cursor_init is last so we don't have to clean up on error. */
	STATIC_ASSERT(offsetof(WT_CURSOR_BTREE, iface) == 0);
	WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
	return (0);

err:	__wt_free(session, cbt);
	return (ret);
}
/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 *
 *	session: the session whose transaction (session->txn) is committed.
 *	cfg: configuration stack; the "sync" setting is read here, and cfg is
 *	    also passed on to the log-commit and rollback calls.
 *
 *	Returns 0 on success and EINVAL if no transaction is running or if
 *	"sync" is given after it was already set in begin_transaction.  A
 *	failure in the notify callback, in copying cursor values or in writing
 *	the log record rolls the transaction back and returns the error.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_OP *op;
	u_int i;

	txn = &session->txn;
	conn = S2C(session);
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) ||
	    txn->mod_count == 0);

	if (!F_ISSET(txn, WT_TXN_RUNNING))
		WT_RET_MSG(session, EINVAL, "No transaction is active");

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_FLUSH) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_RET_MSG(session, EINVAL,
			    "Sync already set during begin_transaction.");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification. */
	if (txn->notify != NULL)
		WT_TRET(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/*
	 * Copy values into any positioned cursors so they don't point to
	 * updates that could be freed once the snapshot and transaction ID are
	 * no longer pinned.  This can fail, so it has to happen while the
	 * transaction can still be rolled back, and before the logging path
	 * below releases the snapshot.
	 */
	if (ret == 0 && session->ncursors > 0)
		ret = __wt_session_copy_values(session);

	/* If we are logging, write a commit log record. */
	if (ret == 0 && txn->mod_count > 0 &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		ret = __wt_txn_log_commit(session, cfg);
		WT_ASSERT(session, ret == 0);
	}

	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	if (ret != 0) {
		WT_TRET(__wt_txn_rollback(session, cfg));
		return (ret);
	}

	/* Free memory associated with updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++)
		__wt_txn_op_free(session, op);
	txn->mod_count = 0;

	__wt_txn_release(session);
	return (0);
}
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.
 *
 *	Decodes the "bulk" (and, for bulk cursors, "checkpoint_wait") settings
 *	from cfg, acquires the btree handle named by a "file:" uri and creates
 *	the cursor on it.  Returns 0 on success; if cursor creation fails the
 *	handle is released before the error is returned.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	uint32_t flags;
	bool bitmap, bulk, checkpoint_wait;

	flags = 0;
	bitmap = false;
	bulk = false;
	checkpoint_wait = true;

	/*
	 * Decode the bulk configuration settings.  In memory databases
	 * ignore bulk load.
	 */
	if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
		WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
		if (cval.type == WT_CONFIG_ITEM_BOOL ||
		    (cval.type == WT_CONFIG_ITEM_NUM &&
		    (cval.val == 0 || cval.val == 1))) {
			/* A plain boolean (or 0/1) turns bulk on or off. */
			bulk = cval.val != 0;
			bitmap = false;
		} else if (WT_STRING_MATCH("bitmap", cval.str, cval.len)) {
			bulk = true;
			bitmap = true;
		} else if (!WT_STRING_MATCH("unordered", cval.str, cval.len)) {
			/*
			 * Unordered bulk insert is a special case used
			 * internally by index creation on existing tables.
			 * It doesn't enforce any special semantics at the
			 * file level.  It primarily exists to avoid some
			 * locking problems between LSM and index creation.
			 * Anything else is rejected.
			 */
			WT_RET_MSG(session, EINVAL,
			    "Value for 'bulk' must be a boolean or 'bitmap'");
		}

		if (bulk) {
			WT_RET(__wt_config_gets(session,
			    cfg, "checkpoint_wait", &cval));
			checkpoint_wait = cval.val != 0;
		}
	}

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

	/* Get the handle and lock it while the cursor is using it. */
	if (!WT_PREFIX_MATCH(uri, "file:"))
		WT_RET(__wt_bad_object_type(session, uri));
	else {
		/*
		 * If we are opening exclusive and don't want a bulk cursor
		 * open to fail with EBUSY due to a database-wide checkpoint,
		 * get the handle while holding the checkpoint lock.
		 */
		if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait)
			WT_WITH_CHECKPOINT_LOCK(session,
			    ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags));
		else
			ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags);
		WT_RET(ret);
	}

	/* If the cursor could not be opened, release the handle. */
	if ((ret = __curfile_create(
	    session, owner, cfg, bulk, bitmap, cursorp)) != 0)
		WT_TRET(__wt_session_release_btree(session));
	return (ret);
}
/*
 * __schema_open_index --
 *	Open one or more indices for a table (internal version).
 *
 *	session: the current session.
 *	table: the table whose in-memory index list (table->indices) is
 *	    brought in line with the metadata.
 *	idxname, len: name (and its length) of a single index to open, or
 *	    NULL to open every index of the table.
 *	indexp: if not NULL, set to the slot of a matching index.
 *
 *	Returns 0 on success, WT_NOTFOUND if a named index was requested and
 *	no metadata entry matched it, or another error code.
 */
static int
__schema_open_index(WT_SESSION_IMPL *session,
    WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp)
{
	WT_CURSOR *cursor;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_INDEX *idx;
	u_int i;
	int cmp;
	bool match;
	const char *idxconf, *name, *tablename, *uri;

	/* Check if we've already done the work. */
	if (idxname == NULL && table->idx_complete)
		return (0);

	cursor = NULL;
	idx = NULL;
	match = false;

	/* Build a search key: "index:<table name without prefix>:". */
	tablename = table->name;
	(void)WT_PREFIX_SKIP(tablename, "table:");
	WT_ERR(__wt_scr_alloc(session, 512, &tmp));
	WT_ERR(__wt_buf_fmt(session, tmp, "index:%s:", tablename));

	/*
	 * Find matching indices: position on the search key or, if the
	 * nearest entry sorts before it, step forward once.
	 */
	WT_ERR(__wt_metadata_cursor(session, &cursor));
	cursor->set_key(cursor, tmp->data);
	if ((ret = cursor->search_near(cursor, &cmp)) == 0 && cmp < 0)
		ret = cursor->next(cursor);
	for (i = 0; ret == 0; i++, ret = cursor->next(cursor)) {
		WT_ERR(cursor->get_key(cursor, &uri));
		name = uri;
		/* Stop at the first key that lacks this table's prefix. */
		if (!WT_PREFIX_SKIP(name, tmp->data))
			break;

		/* Is this the index we are looking for? */
		match = idxname == NULL || WT_STRING_MATCH(name, idxname, len);

		/*
		 * Ensure there is space, including if we have to make room for
		 * a new entry in the middle of the list.
		 */
		WT_ERR(__wt_realloc_def(session, &table->idx_alloc,
		    WT_MAX(i, table->nindices) + 1, &table->indices));

		/* Keep the in-memory list in sync with the metadata. */
		cmp = 0;
		while (table->indices[i] != NULL &&
		    (cmp = strcmp(uri, table->indices[i]->name)) > 0) {
			/*
			 * Index no longer exists, remove it: the in-memory
			 * entry sorts before the current metadata key.
			 *
			 * NOTE(review): only the WT_INDEX structure is freed
			 * here; confirm nothing it references needs releasing
			 * through __wt_schema_destroy_index.
			 */
			__wt_free(session, table->indices[i]);
			memmove(&table->indices[i], &table->indices[i + 1],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[--table->nindices] = NULL;
		}
		if (cmp < 0) {
			/*
			 * Make room for a new index: the metadata key sorts
			 * before the entry currently in this slot.
			 */
			memmove(&table->indices[i + 1], &table->indices[i],
			    (table->nindices - i) * sizeof(WT_INDEX *));
			table->indices[i] = NULL;
			++table->nindices;
		}

		if (!match)
			continue;

		if (table->indices[i] == NULL) {
			/* Build the in-memory index from its metadata. */
			WT_ERR(cursor->get_value(cursor, &idxconf));
			WT_ERR(__wt_calloc_one(session, &idx));
			WT_ERR(__wt_strdup(session, uri, &idx->name));
			WT_ERR(__wt_strdup(session, idxconf, &idx->config));
			WT_ERR(__open_index(session, table, idx));

			/*
			 * If we're checking the creation of an index before a
			 * table is fully created, don't save the index: it
			 * will need to be reopened once the table is complete.
			 */
			if (!table->cg_complete) {
				WT_ERR(
				    __wt_schema_destroy_index(session, &idx));
				if (idxname != NULL)
					break;
				continue;
			}

			/*
			 * The table now owns the index; clear the local so the
			 * cleanup code at the end doesn't destroy it.
			 */
			table->indices[i] = idx;
			idx = NULL;

			/*
			 * If the slot is bigger than anything else we've seen,
			 * bump the number of indices.
			 */
			if (i >= table->nindices)
				table->nindices = i + 1;
		}

		/* If we were looking for a single index, we're done. */
		if (indexp != NULL)
			*indexp = table->indices[i];
		if (idxname != NULL)
			break;
	}
	/* Running off the end of the metadata is not an error. */
	WT_ERR_NOTFOUND_OK(ret);
	if (idxname != NULL && !match)
		ret = WT_NOTFOUND;

	/* If we did a full pass, we won't need to do it again. */
	if (idxname == NULL) {
		table->nindices = i;
		table->idx_complete = true;
	}

err:	/* Cleanup shared by the success and failure paths. */
	WT_TRET(__wt_metadata_cursor_release(session, &cursor));
	WT_TRET(__wt_schema_destroy_index(session, &idx));

	__wt_scr_free(session, &tmp);
	return (ret);
}
/*
 * __wt_block_open --
 *	Open a block handle.
 *
 *	session: the current session.
 *	filename: name of the underlying file; if a handle with this name is
 *	    already on the connection's list, it is shared and its reference
 *	    count is incremented.
 *	cfg: configuration stack supplying "block_allocation", "os_cache_max"
 *	    and "os_cache_dirty_max".
 *	forced_salvage: if non-zero, the description block is not read.
 *	readonly: selects the file type passed to __wt_open.
 *	allocsize: allocation size recorded in the new handle.
 *	blockp: set to the handle on success, NULL otherwise.
 *
 *	Returns 0 on success or an error code.  The connection's block lock is
 *	held for the duration of the call and released on every exit path.
 */
int
__wt_block_open(WT_SESSION_IMPL *session,
    const char *filename, const char *cfg[],
    int forced_salvage, int readonly, uint32_t allocsize, WT_BLOCK **blockp)
{
	WT_BLOCK *block;
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	WT_TRET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename));

	conn = S2C(session);
	*blockp = NULL;

	/* Share an existing handle for this file if there is one. */
	__wt_spin_lock(session, &conn->block_lock);
	TAILQ_FOREACH(block, &conn->blockqh, q) {
		if (strcmp(filename, block->name) == 0) {
			++block->ref;
			*blockp = block;
			__wt_spin_unlock(session, &conn->block_lock);
			return (0);
		}
	}

	/*
	 * Basic structure allocation, initialization.  The list walk above
	 * ran to completion, so block is NULL here until the allocation
	 * succeeds.
	 */
	WT_ERR(__wt_calloc_def(session, 1, &block));
	block->ref = 1;
	TAILQ_INSERT_HEAD(&conn->blockqh, block, q);

	WT_ERR(__wt_strdup(session, filename, &block->name));
	block->allocsize = allocsize;

	WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval));
	block->allocfirst =
	    WT_STRING_MATCH("first", cval.str, cval.len) ? 1 : 0;

	/* Configuration: optional OS buffer cache maximum size. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval));
	block->os_cache_max = (size_t)cval.val;
#ifdef HAVE_POSIX_FADVISE
	if (conn->direct_io && block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported in combination with direct_io");
#else
	if (block->os_cache_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_max not supported if posix_fadvise not "
		    "available");
#endif

	/* Configuration: optional immediate write scheduling flag. */
	WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval));
	block->os_cache_dirty_max = (size_t)cval.val;
#ifdef HAVE_SYNC_FILE_RANGE
	if (conn->direct_io && block->os_cache_dirty_max)
		WT_ERR_MSG(session, EINVAL,
		    "os_cache_dirty_max not supported in combination with "
		    "direct_io");
#else
	if (block->os_cache_dirty_max) {
		/*
		 * Ignore any setting if it is not supported.
		 */
		block->os_cache_dirty_max = 0;
		WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
		    "os_cache_dirty_max ignored when sync_file_range not "
		    "available"));
	}
#endif

	/* Open the underlying file handle. */
	WT_ERR(__wt_open(session, filename, 0, 0,
	    readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA,
	    &block->fh));

	/* Initialize the live checkpoint's lock. */
	WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager"));

	/*
	 * Read the description information from the first block.
	 *
	 * Salvage is a special case: if we're forcing the salvage, we don't
	 * look at anything, including the description information.
	 */
	if (!forced_salvage)
		WT_ERR(__desc_read(session, block));

	*blockp = block;
	__wt_spin_unlock(session, &conn->block_lock);
	return (0);

err:	/*
	 * If the structure allocation itself failed there is no handle to
	 * tear down; don't hand a NULL pointer to the destroy function.
	 */
	if (block != NULL)
		WT_TRET(__block_destroy(session, block));
	__wt_spin_unlock(session, &conn->block_lock);
	return (ret);
}
/*
 * __logmgr_config --
 *	Parse and setup the logging server options.
 *
 *	session: the current session.
 *	cfg: configuration stack holding the "log.*" settings.
 *	runp: set to whether logging is enabled.
 *	reconfig: true when called for a reconfiguration rather than at
 *	    connection creation; create-only settings are then skipped.
 *
 *	Returns 0 on success or an error code.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	bool enabled, was_enabled;

	/*
	 * A note on reconfiguration: the standard "is this configuration string
	 * allowed" checks should fail if reconfiguration has invalid strings,
	 * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
	 * the connection reconfiguration method doesn't allow those strings.
	 * Additionally, the base configuration values during reconfiguration
	 * are the currently configured values (so we don't revert to default
	 * values when repeatedly reconfiguring), and configuration processing
	 * of a currently set value should not change the currently set value.
	 *
	 * In this code path, log server reconfiguration does not stop/restart
	 * the log server, so there's no point in re-evaluating configuration
	 * strings that cannot be reconfigured, risking bugs in configuration
	 * setup, and depending on evaluation of currently set values to always
	 * result in the currently set value.  Skip tests for any configuration
	 * strings which don't make sense during reconfiguration, but don't
	 * worry about error reporting because it should never happen.
	 */

	conn = S2C(session);
	was_enabled = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) != 0;

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	enabled = cval.val != 0;

	/*
	 * When reconfiguring, the requested state has to agree with the state
	 * already in effect: switching logging on or off is an error.
	 *
	 * See above: should never happen.
	 */
	if (reconfig && enabled != was_enabled)
		return (EINVAL);

	/* Logging is incompatible with in-memory */
	if (enabled) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = enabled;

	/*
	 * Setup a log path and compression even if logging is disabled in case
	 * we are going to print a log.  Only do this on creation.  Once a
	 * compressor or log path are set they cannot be changed.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &cval));
		WT_RET(__wt_compressor_config(
		    session, &cval, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
		WT_RET(__wt_strndup(
		    session, cval.str, cval.len, &conn->log_path));
	}

	/* Nothing further to set up unless logging is enabled. */
	if (!enabled)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	/*
	 * The file size cannot be reconfigured.  The amount of memory allocated
	 * to the log slots may be based on the log file size at creation and we
	 * don't want to re-allocate that memory while running.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
		conn->log_file_max = (wt_off_t)cval.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * If pre-allocation is configured, set the initial number to a few.
	 * We'll adapt as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Note it's meaningless to reconfigure this value during runtime, it
	 * only matters on create before recovery runs.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets_def(
		    session, cfg, "log.recover", 0, &cval));
		if (WT_STRING_MATCH("error", cval.str, cval.len))
			FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
	}

	/* Zero-filling log files requires a writable connection. */
	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
	if (cval.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));

	/* Signal the log condition variable, if one has been created. */
	if (conn->log_cond != NULL)
		WT_RET(__wt_cond_auto_signal(session, conn->log_cond));

	return (0);
}
/*
 * __wt_lsm_tree_create --
 *	Create an LSM tree structure for the given name.
 *
 *	session: the current session.
 *	uri: name of the LSM tree.
 *	exclusive: if non-zero, an already existing tree is an error (EEXIST);
 *	    otherwise an existing tree is reported as success.
 *	config: creation configuration, layered over the session_create
 *	    defaults.
 *
 *	Returns 0 on success, EEXIST as described above, EINVAL for
 *	inconsistent settings, or another error code.
 */
int
__wt_lsm_tree_create(WT_SESSION_IMPL *session,
    const char *uri, int exclusive, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_ITEM(buf);
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	const char *cfg[] =
	    { WT_CONFIG_BASE(session, session_create), config, NULL };
	char *tmpconfig;

	/* If the tree is open, it already exists. */
	if ((ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree)) == 0) {
		__wt_lsm_tree_release(session, lsm_tree);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/*
	 * If the tree has metadata, it already exists.
	 *
	 * !!!
	 * Use a local variable: we don't care what the existing configuration
	 * is, but we don't want to overwrite the real config.
	 *
	 * Keep the search result in ret so the not-found check below tests
	 * this call, rather than the result of the handle lookup above.
	 */
	if ((ret = __wt_metadata_search(session, uri, &tmpconfig)) == 0) {
		__wt_free(session, tmpconfig);
		return (exclusive ? EEXIST : 0);
	}
	WT_RET_NOTFOUND_OK(ret);

	/* Reject record-number keys before allocating anything. */
	WT_RET(__wt_config_gets(session, cfg, "key_format", &cval));
	if (WT_STRING_MATCH("r", cval.str, cval.len))
		WT_RET_MSG(session, EINVAL,
		    "LSM trees cannot be configured as column stores");

	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));

	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));

	WT_ERR(__wt_config_gets(session, cfg, "key_format", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->key_format));
	WT_ERR(__wt_config_gets(session, cfg, "value_format", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->value_format));

	WT_ERR(__wt_config_gets(session, cfg, "collator", &cval));
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->collator_name));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.auto_throttle", &cval));
	if (cval.val)
		F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
	else
		F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);

	/* Bloom filter settings; "oldest" requires filters to be enabled. */
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom", &cval));
	FLD_SET(lsm_tree->bloom,
	    (cval.val == 0 ? WT_LSM_BLOOM_OFF : WT_LSM_BLOOM_MERGED));
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_oldest", &cval));
	if (cval.val != 0)
		FLD_SET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST);

	if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) &&
	    FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))
		WT_ERR_MSG(session, EINVAL,
		    "Bloom filters can only be created on newest and oldest "
		    "chunks if bloom filters are enabled");

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_config", &cval));
	if (cval.type == WT_CONFIG_ITEM_STRUCT) {
		/* Drop the first and last character of a structure value. */
		cval.str++;
		cval.len -= 2;
	}
	WT_ERR(__wt_strndup(
	    session, cval.str, cval.len, &lsm_tree->bloom_config));

	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_bit_count", &cval));
	lsm_tree->bloom_bit_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.bloom_hash_count", &cval));
	lsm_tree->bloom_hash_count = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_max", &cval));
	lsm_tree->chunk_max = (uint64_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.chunk_size", &cval));
	lsm_tree->chunk_size = (uint64_t)cval.val;
	if (lsm_tree->chunk_size > lsm_tree->chunk_max)
		WT_ERR_MSG(session, EINVAL,
		    "Chunk size (chunk_size) must be smaller than or equal to "
		    "the maximum chunk size (chunk_max)");
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_max", &cval));
	lsm_tree->merge_max = (uint32_t)cval.val;
	WT_ERR(__wt_config_gets(session, cfg, "lsm.merge_min", &cval));
	lsm_tree->merge_min = (uint32_t)cval.val;
	if (lsm_tree->merge_min > lsm_tree->merge_max)
		WT_ERR_MSG(session, EINVAL,
		    "LSM merge_min must be less than or equal to merge_max");

	/*
	 * Set up the config for each chunk.
	 *
	 * Make the memory_page_max double the chunk size, so application
	 * threads don't immediately try to force evict the chunk when the
	 * worker thread clears the NO_EVICTION flag.
	 */
	WT_ERR(__wt_scr_alloc(session, 0, &buf));
	WT_ERR(__wt_buf_fmt(session, buf,
	    "%s,key_format=u,value_format=u,memory_page_max=%" PRIu64,
	    config, 2 * lsm_tree->chunk_max));
	WT_ERR(__wt_strndup(
	    session, buf->data, buf->size, &lsm_tree->file_config));

	/* Create the first chunk and flush the metadata. */
	WT_ERR(__wt_lsm_meta_write(session, lsm_tree));

	/* Discard our partially populated handle. */
	ret = __lsm_tree_discard(session, lsm_tree);
	lsm_tree = NULL;

	/*
	 * Open our new tree and add it to the handle cache.  Don't discard on
	 * error: the returned handle is NULL on error, and the metadata
	 * tracking macros handle cleaning up on failure.
	 */
	if (ret == 0)
		ret = __lsm_tree_open(session, uri, &lsm_tree);
	if (ret == 0)
		__wt_lsm_tree_release(session, lsm_tree);

	if (0) {
err:		WT_TRET(__lsm_tree_discard(session, lsm_tree));
	}
	__wt_scr_free(&buf);
	return (ret);
}
/*
 * __wt_lsm_meta_read --
 *	Read the metadata for an LSM tree.
 *
 *	session: the current session.
 *	lsm_tree: the tree to populate; its name selects the metadata entry,
 *	    and its format, bloom, chunk and merge fields are filled in from
 *	    that entry.
 *
 *	Returns 0 on success, EINVAL if a chunk attribute appears in the
 *	"chunks" or "old_chunks" list before any chunk it could belong to, or
 *	another error code.  Unknown keys are ignored.
 */
int
__wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_CONFIG cparser, lparser;
	WT_CONFIG_ITEM ck, cv, fileconf, lk, lv, metadata;
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	char *lsmconfig;
	u_int nchunks;

	chunk = NULL;			/* -Wconditional-uninitialized */

	/* LSM trees inherit the merge setting from the connection. */
	if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
		F_SET(lsm_tree, WT_LSM_TREE_MERGES);

	WT_RET(__wt_metadata_search(session, lsm_tree->name, &lsmconfig));
	WT_ERR(__wt_config_init(session, &cparser, lsmconfig));
	while ((ret = __wt_config_next(&cparser, &ck, &cv)) == 0) {
		if (WT_STRING_MATCH("key_format", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->key_format);
			WT_ERR(__wt_strndup(session,
			    cv.str, cv.len, &lsm_tree->key_format));
		} else if (WT_STRING_MATCH("value_format", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->value_format);
			WT_ERR(__wt_strndup(session,
			    cv.str, cv.len, &lsm_tree->value_format));
		} else if (WT_STRING_MATCH("collator", ck.str, ck.len)) {
			if (cv.len == 0 ||
			    WT_STRING_MATCH("none", cv.str, cv.len))
				continue;
			/*
			 * Extract the application-supplied metadata (if any)
			 * from the file configuration.
			 */
			WT_ERR(__wt_config_getones(
			    session, lsmconfig, "file_config", &fileconf));
			WT_CLEAR(metadata);
			WT_ERR_NOTFOUND_OK(__wt_config_subgets(
			    session, &fileconf, "app_metadata", &metadata));
			WT_ERR(__wt_collator_config(session, lsm_tree->name,
			    &cv, &metadata,
			    &lsm_tree->collator, &lsm_tree->collator_owned));
			WT_ERR(__wt_strndup(session,
			    cv.str, cv.len, &lsm_tree->collator_name));
		} else if (WT_STRING_MATCH("bloom_config", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->bloom_config);
			/* Don't include the brackets. */
			WT_ERR(__wt_strndup(session,
			    cv.str + 1, cv.len - 2, &lsm_tree->bloom_config));
		} else if (WT_STRING_MATCH("file_config", ck.str, ck.len)) {
			__wt_free(session, lsm_tree->file_config);
			/* Don't include the brackets. */
			WT_ERR(__wt_strndup(session,
			    cv.str + 1, cv.len - 2, &lsm_tree->file_config));
		} else if (WT_STRING_MATCH("auto_throttle", ck.str, ck.len)) {
			if (cv.val)
				F_SET(lsm_tree, WT_LSM_TREE_THROTTLE);
			else
				F_CLR(lsm_tree, WT_LSM_TREE_THROTTLE);
		} else if (WT_STRING_MATCH("bloom", ck.str, ck.len))
			lsm_tree->bloom = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("bloom_bit_count", ck.str, ck.len))
			lsm_tree->bloom_bit_count = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("bloom_hash_count", ck.str, ck.len))
			lsm_tree->bloom_hash_count = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("chunk_count_limit", ck.str, ck.len)) {
			lsm_tree->chunk_count_limit = (uint32_t)cv.val;
			/* A non-zero chunk count limit turns merges off. */
			if (cv.val != 0)
				F_CLR(lsm_tree, WT_LSM_TREE_MERGES);
		} else if (WT_STRING_MATCH("chunk_max", ck.str, ck.len))
			lsm_tree->chunk_max = (uint64_t)cv.val;
		else if (WT_STRING_MATCH("chunk_size", ck.str, ck.len))
			lsm_tree->chunk_size = (uint64_t)cv.val;
		else if (WT_STRING_MATCH("merge_max", ck.str, ck.len))
			lsm_tree->merge_max = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("merge_min", ck.str, ck.len))
			lsm_tree->merge_min = (uint32_t)cv.val;
		else if (WT_STRING_MATCH("last", ck.str, ck.len))
			lsm_tree->last = (u_int)cv.val;
		else if (WT_STRING_MATCH("chunks", ck.str, ck.len)) {
			/*
			 * Each chunk starts with an "id" entry; the entries
			 * that follow describe that chunk.  Start with no
			 * current chunk so an attribute that precedes every
			 * "id" is rejected instead of dereferencing NULL.
			 */
			chunk = NULL;
			WT_ERR(__wt_config_subinit(session, &lparser, &cv));
			for (nchunks = 0; (ret =
			    __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
				if (WT_STRING_MATCH("id", lk.str, lk.len)) {
					WT_ERR(__wt_realloc_def(session,
					    &lsm_tree->chunk_alloc,
					    nchunks + 1, &lsm_tree->chunk));
					WT_ERR(
					    __wt_calloc_one(session, &chunk));
					lsm_tree->chunk[nchunks++] = chunk;
					chunk->id = (uint32_t)lv.val;
					WT_ERR(__wt_lsm_tree_chunk_name(session,
					    lsm_tree, chunk->id, &chunk->uri));
					F_SET(chunk,
					    WT_LSM_CHUNK_ONDISK |
					    WT_LSM_CHUNK_STABLE);
				} else if (WT_STRING_MATCH(
				    "bloom", lk.str, lk.len)) {
					if (chunk == NULL)
						WT_ERR_MSG(session, EINVAL,
						    "%s: chunk attribute "
						    "without a chunk id",
						    lsm_tree->name);
					WT_ERR(__wt_lsm_tree_bloom_name(
					    session, lsm_tree,
					    chunk->id, &chunk->bloom_uri));
					F_SET(chunk, WT_LSM_CHUNK_BLOOM);
					continue;
				} else if (WT_STRING_MATCH(
				    "chunk_size", lk.str, lk.len)) {
					if (chunk == NULL)
						WT_ERR_MSG(session, EINVAL,
						    "%s: chunk attribute "
						    "without a chunk id",
						    lsm_tree->name);
					chunk->size = (uint64_t)lv.val;
					continue;
				} else if (WT_STRING_MATCH(
				    "count", lk.str, lk.len)) {
					if (chunk == NULL)
						WT_ERR_MSG(session, EINVAL,
						    "%s: chunk attribute "
						    "without a chunk id",
						    lsm_tree->name);
					chunk->count = (uint64_t)lv.val;
					continue;
				} else if (WT_STRING_MATCH(
				    "generation", lk.str, lk.len)) {
					if (chunk == NULL)
						WT_ERR_MSG(session, EINVAL,
						    "%s: chunk attribute "
						    "without a chunk id",
						    lsm_tree->name);
					chunk->generation = (uint32_t)lv.val;
					continue;
				}
			}
			WT_ERR_NOTFOUND_OK(ret);
			lsm_tree->nchunks = nchunks;
		} else if (WT_STRING_MATCH("old_chunks", ck.str, ck.len)) {
			/*
			 * Any key other than "bloom" names an old chunk, and a
			 * "bloom" entry applies to the old chunk before it.
			 * Reset the current chunk so a leading "bloom" cannot
			 * be applied to a chunk left over from the "chunks"
			 * list (or to NULL).
			 */
			chunk = NULL;
			WT_ERR(__wt_config_subinit(session, &lparser, &cv));
			for (nchunks = 0; (ret =
			    __wt_config_next(&lparser, &lk, &lv)) == 0; ) {
				if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
					if (chunk == NULL)
						WT_ERR_MSG(session, EINVAL,
						    "%s: old chunk bloom "
						    "filter without a chunk",
						    lsm_tree->name);
					WT_ERR(__wt_strndup(session,
					    lv.str, lv.len, &chunk->bloom_uri));
					F_SET(chunk, WT_LSM_CHUNK_BLOOM);
					continue;
				}
				WT_ERR(__wt_realloc_def(session,
				    &lsm_tree->old_alloc, nchunks + 1,
				    &lsm_tree->old_chunks));
				WT_ERR(__wt_calloc_one(session, &chunk));
				lsm_tree->old_chunks[nchunks++] = chunk;
				WT_ERR(__wt_strndup(session,
				    lk.str, lk.len, &chunk->uri));
				F_SET(chunk, WT_LSM_CHUNK_ONDISK);
			}
			WT_ERR_NOTFOUND_OK(ret);
			lsm_tree->nold_chunks = nchunks;
		}
		/*
		 * Ignore any other values: the metadata entry might have been
		 * created by a future release, with unknown options.
		 */
	}
	WT_ERR_NOTFOUND_OK(ret);

	/*
	 * If the default merge_min was not overridden, calculate it now.  We
	 * do this here so that trees created before merge_min was added get a
	 * sane value.
	 */
	if (lsm_tree->merge_min < 2)
		lsm_tree->merge_min = WT_MAX(2, lsm_tree->merge_max / 2);

err:	__wt_free(session, lsmconfig);
	return (ret);
}
/*
 * __config_process_value --
 *	Post-process a parsed configuration value: turn the identifiers
 *	"true" and "false" into booleans, and convert numbers, applying any
 *	trailing size suffixes.
 *
 *	conf: the parser, used for error reporting.
 *	value: the item to convert in place.
 *
 *	Returns 0 on success, or the result of __config_err (called with
 *	ERANGE) when a well-formed number is out of range.
 */
static int
__config_process_value(WT_CONFIG *conf, WT_CONFIG_ITEM *value)
{
	const char *end;
	char *p;

	/* Empty values are okay: we can't do anything interesting with them. */
	if (value->len == 0)
		return (0);

	/* Identifiers: only the boolean literals need conversion. */
	if (value->type == WT_CONFIG_ITEM_ID) {
		if (WT_STRING_MATCH("false", value->str, value->len)) {
			value->type = WT_CONFIG_ITEM_BOOL;
			value->val = 0;
		} else if (WT_STRING_MATCH("true", value->str, value->len)) {
			value->type = WT_CONFIG_ITEM_BOOL;
			value->val = 1;
		}
		return (0);
	}

	/* Everything else passes through untouched, except numbers. */
	if (value->type != WT_CONFIG_ITEM_NUM)
		return (0);

	errno = 0;
	value->val = strtoll(value->str, &p, 10);

	/* Walk whatever follows the digits, one character at a time. */
	for (end = value->str + value->len; p < end; ++p)
		switch (*p) {
		case 'b':
		case 'B':
			/* Byte: no change. */
			break;
		case 'k':
		case 'K':
			WT_SHIFT_INT64(value->val, 10);
			break;
		case 'm':
		case 'M':
			WT_SHIFT_INT64(value->val, 20);
			break;
		case 'g':
		case 'G':
			WT_SHIFT_INT64(value->val, 30);
			break;
		case 't':
		case 'T':
			WT_SHIFT_INT64(value->val, 40);
			break;
		case 'p':
		case 'P':
			WT_SHIFT_INT64(value->val, 50);
			break;
		default:
			/*
			 * Not a well-formed number.  That may be fine: the
			 * required type is checked by __wt_config_check, so
			 * just downgrade the item to an identifier.
			 */
			value->type = WT_CONFIG_ITEM_ID;
			break;
		}

	/*
	 * Only report a range error for input that parsed completely as a
	 * number; malformed integers are left for __wt_config_check to
	 * diagnose.
	 */
	if (value->type == WT_CONFIG_ITEM_NUM && errno == ERANGE)
		goto range;

	return (0);

range:
	return (__config_err(conf, "Number out of range", ERANGE));
}