/*
 * __wt_txn_config --
 *	Apply the "isolation", "sync" and "snapshot" settings found in the
 *	configuration stack to the session's transaction.
 */
int
__wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM item;
	WT_TXN *txn;

	txn = &session->txn;

	/* An empty isolation string leaves the current level untouched. */
	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &item));
	if (item.len != 0) {
		if (WT_STRING_MATCH("snapshot", item.str, item.len))
			txn->isolation = WT_ISO_SNAPSHOT;
		else if (WT_STRING_MATCH(
		    "read-committed", item.str, item.len))
			txn->isolation = WT_ISO_READ_COMMITTED;
		else
			txn->isolation = WT_ISO_READ_UNCOMMITTED;
	}

	/*
	 * The sync setting is normally inherited from the connection; an
	 * explicit "sync" value for this transaction overrides it.  The
	 * lookup default is a sentinel that is neither 0 nor 1, so an
	 * implicit inherit can be told apart from an explicit choice.
	 */
	F_CLR(txn, WT_TXN_SYNC_SET);
	WT_RET(__wt_config_gets_def(
	    session, cfg, "sync", (int)UINT_MAX, &item));
	if (item.val == 0) {
		/*
		 * Explicitly off: remember the choice was explicit so that
		 * commit_transaction doesn't overwrite it, and clear the
		 * transaction's sync field.
		 */
		F_SET(txn, WT_TXN_SYNC_SET);
		txn->txn_logsync = 0;
	} else if (item.val == 1)
		/* Explicitly on: only record that the choice was explicit. */
		F_SET(txn, WT_TXN_SYNC_SET);

	/*
	 * Named snapshots: the get function both validates the name and
	 * installs the snapshot, which avoids walking the list of named
	 * snapshots twice while opening the transaction.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &item));
	if (item.len > 0)
		WT_RET(__wt_txn_named_snapshot_get(session, &item));

	return (0);
}
/*
 * __wt_txn_begin --
 *	Begin a transaction: choose the isolation level, allocate and publish
 *	a transaction ID, mark the transaction running and, for snapshot
 *	isolation, refresh the snapshot.
 */
int
__wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *txn_state;

	txn = &session->txn;
	txn_global = &S2C(session)->txn_global;
	txn_state = &txn_global->states[session->id];

	/* The session must not already have a published transaction ID. */
	WT_ASSERT(session, txn_state->id == WT_TXN_NONE);

	/* With no explicit isolation, fall back to the session's level. */
	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
	if (cval.len == 0)
		txn->isolation = session->isolation;
	else if (WT_STRING_MATCH("snapshot", cval.str, cval.len))
		txn->isolation = TXN_ISO_SNAPSHOT;
	else if (WT_STRING_MATCH("read-committed", cval.str, cval.len))
		txn->isolation = TXN_ISO_READ_COMMITTED;
	else
		txn->isolation = TXN_ISO_READ_UNCOMMITTED;

	/*
	 * Allocate a transaction ID.
	 *
	 * The candidate ID is stored in the session's global state slot
	 * before the compare-and-swap bumps the global counter, so the ID is
	 * published before the counter moves past it.
	 *
	 * When threads race, only the one whose swap succeeds proceeds with
	 * its ID; the others loop and pick up a fresh value.  Racing can
	 * waste IDs: that is the price of keeping this path latch free.
	 */
	do {
		txn_state->id = txn->id = txn_global->current;
	} while (!WT_ATOMIC_CAS(txn_global->current, txn->id, txn->id + 1));

	/* Once the ID space is exhausted there is nothing more we can do. */
	if (txn->id == WT_TXN_ABORTED)
		WT_RET_MSG(session, ENOMEM, "Out of transaction IDs");

	F_SET(txn, TXN_RUNNING);

	if (txn->isolation == TXN_ISO_SNAPSHOT)
		__wt_txn_refresh(session, WT_TXN_NONE, 1);

	return (0);
}
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.  Decodes the
 *	"bulk" setting, acquires the btree handle for a "file:" URI and
 *	creates the cursor on it.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	uint32_t flags;
	bool bitmap, bulk, is_boolean;

	bitmap = bulk = false;
	flags = 0;

	/* "bulk" is either a boolean (or 0/1) or the string "bitmap". */
	WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
	is_boolean = cval.type == WT_CONFIG_ITEM_BOOL ||
	    (cval.type == WT_CONFIG_ITEM_NUM &&
	    (cval.val == 0 || cval.val == 1));
	if (is_boolean) {
		bitmap = false;
		bulk = cval.val != 0;
	} else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
		bitmap = bulk = true;
	else
		WT_RET_MSG(session, EINVAL,
		    "Value for 'bulk' must be a boolean or 'bitmap'");

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

	/* Get the handle and lock it while the cursor is using it. */
	if (!WT_PREFIX_MATCH(uri, "file:"))
		WT_RET(__wt_bad_object_type(session, uri));
	else {
		/*
		 * A bulk open gets the handle while holding the checkpoint
		 * lock, so it doesn't fail with EBUSY because of a
		 * database-wide checkpoint.
		 */
		if (bulk) {
			__wt_spin_lock(
			    session, &S2C(session)->checkpoint_lock);
			ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags);
			__wt_spin_unlock(
			    session, &S2C(session)->checkpoint_lock);
		} else
			ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags);
		WT_RET(ret);
	}

	WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));

	/* Increment the data-source's in-use counter. */
	__wt_cursor_dhandle_incr_use(session);
	return (0);

	/* If the cursor could not be opened, release the handle. */
err:	WT_TRET(__wt_session_release_btree(session));
	return (ret);
}
/*
 * __logmgr_config --
 *	Parse the "log" configuration and set up the connection's logging
 *	state.  On return *runp is non-zero if logging is enabled.
 */
static int
__logmgr_config(WT_SESSION_IMPL *session, const char **cfg, int *runp)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* The logging configuration is off by default. */
	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	*runp = cval.val != 0;

	/*
	 * The compressor and the log path are set up whether or not logging
	 * is enabled, in case we are going to print a log.
	 */
	conn->log_compressor = NULL;
	WT_RET(__wt_config_gets_none(session, cfg, "log.compressor", &cval));
	WT_RET(__wt_compressor_config(session, &cval, &conn->log_compressor));

	WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
	WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->log_path));

	/* Everything below only matters when logging is enabled. */
	if (!*runp)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
	conn->log_file_max = (wt_off_t)cval.val;
	WT_STAT_FAST_CONN_SET(session, log_max_filesize, conn->log_file_max);

	/*
	 * If pre-allocation is configured, start with a single file; the
	 * number adapts as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0) {
		FLD_SET(conn->log_flags, WT_CONN_LOG_PREALLOC);
		conn->log_prealloc = 1;
	}

	/* Only the "error" recovery setting needs to be remembered. */
	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
	if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len))
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);

	return (__logmgr_sync_cfg(session, cfg));
}
/*
 * __wt_schema_drop --
 *	Process a WT_SESSION::drop operation for all supported types,
 *	dispatching on the URI prefix and tracking metadata changes around
 *	the type-specific drop.
 */
int
__wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_DATA_SOURCE *dsrc;
	WT_DECL_RET;
	bool force;

	WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
	force = cval.val != 0;

	WT_RET(__wt_meta_track_on(session));

	/* Paranoia: clear any handle from our caller. */
	session->dhandle = NULL;

	/* Built-in types first, then custom data sources. */
	if (WT_PREFIX_MATCH(uri, "colgroup:"))
		ret = __drop_colgroup(session, uri, force, cfg);
	else if (WT_PREFIX_MATCH(uri, "file:"))
		ret = __drop_file(session, uri, force, cfg);
	else if (WT_PREFIX_MATCH(uri, "index:"))
		ret = __drop_index(session, uri, force, cfg);
	else if (WT_PREFIX_MATCH(uri, "lsm:"))
		ret = __wt_lsm_tree_drop(session, uri, cfg);
	else if (WT_PREFIX_MATCH(uri, "table:"))
		ret = __drop_table(session, uri, cfg);
	else if ((dsrc = __wt_schema_get_source(session, uri)) == NULL)
		ret = __wt_bad_object_type(session, uri);
	else if (dsrc->drop == NULL)
		ret = __wt_object_unsupported(session, uri);
	else
		ret = dsrc->drop(
		    dsrc, &session->iface, uri, (WT_CONFIG_ARG *)cfg);

	/*
	 * WT_NOTFOUND is reported as ENOENT, on the assumption it means
	 * there was no metadata entry; with force set, a missing object is
	 * not an error at all.
	 */
	if (ret == WT_NOTFOUND || ret == ENOENT) {
		if (force)
			ret = 0;
		else
			ret = ENOENT;
	}

	/* Bump the schema generation so that stale data is ignored. */
	++S2C(session)->schema_gen;

	/* Stop tracking, passing along whether the drop failed. */
	WT_TRET(__wt_meta_track_off(session, true, ret != 0));

	return (ret);
}
/*
 * __wt_curfile_create --
 *	Open a cursor for the btree handle currently held by the session.
 *	On failure the cursor memory is freed; releasing the handle is left
 *	to the caller.
 */
int
__wt_curfile_create(WT_SESSION_IMPL *session,
    WT_CURSOR *owner, const char *cfg[], int bulk, int bitmap,
    WT_CURSOR **cursorp)
{
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,	/* get-key */
	    __wt_cursor_get_value,	/* get-value */
	    __wt_cursor_set_key,	/* set-key */
	    __wt_cursor_set_value,	/* set-value */
	    __curfile_compare,		/* compare */
	    __curfile_equals,		/* equals */
	    __curfile_next,		/* next */
	    __curfile_prev,		/* prev */
	    __curfile_reset,		/* reset */
	    __curfile_search,		/* search */
	    __curfile_search_near,	/* search-near */
	    __curfile_insert,		/* insert */
	    __curfile_update,		/* update */
	    __curfile_remove,		/* remove */
	    __wt_cursor_reconfigure,	/* reconfigure */
	    __curfile_close);		/* close */
	WT_BTREE *btree;
	WT_CONFIG_ITEM cval;
	WT_CURSOR *cursor;
	WT_CURSOR_BTREE *cbt;
	WT_CURSOR_BULK *cbulk;
	WT_DECL_RET;
	size_t alloc_size;

	/* The public cursor must be the first field of the btree cursor. */
	WT_STATIC_ASSERT(offsetof(WT_CURSOR_BTREE, iface) == 0);

	cbt = NULL;

	btree = S2BT(session);
	WT_ASSERT(session, btree != NULL);

	/* Bulk cursors are allocated as the larger bulk structure. */
	if (bulk)
		alloc_size = sizeof(WT_CURSOR_BULK);
	else
		alloc_size = sizeof(WT_CURSOR_BTREE);
	WT_RET(__wt_calloc(session, 1, alloc_size, &cbt));

	/* Start from the static method table, then fill in the details. */
	cursor = &cbt->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	cursor->internal_uri = btree->dhandle->name;
	cursor->key_format = btree->key_format;
	cursor->value_format = btree->value_format;
	cbt->btree = btree;

	if (bulk) {
		F_SET(cursor, WT_CURSTD_BULK);
		cbulk = (WT_CURSOR_BULK *)cbt;

		/* Optionally skip the validation of each bulk-loaded key. */
		WT_ERR(__wt_config_gets_def(
		    session, cfg, "skip_sort_check", 0, &cval));
		WT_ERR(__wt_curbulk_init(
		    session, cbulk, bitmap, cval.val != 0));
	}

	/*
	 * random_retrieval
	 * Random retrieval cursors only support next, reset and close: mark
	 * every method unsupported, then restore the supported ones.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
	if (cval.val != 0) {
		__wt_cursor_set_notsup(cursor);
		cursor->next = __curfile_next_random;
		cursor->reset = __curfile_reset;
	}

	/* Underlying btree initialization. */
	__wt_btcur_open(cbt);

	/* __wt_cursor_init is last so we don't have to clean up on error. */
	WT_ERR(__wt_cursor_init(
	    cursor, cursor->internal_uri, owner, cfg, cursorp));

	WT_STAT_FAST_CONN_INCR(session, cursor_create);
	WT_STAT_FAST_DATA_INCR(session, cursor_create);
	return (0);

err:	__wt_free(session, cbt);
	return (ret);
}
/*
 * __logmgr_config --
 *	Parse the "log" configuration and set up the connection's logging
 *	state, on open or on reconfigure.  On return *runp says whether
 *	logging is enabled.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	bool enabled, was_enabled;

	conn = S2C(session);

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	enabled = cval.val != 0;

	/*
	 * Reconfiguration cannot switch logging on or off: the requested
	 * state must match the state the connection is already in.
	 */
	if (reconfig) {
		was_enabled =
		    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) != 0;
		if (enabled != was_enabled)
			return (EINVAL);
	}

	/* Logging is incompatible with in-memory. */
	if (enabled) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = enabled;

	/*
	 * The log path and compressor are set up even when logging is
	 * disabled, in case we are going to print a log.  They are only set
	 * on creation: once set they cannot be changed.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &cval));
		WT_RET(__wt_compressor_config(
		    session, &cval, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
		WT_RET(__wt_strndup(
		    session, cval.str, cval.len, &conn->log_path));
	}

	/* We are done if logging isn't enabled. */
	if (!enabled)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	/*
	 * Attempts to change the file size on reconfigure are ignored.  The
	 * amount of memory allocated to the log slots may be based on the
	 * log file size at creation and we don't want to re-allocate that
	 * memory while running.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
		conn->log_file_max = (wt_off_t)cval.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * If pre-allocation is configured, start with a single file; the
	 * number adapts as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Reconfiguring the recovery setting at runtime is meaningless: it
	 * only matters on create, before recovery runs.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
	if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len))
		FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);

	/* Zero-filling log files makes no sense on a read-only connection. */
	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
	if (cval.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));

	/* Signal the log condition variable, if one exists. */
	if (conn->log_cond != NULL)
		WT_RET(__wt_cond_auto_signal(session, conn->log_cond));

	return (0);
}
/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 *
 *	Returns EINVAL if no transaction is running or if "sync" is given here
 *	after already being set explicitly in begin_transaction.  If the
 *	commit notification, the cursor value copy or the log write fails,
 *	the transaction is rolled back and the error returned.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_OP *op;
	u_int i;

	txn = &session->txn;
	conn = S2C(session);
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);

	if (!F_ISSET(txn, WT_TXN_RUNNING))
		WT_RET_MSG(session, EINVAL, "No transaction is active");

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_RET(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_FLUSH) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_RET_MSG(session, EINVAL,
			    "Sync already set during begin_transaction.");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification. */
	if (txn->notify != NULL)
		WT_TRET(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/*
	 * Copy values into any positioned cursors so they don't point to
	 * updates that could be freed once the snapshot and transaction ID
	 * are no longer pinned.
	 *
	 * This can fail, so it must happen before the point of no return: a
	 * failure here takes the rollback path below instead of returning
	 * with the transaction's operations already freed.  It also has to
	 * precede the log write, which releases the snapshot.
	 */
	if (ret == 0 && session->ncursors > 0)
		ret = __wt_session_copy_values(session);

	/* If we are logging, write a commit log record. */
	if (ret == 0 && txn->mod_count > 0 &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		ret = __wt_txn_log_commit(session, cfg);
		WT_ASSERT(session, ret == 0);
	}

	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	if (ret != 0) {
		WT_TRET(__wt_txn_rollback(session, cfg));
		return (ret);
	}

	/* Free memory associated with updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++)
		__wt_txn_op_free(session, op);
	txn->mod_count = 0;

	__wt_txn_release(session);
	return (0);
}
/*
 * __wt_curfile_open --
 *	WT_SESSION->open_cursor method for the btree cursor type.  Decodes the
 *	bulk-load settings, acquires the btree handle for a "file:" URI and
 *	creates the cursor on it.
 */
int
__wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
    WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CONFIG_ITEM cval;
	WT_DECL_RET;
	uint32_t flags;
	bool bitmap, bulk, checkpoint_wait, is_boolean;

	bitmap = bulk = false;
	checkpoint_wait = true;
	flags = 0;

	/*
	 * Decode the bulk configuration settings.  In-memory databases
	 * ignore bulk load.
	 */
	if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
		WT_RET(__wt_config_gets_def(session, cfg, "bulk", 0, &cval));
		is_boolean = cval.type == WT_CONFIG_ITEM_BOOL ||
		    (cval.type == WT_CONFIG_ITEM_NUM &&
		    (cval.val == 0 || cval.val == 1));
		if (is_boolean) {
			bitmap = false;
			bulk = cval.val != 0;
		} else if (WT_STRING_MATCH("bitmap", cval.str, cval.len))
			bitmap = bulk = true;
		else if (!WT_STRING_MATCH("unordered", cval.str, cval.len))
			/*
			 * "unordered" is accepted and otherwise ignored here:
			 * unordered bulk insert is a special case used
			 * internally by index creation on existing tables.
			 * It doesn't enforce any special semantics at the
			 * file level and primarily exists to avoid some
			 * locking problems between LSM and index creation.
			 */
			WT_RET_MSG(session, EINVAL,
			    "Value for 'bulk' must be a boolean or 'bitmap'");

		if (bulk) {
			WT_RET(__wt_config_gets(session,
			    cfg, "checkpoint_wait", &cval));
			checkpoint_wait = cval.val != 0;
		}
	}

	/* Bulk handles require exclusive access. */
	if (bulk)
		LF_SET(WT_BTREE_BULK | WT_DHANDLE_EXCLUSIVE);

	/* Get the handle and lock it while the cursor is using it. */
	if (!WT_PREFIX_MATCH(uri, "file:"))
		WT_RET(__wt_bad_object_type(session, uri));
	else {
		/*
		 * If we are opening exclusive and don't want a bulk cursor
		 * open to fail with EBUSY due to a database-wide checkpoint,
		 * get the handle while holding the checkpoint lock.
		 */
		if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait)
			WT_WITH_CHECKPOINT_LOCK(session,
			    ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags));
		else
			ret = __wt_session_get_btree_ckpt(
			    session, uri, cfg, flags);
		WT_RET(ret);
	}

	/* If the cursor could not be opened, release the handle. */
	if ((ret = __curfile_create(
	    session, owner, cfg, bulk, bitmap, cursorp)) != 0)
		WT_TRET(__wt_session_release_btree(session));

	return (ret);
}
/*
 * __curfile_create --
 *	Open a cursor for the btree handle currently held by the session.
 *	On failure the cursor is closed and *cursorp cleared; releasing the
 *	data handle is left to the caller.
 */
static int
__curfile_create(WT_SESSION_IMPL *session,
    WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap,
    WT_CURSOR **cursorp)
{
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,		/* get-key */
	    __wt_cursor_get_value,		/* get-value */
	    __wt_cursor_set_key,		/* set-key */
	    __wt_cursor_set_value,		/* set-value */
	    __curfile_compare,			/* compare */
	    __curfile_equals,			/* equals */
	    __curfile_next,			/* next */
	    __curfile_prev,			/* prev */
	    __curfile_reset,			/* reset */
	    __curfile_search,			/* search */
	    __curfile_search_near,		/* search-near */
	    __curfile_insert,			/* insert */
	    __wt_cursor_modify_notsup,		/* modify */
	    __curfile_update,			/* update */
	    __curfile_remove,			/* remove */
	    __curfile_reserve,			/* reserve */
	    __wt_cursor_reconfigure,		/* reconfigure */
	    __curfile_close);			/* close */
	WT_BTREE *btree;
	WT_CONFIG_ITEM cval;
	WT_CURSOR *cursor;
	WT_CURSOR_BTREE *cbt;
	WT_CURSOR_BULK *cbulk;
	WT_DECL_RET;
	size_t alloc_size;

	/* The public cursor must be the first field of the btree cursor. */
	WT_STATIC_ASSERT(offsetof(WT_CURSOR_BTREE, iface) == 0);

	cbt = NULL;

	btree = S2BT(session);
	WT_ASSERT(session, btree != NULL);

	/* Bulk cursors are allocated as the larger bulk structure. */
	if (bulk)
		alloc_size = sizeof(WT_CURSOR_BULK);
	else
		alloc_size = sizeof(WT_CURSOR_BTREE);
	WT_RET(__wt_calloc(session, 1, alloc_size, &cbt));

	/* Start from the static method table, then fill in the details. */
	cursor = &cbt->iface;
	*cursor = iface;
	cursor->session = &session->iface;
	cursor->internal_uri = btree->dhandle->name;
	cursor->key_format = btree->key_format;
	cursor->value_format = btree->value_format;
	cbt->btree = btree;

	/*
	 * Increment the data-source's in-use counter; done now because closing
	 * the cursor will decrement it, and all failure paths from here close
	 * the cursor.
	 */
	__wt_cursor_dhandle_incr_use(session);

	/* Cursors on a checkpoint handle are flagged as non-transactional. */
	if (session->dhandle->checkpoint != NULL)
		F_SET(cbt, WT_CBT_NO_TXN);

	if (bulk) {
		F_SET(cursor, WT_CURSTD_BULK);
		cbulk = (WT_CURSOR_BULK *)cbt;

		/* Optionally skip the validation of each bulk-loaded key. */
		WT_ERR(__wt_config_gets_def(
		    session, cfg, "skip_sort_check", 0, &cval));
		WT_ERR(__wt_curbulk_init(
		    session, cbulk, bitmap, cval.val != 0));
	}

	/*
	 * Random retrieval, row-store only.
	 * Random retrieval cursors support a limited set of methods: mark
	 * every method unsupported, then restore the supported ones.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
	if (cval.val != 0) {
		if (WT_CURSOR_RECNO(cursor))
			WT_ERR_MSG(session, ENOTSUP,
			    "next_random configuration not supported for "
			    "column-store objects");

		__wt_cursor_set_notsup(cursor);
		cursor->next = __wt_curfile_next_random;
		cursor->reset = __curfile_reset;

		/* A zero sample size leaves the cursor's value untouched. */
		WT_ERR(__wt_config_gets_def(
		    session, cfg, "next_random_sample_size", 0, &cval));
		if (cval.val != 0)
			cbt->next_random_sample_size = (u_int)cval.val;
	}

	/* Underlying btree initialization. */
	__wt_btcur_open(cbt);

	/*
	 * WT_CURSOR.modify supported on 'u' value formats, but the fast-path
	 * through the btree code requires log file format changes, it's not
	 * available in all versions.
	 */
	if (WT_STREQ(cursor->value_format, "u") &&
	    S2C(session)->compat_major >= WT_LOG_V2)
		cursor->modify = __curfile_modify;

	WT_ERR(__wt_cursor_init(
	    cursor, cursor->internal_uri, owner, cfg, cursorp));

	WT_STAT_CONN_INCR(session, cursor_create);
	WT_STAT_DATA_INCR(session, cursor_create);
	return (0);

err:	/*
	 * Our caller expects to release the data handle if we fail.
	 * Disconnect it from the cursor before closing.
	 */
	if (session->dhandle != NULL)
		__wt_cursor_dhandle_decr_use(session);
	cbt->btree = NULL;
	WT_TRET(__curfile_close(cursor));
	*cursorp = NULL;
	return (ret);
}
/*
 * __logmgr_config --
 *	Parse the "log" configuration and set up the connection's logging
 *	state, on open or on reconfigure.  On return *runp says whether
 *	logging is enabled.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	bool enabled, log_on;

	/*
	 * A note on reconfiguration: the standard "is this configuration
	 * string allowed" checks should fail if reconfiguration has invalid
	 * strings, for example, "log=(enabled)", or
	 * "statistics_log=(path=XXX)", because the connection reconfiguration
	 * method doesn't allow those strings.  Additionally, the base
	 * configuration values during reconfiguration are the currently
	 * configured values (so we don't revert to default values when
	 * repeatedly reconfiguring), and configuration processing of a
	 * currently set value should not change the currently set value.
	 *
	 * In this code path, log server reconfiguration does not stop/restart
	 * the log server, so there's no point in re-evaluating configuration
	 * strings that cannot be reconfigured, risking bugs in configuration
	 * setup, and depending on evaluation of currently set values to
	 * always result in the currently set value.  Tests for configuration
	 * strings that don't make sense during reconfiguration are skipped,
	 * without worrying about error reporting because it should never
	 * happen.
	 */
	conn = S2C(session);

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	enabled = cval.val != 0;

	/*
	 * Reconfiguration cannot switch logging on or off: the requested
	 * state must match the state the connection is already in.
	 *
	 * See above: should never happen.
	 */
	if (reconfig) {
		log_on = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) != 0;
		if (enabled != log_on)
			return (EINVAL);
	}

	/* Logging is incompatible with in-memory. */
	if (enabled) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = enabled;

	/*
	 * The log path and compressor are set up even when logging is
	 * disabled, in case we are going to print a log.  They are only set
	 * on creation: once set they cannot be changed.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &cval));
		WT_RET(__wt_compressor_config(
		    session, &cval, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
		WT_RET(__wt_strndup(
		    session, cval.str, cval.len, &conn->log_path));
	}

	/* We are done if logging isn't enabled. */
	if (!enabled)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	/*
	 * The file size cannot be reconfigured.  The amount of memory
	 * allocated to the log slots may be based on the log file size at
	 * creation and we don't want to re-allocate that memory while
	 * running.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
		conn->log_file_max = (wt_off_t)cval.val;
		WT_STAT_FAST_CONN_SET(session,
		    log_max_filesize, conn->log_file_max);
	}

	/*
	 * If pre-allocation is configured, start with a single file; the
	 * number adapts as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Reconfiguring the recovery setting at runtime is meaningless: it
	 * only matters on create, before recovery runs.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets_def(
		    session, cfg, "log.recover", 0, &cval));
		if (WT_STRING_MATCH("error", cval.str, cval.len))
			FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
	}

	/* Zero-filling log files makes no sense on a read-only connection. */
	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
	if (cval.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));

	/* Signal the log condition variable, if one exists. */
	if (conn->log_cond != NULL)
		WT_RET(__wt_cond_auto_signal(session, conn->log_cond));

	return (0);
}
/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 *
 *	The transaction must be running (asserted).  Every failure before the
 *	commit point jumps to the error label, which drops the visibility
 *	lock if it is held and rolls the transaction back.  After the commit
 *	point the function always returns 0.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_OP *op;
	u_int i;
	bool locked, readonly;
#ifdef HAVE_TIMESTAMPS
	wt_timestamp_t prev_commit_timestamp, ts;
	bool update_timestamp;
#endif

	txn = &session->txn;
	conn = S2C(session);
	txn_global = &conn->txn_global;
	/* Tracks whether the visibility lock is held, for the error path. */
	locked = false;

	WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);

	/*
	 * Remember now whether the transaction made modifications: the count
	 * is reset to zero below, before the eviction check needs to know.
	 */
	readonly = txn->mod_count == 0;
	/*
	 * Look for a commit timestamp.
	 */
	WT_ERR(
	    __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval));
	if (cval.len != 0) {
#ifdef HAVE_TIMESTAMPS
		WT_ERR(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
		WT_ERR(__wt_timestamp_validate(session,
		    "commit", &ts, &cval, true, true, true));
		__wt_timestamp_set(&txn->commit_timestamp, &ts);
		__wt_txn_set_commit_timestamp(session);
#else
		WT_ERR_MSG(session, EINVAL, "commit_timestamp requires a "
		    "version of WiredTiger built with timestamp support");
#endif
	}

#ifdef HAVE_TIMESTAMPS
	/*
	 * Debugging checks on timestamps, if user requested them.  Both
	 * checks only apply to transactions that made modifications.
	 */
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
	    !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "commit_timestamp required and "
		    "none set on this transaction");
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
	    F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "no commit_timestamp required and "
		    "timestamp set on this transaction");
#endif

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_SYNC_ENABLED) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_ERR_MSG(session, EINVAL,
			    "Sync already set during begin_transaction");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification. */
	if (txn->notify != NULL)
		WT_ERR(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/*
	 * We are about to release the snapshot: copy values into any
	 * positioned cursors so they don't point to updates that could be
	 * freed once we don't have a snapshot.
	 */
	if (session->ncursors > 0) {
		WT_DIAGNOSTIC_YIELD;
		WT_ERR(__wt_session_copy_values(session));
	}

	/* If we are logging, write a commit log record. */
	if (txn->logrec != NULL &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		/*
		 * We hold the visibility lock for reading from the time
		 * we write our log record until the time we release our
		 * transaction so that the LSN any checkpoint gets will
		 * always reflect visible data.
		 */
		__wt_readlock(session, &txn_global->visibility_rwlock);
		locked = true;
		WT_ERR(__wt_txn_log_commit(session, cfg));
	}

	/* Note: we're going to commit: nothing can fail after this point. */

	/* Process and free updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
		switch (op->type) {
		case WT_TXN_OP_BASIC:
		case WT_TXN_OP_BASIC_TS:
		case WT_TXN_OP_INMEM:
			/*
			 * Switch reserved operations to abort to
			 * simplify obsolete update list truncation.
			 */
			if (op->u.upd->type == WT_UPDATE_RESERVED) {
				op->u.upd->txnid = WT_TXN_ABORTED;
				break;
			}

			/*
			 * Writes to the lookaside file can be evicted as soon
			 * as they commit.
			 */
			if (conn->cache->las_fileid != 0 &&
			    op->fileid == conn->cache->las_fileid) {
				op->u.upd->txnid = WT_TXN_NONE;
				break;
			}

#ifdef HAVE_TIMESTAMPS
			/*
			 * Stamp the update with the transaction's commit
			 * timestamp, except for WT_TXN_OP_BASIC_TS
			 * operations, which are skipped here.
			 */
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
			    op->type != WT_TXN_OP_BASIC_TS) {
				WT_ASSERT(session,
				    op->fileid != WT_METAFILE_ID);
				__wt_timestamp_set(&op->u.upd->timestamp,
				    &txn->commit_timestamp);
			}
#endif
			break;

		case WT_TXN_OP_REF:
#ifdef HAVE_TIMESTAMPS
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
				__wt_timestamp_set(
				    &op->u.ref->page_del->timestamp,
				    &txn->commit_timestamp);
#endif
			break;

		case WT_TXN_OP_TRUNCATE_COL:
		case WT_TXN_OP_TRUNCATE_ROW:
			/* Other operations don't need timestamps. */
			break;
		}

		__wt_txn_op_free(session, op);
	}
	txn->mod_count = 0;

#ifdef HAVE_TIMESTAMPS
	/*
	 * Track the largest commit timestamp we have seen.
	 *
	 * We don't actually clear the local commit timestamp, just the flag.
	 * That said, we can't update the global commit timestamp until this
	 * transaction is visible, which happens when we release it.
	 *
	 * The flag is sampled here, before the release call below.
	 */
	update_timestamp = F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
#endif

	__wt_txn_release(session);
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);

#ifdef HAVE_TIMESTAMPS
	/* First check if we've already committed something in the future. */
	if (update_timestamp) {
		WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp));
		update_timestamp = __wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0;
	}

	/*
	 * If it looks like we need to move the global commit timestamp,
	 * re-check and update it: with 8-byte timestamps via a
	 * compare-and-swap retry loop, otherwise under the write lock.
	 */
	if (update_timestamp) {
#if WT_TIMESTAMP_SIZE == 8
		/*
		 * Retry for as long as our timestamp is still larger than the
		 * most recently observed global value; a failed swap re-reads
		 * the global value before the next comparison.
		 */
		while (__wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0) {
			if (__wt_atomic_cas64(
			    &txn_global->commit_timestamp.val,
			    prev_commit_timestamp.val,
			    txn->commit_timestamp.val)) {
				txn_global->has_commit_timestamp = true;
				break;
			}
			__wt_timestamp_set(
			    &prev_commit_timestamp,
			    &txn_global->commit_timestamp);
		}
#else
		__wt_writelock(session, &txn_global->rwlock);
		if (__wt_timestamp_cmp(&txn->commit_timestamp,
		    &txn_global->commit_timestamp) > 0) {
			__wt_timestamp_set(&txn_global->commit_timestamp,
			    &txn->commit_timestamp);
			txn_global->has_commit_timestamp = true;
		}
		__wt_writeunlock(session, &txn_global->rwlock);
#endif
	}
#endif

	/*
	 * We're between transactions, if we need to block for eviction, it's
	 * a good time to do so.  Note that we must ignore any error return
	 * because the user's data is committed.
	 */
	if (!readonly)
		(void)__wt_cache_eviction_check(session, false, false, NULL);
	return (0);

err:	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);
	WT_TRET(__wt_txn_rollback(session, cfg));
	return (ret);
}
/*
 * __wt_txn_config --
 *	Configure a transaction.
 *
 *	Reads the "isolation", "sync", "snapshot" and "read_timestamp" (plus
 *	"round_to_oldest") settings from cfg and applies them to the session's
 *	transaction. Returns 0 on success; any error from the configuration
 *	lookups, named snapshot lookup or timestamp parsing is returned to the
 *	caller. EINVAL is reported when the read timestamp is older than the
 *	oldest timestamp and "round_to_oldest" is not set, or when a read
 *	timestamp is given to a build without timestamp support.
 */
int
__wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_TXN *txn;

	txn = &session->txn;

	/* An empty isolation setting leaves the current isolation untouched. */
	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
	if (cval.len != 0)
		txn->isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    WT_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
		    WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED;

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 *
	 * We want to distinguish between inheriting implicitly and explicitly:
	 * the lookup default is a value that is neither 0 nor 1, so seeing 0
	 * or 1 means the setting was given explicitly.
	 */
	F_CLR(txn, WT_TXN_SYNC_SET);
	WT_RET(__wt_config_gets_def(
	    session, cfg, "sync", (int)UINT_MAX, &cval));
	if (cval.val == 0 || cval.val == 1)
		/*
		 * This is an explicit setting of sync.  Set the flag so
		 * that we know not to overwrite it in commit_transaction.
		 */
		F_SET(txn, WT_TXN_SYNC_SET);

	/*
	 * If sync is turned off explicitly, clear the transaction's sync field.
	 */
	if (cval.val == 0)
		txn->txn_logsync = 0;

	WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &cval));
	if (cval.len > 0)
		/*
		 * The layering here isn't ideal - the named snapshot get
		 * function does both validation and setup. Otherwise we'd
		 * need to walk the list of named snapshots twice during
		 * transaction open.
		 */
		WT_RET(__wt_txn_named_snapshot_get(session, &cval));

	WT_RET(__wt_config_gets_def(session, cfg, "read_timestamp", 0, &cval));
	if (cval.len > 0) {
#ifdef HAVE_TIMESTAMPS
		wt_timestamp_t ts;
		WT_TXN_GLOBAL *txn_global;
		char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
		bool round_to_oldest;
		int ret;

		txn_global = &S2C(session)->txn_global;
		WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));

		/*
		 * Read the configuration here to reduce the span of the
		 * critical section.
		 */
		WT_RET(__wt_config_gets_def(session,
		    cfg, "round_to_oldest", 0, &cval));
		round_to_oldest = cval.val != 0;

		/*
		 * This code is not using the timestamp validate function to
		 * avoid a race between checking and setting transaction
		 * timestamp.
		 */
		__wt_readlock(session, &txn_global->rwlock);
		if (__wt_timestamp_cmp(
		    &ts, &txn_global->oldest_timestamp) < 0) {
			/*
			 * The read lock is held: on failure release it before
			 * returning rather than returning directly.
			 */
			if ((ret = __wt_timestamp_to_hex_string(
			    session, timestamp_buf, &ts)) != 0) {
				__wt_readunlock(session, &txn_global->rwlock);
				return (ret);
			}

			/*
			 * If given read timestamp is earlier than oldest
			 * timestamp then round the read timestamp to
			 * oldest timestamp.
			 */
			if (round_to_oldest)
				__wt_timestamp_set(&txn->read_timestamp,
				    &txn_global->oldest_timestamp);
			else {
				__wt_readunlock(session, &txn_global->rwlock);
				WT_RET_MSG(session, EINVAL, "read timestamp "
				    "%s older than oldest timestamp",
				    timestamp_buf);
			}
		} else {
			__wt_timestamp_set(&txn->read_timestamp, &ts);

			/*
			 * Reset to avoid a verbose message as read
			 * timestamp is not rounded to oldest timestamp.
			 */
			round_to_oldest = false;
		}

		__wt_txn_set_read_timestamp(session);
		__wt_readunlock(session, &txn_global->rwlock);

		/* Setting a read timestamp switches to snapshot isolation. */
		txn->isolation = WT_ISO_SNAPSHOT;
		if (round_to_oldest) {
			/*
			 * This message is generated here to reduce the span of
			 * critical section.
			 */
			__wt_verbose(session, WT_VERB_TIMESTAMP, "Read "
			    "timestamp %s : Rounded to oldest timestamp",
			    timestamp_buf);
		}
#else
		WT_RET_MSG(session, EINVAL, "read_timestamp requires a "
		    "version of WiredTiger built with timestamp support");
#endif
	}

	return (0);
}
/*
 * __wt_curmetadata_open --
 *	WT_SESSION->open_cursor method for metadata cursors.
 *
 * A metadata cursor is similar to a file cursor on the special metadata
 * table, with the addition that the metadata for the metadata table itself
 * (which is stored in the turtle file) can also be queried.
 *
 * Metadata cursors are read-only by default.
 */
int
__wt_curmetadata_open(WT_SESSION_IMPL *session,
    const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp)
{
	WT_CURSOR_STATIC_INIT(iface,
	    __wt_cursor_get_key,		/* get-key */
	    __wt_cursor_get_value,		/* get-value */
	    __wt_cursor_set_key,		/* set-key */
	    __wt_cursor_set_value,		/* set-value */
	    __curmetadata_compare,		/* compare */
	    __wt_cursor_equals,			/* equals */
	    __curmetadata_next,			/* next */
	    __curmetadata_prev,			/* prev */
	    __curmetadata_reset,		/* reset */
	    __curmetadata_search,		/* search */
	    __curmetadata_search_near,		/* search-near */
	    __curmetadata_insert,		/* insert */
	    __curmetadata_update,		/* update */
	    __curmetadata_remove,		/* remove */
	    __wt_cursor_reconfigure_notsup,	/* reconfigure */
	    __curmetadata_close);		/* close */
	WT_CONFIG_ITEM readonly_cfg;
	WT_CURSOR *cursor;
	WT_CURSOR_METADATA *metac;
	WT_DECL_RET;

	WT_RET(__wt_calloc_one(session, &metac));

	/* Start from the static method table, then fill in the specifics. */
	cursor = &metac->iface;
	*cursor = iface;
	cursor->value_format = "S";
	cursor->key_format = "S";
	cursor->session = &session->iface;

	/*
	 * The regular metadata is accessed through a newly opened file
	 * cursor; the cached session metadata cursor isn't reused because
	 * its configuration may differ.
	 */
	WT_ERR(__wt_metadata_cursor_open(session, cfg[1], &metac->file_cursor));

	WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));

	/* A create-only cursor strips internal metadata from what it returns. */
	if (WT_STREQ(uri, "metadata:create"))
		F_SET(metac, WT_MDC_CREATEONLY);

	/*
	 * Readonly is the default for metadata cursors; unless it was
	 * explicitly turned off, the modifying methods are permanently
	 * replaced and the cursor cannot be reconfigured.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "readonly", 1, &readonly_cfg));
	if (readonly_cfg.val != 0) {
		cursor->remove = __wt_cursor_notsup;
		cursor->update = __wt_cursor_notsup;
		cursor->insert = __wt_cursor_notsup;
	}

	return (ret);

err:	/* Failure path: close the cursor and clear the caller's handle. */
	WT_TRET(__curmetadata_close(cursor));
	*cursorp = NULL;
	return (ret);
}