C++ (Cpp) __wt_readunlock示例

示例#1

0

显示文件

文件： txn_rollback_to_stable.c 项目： mpobrien/mongo

/*
 * __txn_rollback_to_stable_check --
 *	Ensure the rollback request is reasonable.
 */
static int
__txn_rollback_to_stable_check(WT_SESSION_IMPL *session)
{
	WT_TXN_GLOBAL *txn_global;
	bool active_txns, stable_set;

	txn_global = &S2C(session)->txn_global;
	__wt_readlock(session, &txn_global->rwlock);
	stable_set = !__wt_timestamp_iszero(&txn_global->stable_timestamp);
	__wt_readunlock(session, &txn_global->rwlock);
	if (!stable_set)
		WT_RET_MSG(session, EINVAL, "rollback_to_stable requires a "
		    "stable timestamp");

	/*
	 * Help the user - see if they have any active transactions. I'd
	 * like to check the transaction running flag, but that would
	 * require peeking into all open sessions, which isn't really
	 * kosher.
	 */
	WT_RET(__wt_txn_are_any_active(session, &active_txns));
	if (active_txns)
		WT_RET_MSG(session, EINVAL,
		    "rollback_to_stable illegal with active transactions");

	return (0);
}

示例#2

0

显示文件

文件： bt_ovfl.c 项目： qihsh/mongo

/*
 * __wt_ovfl_read --
 *	Bring an overflow item into memory.
 */
int
__wt_ovfl_read(WT_SESSION_IMPL *session,
    WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store)
{
	WT_DECL_RET;

	/*
	 * If no page specified, there's no need to lock and there's no cache
	 * to search, we don't care about WT_CELL_VALUE_OVFL_RM cells.
	 */
	if (page == NULL)
		return (
		    __ovfl_read(session, unpack->data, unpack->size, store));

	/*
	 * WT_CELL_VALUE_OVFL_RM cells: If reconciliation deleted an overflow
	 * value, but there was still a reader in the system that might need it,
	 * the on-page cell type will have been reset to WT_CELL_VALUE_OVFL_RM
	 * and we will be passed a page so we can look-aside into the cache of
	 * such values.
	 *
	 * Acquire the overflow lock, and retest the on-page cell's value inside
	 * the lock.
	 */
	WT_RET(__wt_readlock(session, S2BT(session)->ovfl_lock));
	ret = __wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM ?
	    __wt_ovfl_txnc_search(page, unpack->data, unpack->size, store) :
	    __ovfl_read(session, unpack->data, unpack->size, store);
	WT_TRET(__wt_readunlock(session, S2BT(session)->ovfl_lock));

	return (ret);
}

示例#3

0

显示文件

文件： cur_log.c 项目： brianleepzx/mongo

/*
 * __curlog_close --
 *	WT_CURSOR.close method for the log cursor type.
 */
static int
__curlog_close(WT_CURSOR *cursor)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR_LOG *cl;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;

	CURSOR_API_CALL(cursor, session, close, NULL);
	cl = (WT_CURSOR_LOG *)cursor;
	conn = S2C(session);
	WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED));
	log = conn->log;
	WT_TRET(__wt_readunlock(session, log->log_archive_lock));
	WT_TRET(__curlog_reset(cursor));
	__wt_free(session, cl->cur_lsn);
	__wt_free(session, cl->next_lsn);
	__wt_scr_free(session, &cl->logrec);
	__wt_scr_free(session, &cl->opkey);
	__wt_scr_free(session, &cl->opvalue);
	__wt_free(session, cl->packed_key);
	__wt_free(session, cl->packed_value);
	WT_TRET(__wt_cursor_close(cursor));

err:	API_END_RET(session, ret);
}

示例#4

0

显示文件

/*
 * __curlog_close --
 *	WT_CURSOR.close method for the log cursor type.
 */
static int
__curlog_close(WT_CURSOR *cursor)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR_LOG *cl;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	cl = (WT_CURSOR_LOG *)cursor;
	CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
err:

	conn = S2C(session);
	if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK)) {
		(void)__wt_atomic_sub32(&conn->log_cursors, 1);
		__wt_readunlock(session, &conn->log->log_archive_lock);
	}

	__wt_free(session, cl->cur_lsn);
	__wt_free(session, cl->next_lsn);
	__wt_scr_free(session, &cl->logrec);
	__wt_scr_free(session, &cl->opkey);
	__wt_scr_free(session, &cl->opvalue);
	__wt_free(session, cl->packed_key);
	__wt_free(session, cl->packed_value);

	__wt_cursor_close(cursor);

	API_END_RET(session, ret);
}

示例#5

0

显示文件

文件： lsm_tree.c 项目： RedSunCMX/wiredtiger

/*
 * __wt_lsm_tree_readunlock --
 *	Release a shared lock on an LSM tree.
 */
int
__wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;

	F_CLR(session, WT_SESSION_NO_CACHE_CHECK | WT_SESSION_NO_SCHEMA_LOCK);

	if ((ret = __wt_readunlock(session, lsm_tree->rwlock)) != 0)
		WT_PANIC_RET(session, ret, "Unlocking an LSM tree");
	return (0);
}

示例#6

0

显示文件

文件： bt_ovfl.c 项目： adityavs/wiredtiger

/*
 * __wt_ovfl_read --
 *	Bring an overflow item into memory.
 */
int
__wt_ovfl_read(WT_SESSION_IMPL *session,
    WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded)
{
	WT_DECL_RET;
	WT_OVFL_TRACK *track;
	size_t i;

	*decoded = false;

	/*
	 * If no page specified, there's no need to lock and there's no cache
	 * to search, we don't care about WT_CELL_VALUE_OVFL_RM cells.
	 */
	if (page == NULL)
		return (
		    __ovfl_read(session, unpack->data, unpack->size, store));

	/*
	 * WT_CELL_VALUE_OVFL_RM cells: If reconciliation deleted an overflow
	 * value, but there was still a reader in the system that might need it,
	 * the on-page cell type will have been reset to WT_CELL_VALUE_OVFL_RM
	 * and we will be passed a page so we can check the on-page cell.
	 *
	 * Acquire the overflow lock, and retest the on-page cell's value inside
	 * the lock.
	 */
	__wt_readlock(session, &S2BT(session)->ovfl_lock);
	if (__wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM) {
		track = page->modify->ovfl_track;
		for (i = 0; i < track->remove_next; ++i)
			if (track->remove[i].cell == unpack->cell) {
				store->data = track->remove[i].data;
				store->size = track->remove[i].size;
				break;
			}
		WT_ASSERT(session, i < track->remove_next);
		*decoded = true;
	} else
		ret = __ovfl_read(session, unpack->data, unpack->size, store);
	__wt_readunlock(session, &S2BT(session)->ovfl_lock);

	return (ret);
}

示例#7

0

显示文件

文件： conn_log.c 项目： pgmarchenko/mongo

/*
 * __log_archive_once --
 *	Perform one iteration of log archiving.  Must be called with the
 *	log archive lock held.
 */
static int
__log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint32_t lognum, min_lognum;
	u_int i, locked, logcount;
	char **logfiles;

	conn = S2C(session);
	log = conn->log;
	logcount = 0;
	logfiles = NULL;

	/*
	 * If we're coming from a backup cursor we want the smaller of
	 * the last full log file copied in backup or the checkpoint LSN.
	 * Otherwise we want the minimum of the last log file written to
	 * disk and the checkpoint LSN.
	 */
	if (backup_file != 0)
		min_lognum = WT_MIN(log->ckpt_lsn.file, backup_file);
	else
		min_lognum = WT_MIN(log->ckpt_lsn.file, log->sync_lsn.file);
	WT_RET(__wt_verbose(session, WT_VERB_LOG,
	    "log_archive: archive to log number %" PRIu32, min_lognum));

	/*
	 * Main archive code.  Get the list of all log files and
	 * remove any earlier than the minimum log number.
	 */
	WT_RET(__wt_dirlist(session, conn->log_path,
	    WT_LOG_FILENAME, WT_DIRLIST_INCLUDE, &logfiles, &logcount));

	/*
	 * We can only archive files if a hot backup is not in progress or
	 * if we are the backup.
	 */
	WT_RET(__wt_readlock(session, conn->hot_backup_lock));
	locked = 1;
	if (conn->hot_backup == 0 || backup_file != 0) {
		for (i = 0; i < logcount; i++) {
			WT_ERR(__wt_log_extract_lognum(
			    session, logfiles[i], &lognum));
			if (lognum < min_lognum)
				WT_ERR(__wt_log_remove(
				    session, WT_LOG_FILENAME, lognum));
		}
	}
	WT_ERR(__wt_readunlock(session, conn->hot_backup_lock));
	locked = 0;
	__wt_log_files_free(session, logfiles, logcount);
	logfiles = NULL;
	logcount = 0;

	/*
	 * Indicate what is our new earliest LSN.  It is the start
	 * of the log file containing the last checkpoint.
	 */
	log->first_lsn.file = min_lognum;
	log->first_lsn.offset = 0;

	if (0)
err:		__wt_err(session, ret, "log archive server error");
	if (locked)
		WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
	if (logfiles != NULL)
		__wt_log_files_free(session, logfiles, logcount);
	return (ret);
}

示例#8

0

显示文件

文件： conn_log.c 项目： BobbWu/wiredtiger

/*
 * __log_server --
 *	The log server thread.
 */
static WT_THREAD_RET
__log_server(void *arg)
{
	struct timespec start, now;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	uint64_t timediff;
	bool did_work, locked, signalled;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = signalled = false;

	/*
	 * Set this to the number of milliseconds we want to run archive and
	 * pre-allocation.  Start it so that we run on the first time through.
	 */
	timediff = WT_THOUSAND;

	/*
	 * The log server thread does a variety of work.  It forces out any
	 * buffered log writes.  It pre-allocates log files and it performs
	 * log archiving.  The reason the wrlsn thread does not force out
	 * the buffered writes is because we want to process and move the
	 * write_lsn forward as quickly as possible.  The same reason applies
	 * to why the log file server thread does not force out the writes.
	 * That thread does fsync calls which can take a long time and we
	 * don't want log records sitting in the buffer over the time it
	 * takes to sync out an earlier file.
	 */
	did_work = true;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Slots depend on future activity.  Force out buffered
		 * writes in case we are idle.  This cannot be part of the
		 * wrlsn thread because of interaction advancing the write_lsn
		 * and a buffer may need to wait for the write_lsn to advance
		 * in the case of a synchronous buffer.  We end up with a hang.
		 */
		WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work));

		/*
		 * We don't want to archive or pre-allocate files as often as
		 * we want to force out log buffers.  Only do it once per second
		 * or if the condition was signalled.
		 */
		if (timediff >= WT_THOUSAND || signalled) {

			/*
			 * Perform log pre-allocation.
			 */
			if (conn->log_prealloc > 0) {
				/*
				 * Log file pre-allocation is disabled when a
				 * hot backup cursor is open because we have
				 * agreed not to rename or remove any files in
				 * the database directory.
				 */
				WT_ERR(__wt_readlock(
				    session, conn->hot_backup_lock));
				locked = true;
				if (!conn->hot_backup)
					WT_ERR(__log_prealloc_once(session));
				WT_ERR(__wt_readunlock(
				    session, conn->hot_backup_lock));
				locked = false;
			}

			/*
			 * Perform the archive.
			 */
			if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
				if (__wt_try_writelock(
				    session, log->log_archive_lock) == 0) {
					ret = __log_archive_once(session, 0);
					WT_TRET(__wt_writeunlock(
					    session, log->log_archive_lock));
					WT_ERR(ret);
				} else
					WT_ERR(
					    __wt_verbose(session, WT_VERB_LOG,
					    "log_archive: Blocked due to open "
					    "log cursor holding archive lock"));
			}
		}

		/* Wait until the next event. */

		WT_ERR(__wt_epoch(session, &start));
		WT_ERR(__wt_cond_auto_wait_signal(session, conn->log_cond,
		    did_work, &signalled));
		WT_ERR(__wt_epoch(session, &now));
		timediff = WT_TIMEDIFF_MS(now, start);
	}

	if (0) {
err:		__wt_err(session, ret, "log server error");
		if (locked)
			WT_TRET(__wt_readunlock(
			    session, conn->hot_backup_lock));
	}
	return (WT_THREAD_RET_VALUE);
}

示例#9

0

显示文件

文件： txn.c 项目： judahschvimer/mongo

/*
 * __wt_txn_update_oldest --
 *	Sweep the running transactions to update the oldest ID required.
 */
int
__wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	uint64_t current_id, last_running, oldest_id;
	uint64_t prev_last_running, prev_oldest_id;
	bool strict, wait;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	strict = LF_ISSET(WT_TXN_OLDEST_STRICT);
	wait = LF_ISSET(WT_TXN_OLDEST_WAIT);

	current_id = last_running = txn_global->current;
	prev_last_running = txn_global->last_running;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * For pure read-only workloads, or if the update isn't forced and the
	 * oldest ID isn't too far behind, avoid scanning.
	 */
	if (prev_oldest_id == current_id ||
	    (!strict && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
		return (0);

	/* First do a read-only scan. */
	if (wait)
		__wt_readlock(session, txn_global->scan_rwlock);
	else if ((ret =
	    __wt_try_readlock(session, txn_global->scan_rwlock)) != 0)
		return (ret == EBUSY ? 0 : ret);
	__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);
	__wt_readunlock(session, txn_global->scan_rwlock);

	/*
	 * If the state hasn't changed (or hasn't moved far enough for
	 * non-forced updates), give up.
	 */
	if ((oldest_id == prev_oldest_id ||
	    (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
	    ((last_running == prev_last_running) ||
	    (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))))
		return (0);

	/* It looks like an update is necessary, wait for exclusive access. */
	if (wait)
		__wt_writelock(session, txn_global->scan_rwlock);
	else if ((ret =
	    __wt_try_writelock(session, txn_global->scan_rwlock)) != 0)
		return (ret == EBUSY ? 0 : ret);

	/*
	 * If the oldest ID has been updated while we waited, don't bother
	 * scanning.
	 */
	if (WT_TXNID_LE(oldest_id, txn_global->oldest_id) &&
	    WT_TXNID_LE(last_running, txn_global->last_running))
		goto done;

	/*
	 * Re-scan now that we have exclusive access.  This is necessary because
	 * threads get transaction snapshots with read locks, and we have to be
	 * sure that there isn't a thread that has got a snapshot locally but
	 * not yet published its snap_min.
	 */
	__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);

#ifdef HAVE_DIAGNOSTIC
	{
	/*
	 * Make sure the ID doesn't move past any named snapshots.
	 *
	 * Don't include the read/assignment in the assert statement.  Coverity
	 * complains if there are assignments only done in diagnostic builds,
	 * and when the read is from a volatile.
	 */
	uint64_t id = txn_global->nsnap_oldest_id;
	WT_ASSERT(session,
	    id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
	}
#endif
	/* Update the oldest ID. */
	if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
		txn_global->oldest_id = oldest_id;
	if (WT_TXNID_LT(txn_global->last_running, last_running)) {
		txn_global->last_running = last_running;

#ifdef HAVE_VERBOSE
		/* Output a verbose message about long-running transactions,
		 * but only when some progress is being made. */
		if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
		    current_id - oldest_id > 10000 && oldest_session != NULL) {
			__wt_verbose(session, WT_VERB_TRANSACTION,
			    "old snapshot %" PRIu64
			    " pinned in session %" PRIu32 " [%s]"
			    " with snap_min %" PRIu64 "\n",
			    oldest_id, oldest_session->id,
			    oldest_session->lastop,
			    oldest_session->txn.snap_min);
		}
#endif
	}

done:	__wt_writeunlock(session, txn_global->scan_rwlock);
	return (ret);
}

示例#10

0

显示文件

文件： txn.c 项目： judahschvimer/mongo

/*
 * __wt_txn_get_snapshot --
 *	Allocate a snapshot.
 */
int
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	uint64_t current_id, id;
	uint64_t prev_oldest_id, snap_min;
	uint32_t i, n, session_cnt;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = WT_SESSION_TXN_STATE(session);
	n = 0;

	/*
	 * Spin waiting for the lock: the sleeps in our blocking readlock
	 * implementation are too slow for scanning the transaction table.
	 */
	while ((ret =
	    __wt_try_readlock(session, txn_global->scan_rwlock)) == EBUSY)
		WT_PAUSE();
	WT_RET(ret);

	current_id = snap_min = txn_global->current;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * Include the checkpoint transaction, if one is running: we should
	 * ignore any uncommitted changes the checkpoint has written to the
	 * metadata.  We don't have to keep the checkpoint's changes pinned so
	 * don't including it in the published snap_min.
	 */
	if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE)
		txn->snapshot[n++] = id;

	/* For pure read-only workloads, avoid scanning. */
	if (prev_oldest_id == current_id) {
		txn_state->snap_min = current_id;
		/* Check that the oldest ID has not moved in the meantime. */
		WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
		goto done;
	}

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Build our snapshot of any concurrent transaction IDs.
		 *
		 * Ignore:
		 *  - Our own ID: we always read our own updates.
		 *  - The ID if it is older than the oldest ID we saw. This
		 *    can happen if we race with a thread that is allocating
		 *    an ID -- the ID will not be used because the thread will
		 *    keep spinning until it gets a valid one.
		 */
		if (s != txn_state &&
		    (id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id)) {
			txn->snapshot[n++] = id;
			if (WT_TXNID_LT(id, snap_min))
				snap_min = id;
		}
	}

	/*
	 * If we got a new snapshot, update the published snap_min for this
	 * session.
	 */
	WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min));
	WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
	txn_state->snap_min = snap_min;

done:	__wt_readunlock(session, txn_global->scan_rwlock);
	__txn_sort_snapshot(session, n, current_id);
	return (0);
}

示例#11

0

显示文件

文件： conn_dhandle.c 项目： AshishGautamKarn/mongo

/*
 * __conn_dhandle_open_lock --
 *	Spin on the current data handle until either (a) it is open, read
 *	locked; or (b) it is closed, write locked.  If exclusive access is
 *	requested and cannot be granted immediately because the handle is
 *	in use, fail with EBUSY.
 *
 *	Here is a brief summary of how different operations synchronize using
 *	either the schema lock, handle locks or handle flags:
 *
 *	open -- holds the schema lock, one thread gets the handle exclusive,
 *		reverts to a shared handle lock and drops the schema lock
 *		once the handle is open;
 *	bulk load -- sets bulk and exclusive;
 *	salvage, truncate, update, verify -- hold the schema lock, set a
 *		"special" flag;
 *	sweep -- gets a write lock on the handle, doesn't set exclusive
 *
 *	The schema lock prevents a lot of potential conflicts: we should never
 *	see handles being salvaged or verified because those operation hold the
 *	schema lock.  However, it is possible to see a handle that is being
 *	bulk loaded, or that the sweep server is closing.
 *
 *	The principle here is that application operations can cause other
 *	application operations to fail (so attempting to open a cursor on a
 *	file while it is being bulk-loaded will fail), but internal or
 *	database-wide operations should not prevent application-initiated
 *	operations.  For example, attempting to verify a file should not fail
 *	because the sweep server happens to be in the process of closing that
 *	file.
 */
static int
__conn_dhandle_open_lock(
    WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, uint32_t flags)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	int is_open, lock_busy, want_exclusive;

	btree = dhandle->handle;
	lock_busy = 0;
	want_exclusive = LF_ISSET(WT_DHANDLE_EXCLUSIVE) ? 1 : 0;

	/*
	 * Check that the handle is open.  We've already incremented
	 * the reference count, so once the handle is open it won't be
	 * closed by another thread.
	 *
	 * If we can see the WT_DHANDLE_OPEN flag set while holding a
	 * lock on the handle, then it's really open and we can start
	 * using it.  Alternatively, if we can get an exclusive lock
	 * and WT_DHANDLE_OPEN is still not set, we need to do the open.
	 */
	for (;;) {
		/*
		 * If the handle is already open for a special operation,
		 * give up.
		 */
		if (F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))
			return (EBUSY);

		/*
		 * If the handle is open, get a read lock and recheck.
		 *
		 * Wait for a read lock if we want exclusive access and failed
		 * to get it: the sweep server may be closing this handle, and
		 * we need to wait for it to complete.  If we want exclusive
		 * access and find the handle open once we get the read lock,
		 * give up: some other thread has it locked for real.
		 */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
		    (!want_exclusive || lock_busy)) {
			WT_RET(__wt_readlock(session, dhandle->rwlock));
			is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN) ? 1 : 0;
			if (is_open && !want_exclusive)
				return (0);
			WT_RET(__wt_readunlock(session, dhandle->rwlock));
		} else
			is_open = 0;

		/*
		 * It isn't open or we want it exclusive: try to get an
		 * exclusive lock.  There is some subtlety here: if we race
		 * with another thread that successfully opens the file, we
		 * don't want to block waiting to get exclusive access.
		 */
		if ((ret = __wt_try_writelock(session, dhandle->rwlock)) == 0) {
			/*
			 * If it was opened while we waited, drop the write
			 * lock and get a read lock instead.
			 */
			if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
			    !want_exclusive) {
				lock_busy = 0;
				WT_RET(
				    __wt_writeunlock(session, dhandle->rwlock));
				continue;
			}

			/* We have an exclusive lock, we're done. */
			F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
			return (0);
		} else if (ret != EBUSY || (is_open && want_exclusive))
			return (ret);
		else
			lock_busy = 1;

		/* Give other threads a chance to make progress. */
		__wt_yield();
	}
}

示例#12

0

显示文件

文件： txn_rollback_to_stable.c 项目： mpobrien/mongo

/*
 * __txn_rollback_to_stable_btree --
 *	Called for each open handle - choose to either skip or wipe the commits
 */
static int
__txn_rollback_to_stable_btree(
    WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_DECL_RET;
	WT_DECL_TIMESTAMP(rollback_timestamp)
	WT_BTREE *btree;
	WT_TXN_GLOBAL *txn_global;

	WT_UNUSED(cfg);

	btree = S2BT(session);
	txn_global = &S2C(session)->txn_global;

	/*
	 * Immediately durable files don't get their commits wiped. This case
	 * mostly exists to support the semantic required for the oplog in
	 * MongoDB - updates that have been made to the oplog should not be
	 * aborted. It also wouldn't be safe to roll back updates for any
	 * table that had it's records logged, since those updates would be
	 * recovered after a crash making them inconsistent.
	 */
	if (__wt_btree_immediately_durable(session)) {
		/*
		 * Add the btree ID to the bitstring, so we can exclude any
		 * lookaside entries for this btree.
		 */
		__bit_set(
		    S2C(session)->stable_rollback_bitstring, btree->id);
		return (0);
	}

	/* There is never anything to do for checkpoint handles */
	if (session->dhandle->checkpoint != NULL)
		return (0);

	/* There is nothing to do on an empty tree. */
	if (btree->root.page == NULL)
		return (0);

	if (btree->type != BTREE_ROW)
		WT_RET_MSG(session, EINVAL, "rollback_to_stable "
		    "is only supported for row store btrees");

	/*
	 * Copy the stable timestamp, otherwise we'd need to lock it each time
	 * it's accessed. Even though the stable timestamp isn't supposed to be
	 * updated while rolling back, accessing it without a lock would
	 * violate protocol.
	 */
	__wt_readlock(session, &txn_global->rwlock);
	__wt_timestamp_set(&rollback_timestamp, &txn_global->stable_timestamp);
	__wt_readunlock(session, &txn_global->rwlock);

	/*
	 * Ensure the eviction server is out of the file - we don't
	 * want it messing with us. This step shouldn't be required, but
	 * it simplifies some of the reasoning about what state trees can
	 * be in.
	 */
	WT_RET(__wt_evict_file_exclusive_on(session));
	ret = __txn_rollback_to_stable_btree_walk(
	    session, &rollback_timestamp);
	__wt_evict_file_exclusive_off(session);

	return (ret);
}

示例#13

0

显示文件

文件： txn_rollback_to_stable.c 项目： mpobrien/mongo

/*
 * __txn_rollback_to_stable_lookaside_fixup --
 *	Remove any updates that need to be rolled back from the lookaside file.
 */
static int
__txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_CURSOR *cursor;
	WT_DECL_RET;
	WT_DECL_TIMESTAMP(rollback_timestamp)
	WT_ITEM las_addr, las_key, las_timestamp;
	WT_TXN_GLOBAL *txn_global;
	uint64_t las_counter, las_txnid, remove_cnt;
	uint32_t las_id, session_flags;

	conn = S2C(session);
	cursor = NULL;
	remove_cnt = 0;
	session_flags = 0;		/* [-Werror=maybe-uninitialized] */
	WT_CLEAR(las_timestamp);

	/*
	 * Copy the stable timestamp, otherwise we'd need to lock it each time
	 * it's accessed. Even though the stable timestamp isn't supposed to be
	 * updated while rolling back, accessing it without a lock would
	 * violate protocol.
	 */
	txn_global = &S2C(session)->txn_global;
	__wt_readlock(session, &txn_global->rwlock);
	__wt_timestamp_set(&rollback_timestamp, &txn_global->stable_timestamp);
	__wt_readunlock(session, &txn_global->rwlock);

	__wt_las_cursor(session, &cursor, &session_flags);

	/* Discard pages we read as soon as we're done with them. */
	F_SET(session, WT_SESSION_NO_CACHE);

	/* Walk the file. */
	for (; (ret = cursor->next(cursor)) == 0; ) {
		WT_ERR(cursor->get_key(cursor, &las_id, &las_addr, &las_counter,
		    &las_txnid, &las_timestamp, &las_key));

		/* Check the file ID so we can skip durable tables */
		if (__bit_test(conn->stable_rollback_bitstring, las_id))
			continue;

		/*
		 * Entries with no timestamp will have a timestamp of zero,
		 * which will fail the following check and cause them to never
		 * be removed.
		 */
		if (__wt_timestamp_cmp(
		    &rollback_timestamp, las_timestamp.data) < 0) {
			WT_ERR(cursor->remove(cursor));
			++remove_cnt;
		}
	}
	WT_ERR_NOTFOUND_OK(ret);
err:	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
	/*
	 * If there were races to remove records, we can over-count. Underflow
	 * isn't fatal, but check anyway so we don't skew low over time.
	 */
	if (remove_cnt > conn->las_record_cnt)
		conn->las_record_cnt = 0;
	else if (remove_cnt > 0)
		(void)__wt_atomic_sub64(&conn->las_record_cnt, remove_cnt);

	F_CLR(session, WT_SESSION_NO_CACHE);

	return (ret);
}

示例#14

0

显示文件

文件： conn_log.c 项目： judahschvimer/mongo

/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint32_t filenum;
	bool locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = false;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * We update the close file handle before updating the
			 * close LSN when changing files.  It is possible we
			 * could see mismatched settings.  If we do, yield
			 * until it is set.  This should rarely happen.
			 */
			while (log->log_close_lsn.l.file < filenum)
				__wt_yield();

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set the close_end_lsn to the LSN immediately
				 * after ours.  That is, the beginning of the
				 * next log file.   We need to know the LSN
				 * file number of our own close in case earlier
				 * calls are still in progress and the next one
				 * to move the sync_lsn into the next file for
				 * later syncs.
				 */
				WT_ERR(__wt_fsync(session, close_fh, true));

				/*
				 * We want to have the file size reflect actual
				 * data with minimal pre-allocated zeroed space.
				 * We can't truncate the file during hot backup,
				 * or the underlying file system may not support
				 * truncate: both are OK, it's just more work
				 * during cursor traversal.
				 */
				if (!conn->hot_backup) {
					__wt_readlock(
					    session, conn->hot_backup_lock);
					if (!conn->hot_backup)
						WT_ERR_ERROR_OK(
						    __wt_ftruncate(session,
						    close_fh,
						    close_end_lsn.l.offset),
						    ENOTSUP);
					__wt_readunlock(
					    session, conn->hot_backup_lock);
				}
				WT_SET_LSN(&close_end_lsn,
				    close_end_lsn.l.file + 1, 0);
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				__wt_cond_signal(session, log->log_sync_cond);
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't signal the wrlsn thread
			 * to get it written.
			 *
			 * We also have to wait for the written LSN and the
			 * sync LSN to be in the same file so that we know we
			 * have synchronized all earlier log files.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				/*
				 * If the sync file is behind either the one
				 * wanted for a background sync or the write LSN
				 * has moved to another file continue to let
				 * this worker thread process that older file
				 * immediately.
				 */
				if ((log->sync_lsn.l.file <
				    log->bg_sync_lsn.l.file) ||
				    (log->sync_lsn.l.file < min_lsn.l.file))
					continue;
				WT_ERR(__wt_fsync(session, log->log_fh, true));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					WT_ASSERT(session,
					    min_lsn.l.file ==
					    log->sync_lsn.l.file);
					log->sync_lsn = min_lsn;
					__wt_cond_signal(
					    session, log->log_sync_cond);
				}
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				__wt_cond_auto_signal(
				    session, conn->log_wrlsn_cond);
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				__wt_yield();
				continue;
			}
		}
		/* Wait until the next event. */
		__wt_cond_wait(session, conn->log_file_cond, WT_MILLION / 10);
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}

示例#15

0

显示文件

文件： txn.c 项目： bsamek/wiredtiger

/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_OP *op;
	u_int i;
	bool locked, readonly;
#ifdef HAVE_TIMESTAMPS
	wt_timestamp_t prev_commit_timestamp, ts;
	bool update_timestamp;
#endif

	txn = &session->txn;
	conn = S2C(session);
	txn_global = &conn->txn_global;
	locked = false;

	WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) ||
	    txn->mod_count == 0);

	readonly = txn->mod_count == 0;
	/*
	 * Look for a commit timestamp.
	 */
	WT_ERR(
	    __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval));
	if (cval.len != 0) {
#ifdef HAVE_TIMESTAMPS
		WT_ERR(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
		WT_ERR(__wt_timestamp_validate(session,
		    "commit", &ts, &cval, true, true, true));
		__wt_timestamp_set(&txn->commit_timestamp, &ts);
		__wt_txn_set_commit_timestamp(session);
#else
		WT_ERR_MSG(session, EINVAL, "commit_timestamp requires a "
		    "version of WiredTiger built with timestamp support");
#endif
	}

#ifdef HAVE_TIMESTAMPS
	/*
	 * Debugging checks on timestamps, if user requested them.
	 */
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
	    !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "commit_timestamp required and "
		    "none set on this transaction");
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
	    F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "no commit_timestamp required and "
		    "timestamp set on this transaction");
#endif
	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_SYNC_ENABLED) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_ERR_MSG(session, EINVAL,
			    "Sync already set during begin_transaction");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification. */
	if (txn->notify != NULL)
		WT_ERR(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/*
	 * We are about to release the snapshot: copy values into any
	 * positioned cursors so they don't point to updates that could be
	 * freed once we don't have a snapshot.
	 */
	if (session->ncursors > 0) {
		WT_DIAGNOSTIC_YIELD;
		WT_ERR(__wt_session_copy_values(session));
	}

	/* If we are logging, write a commit log record. */
	if (txn->logrec != NULL &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		/*
		 * We hold the visibility lock for reading from the time
		 * we write our log record until the time we release our
		 * transaction so that the LSN any checkpoint gets will
		 * always reflect visible data.
		 */
		__wt_readlock(session, &txn_global->visibility_rwlock);
		locked = true;
		WT_ERR(__wt_txn_log_commit(session, cfg));
	}

	/* Note: we're going to commit: nothing can fail after this point. */

	/* Process and free updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
		switch (op->type) {
		case WT_TXN_OP_BASIC:
		case WT_TXN_OP_BASIC_TS:
		case WT_TXN_OP_INMEM:
			/*
			 * Switch reserved operations to abort to
			 * simplify obsolete update list truncation.
			 */
			if (op->u.upd->type == WT_UPDATE_RESERVED) {
				op->u.upd->txnid = WT_TXN_ABORTED;
				break;
			}

			/*
			 * Writes to the lookaside file can be evicted as soon
			 * as they commit.
			 */
			if (conn->cache->las_fileid != 0 &&
			    op->fileid == conn->cache->las_fileid) {
				op->u.upd->txnid = WT_TXN_NONE;
				break;
			}

#ifdef HAVE_TIMESTAMPS
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
			    op->type != WT_TXN_OP_BASIC_TS) {
				WT_ASSERT(session,
				    op->fileid != WT_METAFILE_ID);
				__wt_timestamp_set(&op->u.upd->timestamp,
				    &txn->commit_timestamp);
			}
#endif
			break;

		case WT_TXN_OP_REF:
#ifdef HAVE_TIMESTAMPS
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
				__wt_timestamp_set(
				    &op->u.ref->page_del->timestamp,
				    &txn->commit_timestamp);
#endif
			break;

		case WT_TXN_OP_TRUNCATE_COL:
		case WT_TXN_OP_TRUNCATE_ROW:
			/* Other operations don't need timestamps. */
			break;
		}

		__wt_txn_op_free(session, op);
	}
	txn->mod_count = 0;

#ifdef HAVE_TIMESTAMPS
	/*
	 * Track the largest commit timestamp we have seen.
	 *
	 * We don't actually clear the local commit timestamp, just the flag.
	 * That said, we can't update the global commit timestamp until this
	 * transaction is visible, which happens when we release it.
	 */
	update_timestamp = F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
#endif

	__wt_txn_release(session);
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);

#ifdef HAVE_TIMESTAMPS
	/* First check if we've already committed something in the future. */
	if (update_timestamp) {
		WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp));
		update_timestamp = __wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0;
	}

	/*
	 * If it looks like we need to move the global commit timestamp,
	 * write lock and re-check.
	 */
	if (update_timestamp) {
#if WT_TIMESTAMP_SIZE == 8
		while (__wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0) {
			if (__wt_atomic_cas64(
			    &txn_global->commit_timestamp.val,
			    prev_commit_timestamp.val,
			    txn->commit_timestamp.val)) {
				txn_global->has_commit_timestamp = true;
				break;
			}
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp);
		}
#else
		__wt_writelock(session, &txn_global->rwlock);
		if (__wt_timestamp_cmp(&txn->commit_timestamp,
		    &txn_global->commit_timestamp) > 0) {
			__wt_timestamp_set(&txn_global->commit_timestamp,
			    &txn->commit_timestamp);
			txn_global->has_commit_timestamp = true;
		}
		__wt_writeunlock(session, &txn_global->rwlock);
#endif
	}
#endif

	/*
	 * We're between transactions, if we need to block for eviction, it's
	 * a good time to do so.  Note that we must ignore any error return
	 * because the user's data is committed.
	 */
	if (!readonly)
		(void)__wt_cache_eviction_check(session, false, false, NULL);
	return (0);

err:	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);
	WT_TRET(__wt_txn_rollback(session, cfg));
	return (ret);
}

示例#16

0

显示文件

文件： txn.c 项目： bsamek/wiredtiger

/*
 * __wt_txn_config --
 *	Configure a transaction.
 */
int
__wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_TXN *txn;

	txn = &session->txn;

	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
	if (cval.len != 0)
		txn->isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    WT_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
		    WT_ISO_READ_COMMITTED : WT_ISO_READ_UNCOMMITTED;

	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 *
	 * We want to distinguish between inheriting implicitly and explicitly.
	 */
	F_CLR(txn, WT_TXN_SYNC_SET);
	WT_RET(__wt_config_gets_def(
	    session, cfg, "sync", (int)UINT_MAX, &cval));
	if (cval.val == 0 || cval.val == 1)
		/*
		 * This is an explicit setting of sync.  Set the flag so
		 * that we know not to overwrite it in commit_transaction.
		 */
		F_SET(txn, WT_TXN_SYNC_SET);

	/*
	 * If sync is turned off explicitly, clear the transaction's sync field.
	 */
	if (cval.val == 0)
		txn->txn_logsync = 0;

	WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &cval));
	if (cval.len > 0)
		/*
		 * The layering here isn't ideal - the named snapshot get
		 * function does both validation and setup. Otherwise we'd
		 * need to walk the list of named snapshots twice during
		 * transaction open.
		 */
		WT_RET(__wt_txn_named_snapshot_get(session, &cval));

	WT_RET(__wt_config_gets_def(session, cfg, "read_timestamp", 0, &cval));
	if (cval.len > 0) {
#ifdef HAVE_TIMESTAMPS
		wt_timestamp_t ts;
		WT_TXN_GLOBAL *txn_global;
		char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
		bool round_to_oldest;

		txn_global = &S2C(session)->txn_global;
		WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));

		/*
		 * Read the configuration here to reduce the span of the
		 * critical section.
		 */
		WT_RET(__wt_config_gets_def(session,
		    cfg, "round_to_oldest", 0, &cval));
		round_to_oldest = cval.val;
		/*
		 * This code is not using the timestamp validate function to
		 * avoid a race between checking and setting transaction
		 * timestamp.
		 */
		__wt_readlock(session, &txn_global->rwlock);
		if (__wt_timestamp_cmp(&ts, &txn_global->oldest_timestamp) < 0)
		{
			WT_RET(__wt_timestamp_to_hex_string(session,
			    timestamp_buf, &ts));
			/*
			 * If given read timestamp is earlier than oldest
			 * timestamp then round the read timestamp to
			 * oldest timestamp.
			 */
			if (round_to_oldest)
				__wt_timestamp_set(&txn->read_timestamp,
				    &txn_global->oldest_timestamp);
			else {
				__wt_readunlock(session, &txn_global->rwlock);
				WT_RET_MSG(session, EINVAL, "read timestamp "
				    "%s older than oldest timestamp",
				    timestamp_buf);
			}
		} else {
			__wt_timestamp_set(&txn->read_timestamp, &ts);
			/*
			 * Reset to avoid a verbose message as read
			 * timestamp is not rounded to oldest timestamp.
			 */
			round_to_oldest = false;
		}

		__wt_txn_set_read_timestamp(session);
		__wt_readunlock(session, &txn_global->rwlock);
		txn->isolation = WT_ISO_SNAPSHOT;
		if (round_to_oldest) {
			/*
			 * This message is generated here to reduce the span of
			 * critical section.
			 */
			__wt_verbose(session, WT_VERB_TIMESTAMP, "Read "
			    "timestamp %s : Rounded to oldest timestamp",
			    timestamp_buf);
		}
#else
		WT_RET_MSG(session, EINVAL, "read_timestamp requires a "
		    "version of WiredTiger built with timestamp support");
#endif
	}

	return (0);
}