Example #1
File: rec_track.c  Project: Andiry/mongo
/*
 * __ovfl_txnc_wrapup --
 *	Resolve the page's transaction-cache list.
 */
static int
__ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_OVFL_TXNC **e, **head, *txnc;
	uint64_t oldest_txn;
	size_t decr;
	int i;

	head = page->modify->ovfl_track->ovfl_txnc;

	/*
	 * Take a snapshot of the oldest transaction ID we need to keep alive.
	 * Since we do two passes through entries in the structure, the normal
	 * visibility check could give different results as the global ID moves
	 * forward.
	 */
	oldest_txn = __wt_txn_oldest_id(session);

	/*
	 * Discard any transaction-cache records with transaction IDs earlier
	 * than any in the system.
	 *
	 * First, walk the overflow transaction-cache skip lists (except for
	 * the lowest level), fixing up links.
	 */
	for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
		for (e = &head[i]; (txnc = *e) != NULL;) {
			if (WT_TXNID_LE(oldest_txn, txnc->current)) {
				e = &txnc->next[i];
				continue;
			}
			*e = txnc->next[i];
		}

	/* Second, discard any no longer needed transaction-cache records. */
	decr = 0;
	for (e = &head[0]; (txnc = *e) != NULL;) {
		if (WT_TXNID_LE(oldest_txn, txnc->current)) {
			e = &txnc->next[0];
			continue;
		}
		*e = txnc->next[0];

		if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
			WT_RET(
			    __ovfl_txnc_verbose(session, page, txnc, "free"));

		decr += WT_OVFL_SIZE(txnc, WT_OVFL_TXNC);
		__wt_free(session, txnc);
	}

	if (decr != 0)
		__wt_cache_page_inmem_decr(session, page, decr);
	return (0);
}
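
The skip-list fix-up in __ovfl_txnc_wrapup uses the pointer-to-pointer unlink idiom: the cursor `e` always points at the link that may need rewriting, so a node can be removed without tracking a separate "previous" pointer. A minimal standalone sketch of the same idiom follows; the node type and function are hypothetical, not WiredTiger code.

#include <stdlib.h>

struct node {
	unsigned long id;
	struct node *next;
};

/*
 * prune_list --
 *	Unlink and free every node whose id is below the oldest ID we keep.
 */
static void
prune_list(struct node **head, unsigned long oldest)
{
	struct node **e, *n;

	for (e = head; (n = *e) != NULL;) {
		if (n->id >= oldest) {
			e = &n->next;	/* keep: advance to the next link */
			continue;
		}
		*e = n->next;		/* unlink the node, then free it */
		free(n);
	}
}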
Example #2
File: txn.c  Project: To4e/mongo
/*
 * __txn_sort_snapshot --
 *	Sort a snapshot for faster searching and set the min/max bounds.
 */
static void
__txn_sort_snapshot(WT_SESSION_IMPL *session, uint32_t n, uint64_t snap_max)
{
	WT_TXN *txn;

	txn = &session->txn;

	if (n > 1)
		__snapsort(txn->snapshot, n);

	txn->snapshot_count = n;
	txn->snap_max = snap_max;
	txn->snap_min = (n > 0 && WT_TXNID_LE(txn->snapshot[0], snap_max)) ?
	    txn->snapshot[0] : snap_max;
	F_SET(txn, WT_TXN_HAS_SNAPSHOT);
	WT_ASSERT(session, n == 0 || txn->snap_min != WT_TXN_NONE);
}
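
The WT_TXNID_LE/WT_TXNID_LT comparisons used throughout these examples are, to the best of my reading of the WiredTiger tree, plain 64-bit comparisons: transaction IDs are allocated monotonically and are not expected to wrap. Treat the following as an approximation rather than the authoritative definitions.

/*
 * Approximate definitions (an assumption -- check the source tree):
 * transaction IDs only grow, so ordering is ordinary unsigned comparison.
 */
#define	WT_TXNID_LE(t1, t2)	((t1) <= (t2))
#define	WT_TXNID_LT(t1, t2)	((t1) < (t2))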
Example #3
File: txn.c  Project: To4e/mongo
/*
 * __wt_txn_update_oldest --
 *	Sweep the running transactions to update the oldest ID required.
 * !!!
 * If a data-source is calling the WT_EXTENSION_API.transaction_oldest
 * method (for the oldest transaction ID not yet visible to a running
 * transaction), and then comparing that oldest ID against committed
 * transactions to see if updates for a committed transaction are still
 * visible to running transactions, the oldest transaction ID may be
 * the same as the last committed transaction ID, if the transaction
 * state wasn't refreshed after the last transaction committed.  Push
 * past the last committed transaction.
*/
void
__wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t current_id, id, last_running, oldest_id, prev_oldest_id;
	uint32_t i, session_cnt;
	int32_t count;
	int last_running_moved;

	conn = S2C(session);
	txn_global = &conn->txn_global;

	current_id = last_running = txn_global->current;
	oldest_session = NULL;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * For pure read-only workloads, or if the update isn't forced and the
	 * oldest ID isn't too far behind, avoid scanning.
	 */
	if (prev_oldest_id == current_id ||
	    (!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
		return;

	/*
	 * We're going to scan.  Increment the count of scanners to prevent the
	 * oldest ID from moving forwards.  Spin if the count is negative,
	 * which indicates that some thread is moving the oldest ID forwards.
	 */
	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1));

	/* The oldest ID cannot change until the scan count goes to zero. */
	prev_oldest_id = txn_global->oldest_id;
	current_id = oldest_id = last_running = txn_global->current;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Update the oldest ID.
		 *
		 * Ignore: IDs older than the oldest ID we saw. This can happen
		 * if we race with a thread that is allocating an ID -- the ID
		 * will not be used because the thread will keep spinning until
		 * it gets a valid one.
		 */
		if ((id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id) &&
		    WT_TXNID_LT(id, last_running))
			last_running = id;

		/*
		 * !!!
		 * Note: Don't ignore snap_min values older than the previous
		 * oldest ID.  Read-uncommitted operations publish snap_min
		 * values without incrementing scan_count to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for
		 * more details.
		 */
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, oldest_id)) {
			oldest_id = id;
			oldest_session = &conn->sessions[i];
		}
	}

	if (WT_TXNID_LT(last_running, oldest_id))
		oldest_id = last_running;

	/* The oldest ID can't move past any named snapshots. */
	if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
	    WT_TXNID_LT(id, oldest_id))
		oldest_id = id;

	/* Update the last running ID. */
	last_running_moved =
	    WT_TXNID_LT(txn_global->last_running, last_running);

	/* Update the oldest ID. */
	if ((WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) &&
	    WT_ATOMIC_CAS4(txn_global->scan_count, 1, -1)) {
		WT_ORDERED_READ(session_cnt, conn->session_cnt);
		for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
			if ((id = s->id) != WT_TXN_NONE &&
			    WT_TXNID_LT(id, last_running))
				last_running = id;
			if ((id = s->snap_min) != WT_TXN_NONE &&
			    WT_TXNID_LT(id, oldest_id))
				oldest_id = id;
		}

		if (WT_TXNID_LT(last_running, oldest_id))
			oldest_id = last_running;

#ifdef HAVE_DIAGNOSTIC
		/*
		 * Make sure the ID doesn't move past any named snapshots.
		 *
		 * Don't include the read/assignment in the assert statement.
		 * Coverity complains if there are assignments only done in
		 * diagnostic builds, and when the read is from a volatile.
		 */
		id = txn_global->nsnap_oldest_id;
		WT_ASSERT(session,
		    id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
#endif
		if (WT_TXNID_LT(txn_global->last_running, last_running))
			txn_global->last_running = last_running;
		if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
			txn_global->oldest_id = oldest_id;
		WT_ASSERT(session, txn_global->scan_count == -1);
		txn_global->scan_count = 0;
	} else {
		if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
		    current_id - oldest_id > 10000 && last_running_moved &&
		    oldest_session != NULL) {
			(void)__wt_verbose(session, WT_VERB_TRANSACTION,
			    "old snapshot %" PRIu64
			    " pinned in session %" PRIu32 " [%s]"
			    " with snap_min %" PRIu64 "\n",
			    oldest_id, oldest_session->id,
			    oldest_session->lastop,
			    oldest_session->txn.snap_min);
		}
		WT_ASSERT(session, txn_global->scan_count > 0);
		(void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);
	}
}
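
The scan_count handshake in __wt_txn_update_oldest doubles as a tiny reader/writer lock: scanners keep the counter positive, and the updater CASes 1 to -1 so the oldest ID cannot change underneath a concurrent scan. A minimal model of the same protocol using C11 atomics is shown below; the names are hypothetical and it is not the WiredTiger implementation.

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical stand-in for txn_global->scan_count. */
static atomic_int scan_count;

/*
 * scan_enter --
 *	Register as a scanner; spin while an updater holds exclusivity (-1).
 */
static void
scan_enter(void)
{
	int count;

	do {
		count = atomic_load(&scan_count);
	} while (count < 0 ||
	    !atomic_compare_exchange_weak(&scan_count, &count, count + 1));
}

/*
 * scan_exit --
 *	Drop our scanner reference.
 */
static void
scan_exit(void)
{
	(void)atomic_fetch_sub(&scan_count, 1);
}

/*
 * scan_try_exclusive --
 *	Succeed only if we are the sole scanner, flipping the count to -1
 * so no new scanner can start while the oldest ID is being published.
 */
static bool
scan_try_exclusive(void)
{
	int expected = 1;

	return (atomic_compare_exchange_strong(&scan_count, &expected, -1));
}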
Example #4
File: txn.c  Project: To4e/mongo
/*
 * __wt_txn_get_snapshot --
 *	Allocate a snapshot.
 */
void
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	uint64_t current_id, id;
	uint64_t prev_oldest_id, snap_min;
	uint32_t i, n, session_cnt;
	int32_t count;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = WT_SESSION_TXN_STATE(session);

	/*
	 * We're going to scan.  Increment the count of scanners to prevent the
	 * oldest ID from moving forwards.  Spin if the count is negative,
	 * which indicates that some thread is moving the oldest ID forwards.
	 */
	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1));

	current_id = snap_min = txn_global->current;
	prev_oldest_id = txn_global->oldest_id;

	/* For pure read-only workloads, avoid scanning. */
	if (prev_oldest_id == current_id) {
		txn_state->snap_min = current_id;
		__txn_sort_snapshot(session, 0, current_id);

		/* Check that the oldest ID has not moved in the meantime. */
		if (prev_oldest_id == txn_global->oldest_id) {
			WT_ASSERT(session, txn_global->scan_count > 0);
			(void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);
			return;
		}
	}

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Build our snapshot of any concurrent transaction IDs.
		 *
		 * Ignore:
		 *  - Our own ID: we always read our own updates.
		 *  - The ID if it is older than the oldest ID we saw. This
		 *    can happen if we race with a thread that is allocating
		 *    an ID -- the ID will not be used because the thread will
		 *    keep spinning until it gets a valid one.
		 */
		if (s != txn_state &&
		    (id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id)) {
			txn->snapshot[n++] = id;
			if (WT_TXNID_LT(id, snap_min))
				snap_min = id;
		}
	}

	/*
	 * If we got a new snapshot, update the published snap_min for this
	 * session.
	 */
	WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min));
	WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
	txn_state->snap_min = snap_min;

	WT_ASSERT(session, txn_global->scan_count > 0);
	(void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);

	__txn_sort_snapshot(session, n, current_id);
}
Example #5
File: txn.c  Project: judahschvimer/mongo
/*
 * __wt_txn_update_oldest --
 *	Sweep the running transactions to update the oldest ID required.
 */
int
__wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	uint64_t current_id, last_running, oldest_id;
	uint64_t prev_last_running, prev_oldest_id;
	bool strict, wait;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	strict = LF_ISSET(WT_TXN_OLDEST_STRICT);
	wait = LF_ISSET(WT_TXN_OLDEST_WAIT);

	current_id = last_running = txn_global->current;
	prev_last_running = txn_global->last_running;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * For pure read-only workloads, or if the update isn't forced and the
	 * oldest ID isn't too far behind, avoid scanning.
	 */
	if (prev_oldest_id == current_id ||
	    (!strict && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
		return (0);

	/* First do a read-only scan. */
	if (wait)
		__wt_readlock(session, txn_global->scan_rwlock);
	else if ((ret =
	    __wt_try_readlock(session, txn_global->scan_rwlock)) != 0)
		return (ret == EBUSY ? 0 : ret);
	__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);
	__wt_readunlock(session, txn_global->scan_rwlock);

	/*
	 * If the state hasn't changed (or hasn't moved far enough for
	 * non-forced updates), give up.
	 */
	if ((oldest_id == prev_oldest_id ||
	    (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
	    ((last_running == prev_last_running) ||
	    (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))))
		return (0);

	/* It looks like an update is necessary, wait for exclusive access. */
	if (wait)
		__wt_writelock(session, txn_global->scan_rwlock);
	else if ((ret =
	    __wt_try_writelock(session, txn_global->scan_rwlock)) != 0)
		return (ret == EBUSY ? 0 : ret);

	/*
	 * If the oldest ID has been updated while we waited, don't bother
	 * scanning.
	 */
	if (WT_TXNID_LE(oldest_id, txn_global->oldest_id) &&
	    WT_TXNID_LE(last_running, txn_global->last_running))
		goto done;

	/*
	 * Re-scan now that we have exclusive access.  This is necessary
	 * because threads get transaction snapshots with read locks, and we
	 * have to be sure that there isn't a thread that has taken a snapshot
	 * locally but not yet published its snap_min.
	 */
	__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);

#ifdef HAVE_DIAGNOSTIC
	{
	/*
	 * Make sure the ID doesn't move past any named snapshots.
	 *
	 * Don't include the read/assignment in the assert statement.  Coverity
	 * complains if there are assignments only done in diagnostic builds,
	 * and when the read is from a volatile.
	 */
	uint64_t id = txn_global->nsnap_oldest_id;
	WT_ASSERT(session,
	    id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
	}
#endif
	/* Update the oldest ID. */
	if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
		txn_global->oldest_id = oldest_id;
	if (WT_TXNID_LT(txn_global->last_running, last_running)) {
		txn_global->last_running = last_running;

#ifdef HAVE_VERBOSE
		/*
		 * Output a verbose message about long-running transactions,
		 * but only when some progress is being made.
		 */
		if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
		    current_id - oldest_id > 10000 && oldest_session != NULL) {
			__wt_verbose(session, WT_VERB_TRANSACTION,
			    "old snapshot %" PRIu64
			    " pinned in session %" PRIu32 " [%s]"
			    " with snap_min %" PRIu64 "\n",
			    oldest_id, oldest_session->id,
			    oldest_session->lastop,
			    oldest_session->txn.snap_min);
		}
#endif
	}

done:	__wt_writeunlock(session, txn_global->scan_rwlock);
	return (ret);
}
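
Structurally, this later revision replaces the scan_count handshake with a scan rwlock: an optimistic pass under the read lock decides whether anything would move, and only then is the write lock taken and the scan repeated. The same two-phase shape, reduced to a generic pthreads sketch with hypothetical shared state and a placeholder scan function (not WiredTiger code):

#include <pthread.h>
#include <stdint.h>

struct shared {
	pthread_rwlock_t lock;
	uint64_t oldest_id;	/* published value */
	uint64_t scratch;	/* stands in for the transaction table */
};

static uint64_t
scan_oldest(struct shared *s)
{
	/* Placeholder for walking the transaction table. */
	return (s->scratch);
}

static void
update_oldest(struct shared *s)
{
	uint64_t oldest;

	/* Phase 1: read-only scan, taken concurrently with other readers. */
	(void)pthread_rwlock_rdlock(&s->lock);
	oldest = scan_oldest(s);
	(void)pthread_rwlock_unlock(&s->lock);

	if (oldest <= s->oldest_id)		/* nothing would change */
		return;

	/* Phase 2: exclusive access; re-scan, then publish. */
	(void)pthread_rwlock_wrlock(&s->lock);
	oldest = scan_oldest(s);
	if (oldest > s->oldest_id)
		s->oldest_id = oldest;
	(void)pthread_rwlock_unlock(&s->lock);
}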
Example #6
File: txn.c  Project: judahschvimer/mongo
/*
 * __txn_oldest_scan --
 *	Sweep the running transactions to calculate the oldest ID required.
 */
static void
__txn_oldest_scan(WT_SESSION_IMPL *session,
    uint64_t *oldest_idp, uint64_t *last_runningp,
    WT_SESSION_IMPL **oldest_sessionp)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t id, last_running, oldest_id, prev_oldest_id;
	uint32_t i, session_cnt;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	oldest_session = NULL;

	/* The oldest ID cannot change while we are holding the scan lock. */
	prev_oldest_id = txn_global->oldest_id;
	oldest_id = last_running = txn_global->current;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Update the oldest ID.
		 *
		 * Ignore: IDs older than the oldest ID we saw. This can happen
		 * if we race with a thread that is allocating an ID -- the ID
		 * will not be used because the thread will keep spinning until
		 * it gets a valid one.
		 */
		if ((id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id) &&
		    WT_TXNID_LT(id, last_running))
			last_running = id;

		/*
		 * !!!
		 * Note: Don't ignore snap_min values older than the previous
		 * oldest ID.  Read-uncommitted operations publish snap_min
		 * values without acquiring the scan lock to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for
		 * more details.
		 */
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, oldest_id)) {
			oldest_id = id;
			oldest_session = &conn->sessions[i];
		}
	}

	if (WT_TXNID_LT(last_running, oldest_id))
		oldest_id = last_running;

	/* The oldest ID can't move past any named snapshots. */
	if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
	    WT_TXNID_LT(id, oldest_id))
		oldest_id = id;

	*oldest_idp = oldest_id;
	*oldest_sessionp = oldest_session;
	*last_runningp = last_running;
}
Example #7
File: txn.c  Project: judahschvimer/mongo
/*
 * __wt_txn_get_snapshot --
 *	Allocate a snapshot.
 */
int
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	uint64_t current_id, id;
	uint64_t prev_oldest_id, snap_min;
	uint32_t i, n, session_cnt;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = WT_SESSION_TXN_STATE(session);
	n = 0;

	/*
	 * Spin waiting for the lock: the sleeps in our blocking readlock
	 * implementation are too slow for scanning the transaction table.
	 */
	while ((ret =
	    __wt_try_readlock(session, txn_global->scan_rwlock)) == EBUSY)
		WT_PAUSE();
	WT_RET(ret);

	current_id = snap_min = txn_global->current;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * Include the checkpoint transaction, if one is running: we should
	 * ignore any uncommitted changes the checkpoint has written to the
	 * metadata.  We don't have to keep the checkpoint's changes pinned,
	 * so don't include it in the published snap_min.
	 */
	if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE)
		txn->snapshot[n++] = id;

	/* For pure read-only workloads, avoid scanning. */
	if (prev_oldest_id == current_id) {
		txn_state->snap_min = current_id;
		/* Check that the oldest ID has not moved in the meantime. */
		WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
		goto done;
	}

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Build our snapshot of any concurrent transaction IDs.
		 *
		 * Ignore:
		 *  - Our own ID: we always read our own updates.
		 *  - The ID if it is older than the oldest ID we saw. This
		 *    can happen if we race with a thread that is allocating
		 *    an ID -- the ID will not be used because the thread will
		 *    keep spinning until it gets a valid one.
		 */
		if (s != txn_state &&
		    (id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id)) {
			txn->snapshot[n++] = id;
			if (WT_TXNID_LT(id, snap_min))
				snap_min = id;
		}
	}

	/*
	 * If we got a new snapshot, update the published snap_min for this
	 * session.
	 */
	WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min));
	WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
	txn_state->snap_min = snap_min;

done:	__wt_readunlock(session, txn_global->scan_rwlock);
	__txn_sort_snapshot(session, n, current_id);
	return (0);
}
Example #8
File: txn.c  Project: bsamek/wiredtiger
/*
 * __txn_oldest_scan --
 *	Sweep the running transactions to calculate the oldest ID required.
 */
static void
__txn_oldest_scan(WT_SESSION_IMPL *session,
    uint64_t *oldest_idp, uint64_t *last_runningp, uint64_t *metadata_pinnedp,
    WT_SESSION_IMPL **oldest_sessionp)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t id, last_running, metadata_pinned, oldest_id, prev_oldest_id;
	uint32_t i, session_cnt;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	oldest_session = NULL;

	/* The oldest ID cannot change while we are holding the scan lock. */
	prev_oldest_id = txn_global->oldest_id;
	last_running = oldest_id = txn_global->current;
	if ((metadata_pinned = txn_global->checkpoint_state.id) == WT_TXN_NONE)
		metadata_pinned = oldest_id;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/* Update the last running transaction ID. */
		if ((id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id) &&
		    WT_TXNID_LT(id, last_running))
			last_running = id;

		/* Update the metadata pinned ID. */
		if ((id = s->metadata_pinned) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, metadata_pinned))
			metadata_pinned = id;

		/*
		 * !!!
		 * Note: Don't ignore pinned ID values older than the previous
		 * oldest ID.  Read-uncommitted operations publish pinned ID
		 * values without acquiring the scan lock to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for more
		 * details.
		 */
		if ((id = s->pinned_id) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, oldest_id)) {
			oldest_id = id;
			oldest_session = &conn->sessions[i];
		}
	}

	if (WT_TXNID_LT(last_running, oldest_id))
		oldest_id = last_running;

	/* The oldest ID can't move past any named snapshots. */
	if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
	    WT_TXNID_LT(id, oldest_id))
		oldest_id = id;

	/* The metadata pinned ID can't move past the oldest ID. */
	if (WT_TXNID_LT(oldest_id, metadata_pinned))
		metadata_pinned = oldest_id;

	*last_runningp = last_running;
	*metadata_pinnedp = metadata_pinned;
	*oldest_idp = oldest_id;
	*oldest_sessionp = oldest_session;
}