Example #1
/*
 * __wt_async_op_enqueue --
 *	Enqueue an operation onto the work queue.
 */
int
__wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op)
{
	WT_ASYNC *async;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	uint64_t cur_head, cur_tail, my_alloc, my_slot;
#ifdef	HAVE_DIAGNOSTIC
	WT_ASYNC_OP_IMPL *my_op;
#endif

	conn = S2C(session);
	async = conn->async;

	/*
	 * If an application re-uses a WT_ASYNC_OP, we end up here with an
	 * invalid object.
	 */
	if (op->state != WT_ASYNCOP_READY)
		WT_RET_MSG(session, EINVAL,
		    "application error: WT_ASYNC_OP already in use");

	/*
	 * Enqueue op at the tail of the work queue.
	 * We get our slot in the ring buffer to use.
	 */
	my_alloc = WT_ATOMIC_ADD8(async->alloc_head, 1);
	my_slot = my_alloc % async->async_qsize;

	/*
	 * Make sure we haven't wrapped around the queue.
	 * If so, wait for the tail to advance off this slot.
	 */
	WT_ORDERED_READ(cur_tail, async->tail_slot);
	while (cur_tail == my_slot) {
		__wt_yield();
		WT_ORDERED_READ(cur_tail, async->tail_slot);
	}

#ifdef	HAVE_DIAGNOSTIC
	WT_ORDERED_READ(my_op, async->async_queue[my_slot]);
	if (my_op != NULL)
		return (__wt_panic(session));
#endif
	WT_PUBLISH(async->async_queue[my_slot], op);
	op->state = WT_ASYNCOP_ENQUEUED;
	if (WT_ATOMIC_ADD4(async->cur_queue, 1) > async->max_queue)
		WT_PUBLISH(async->max_queue, async->cur_queue);
	/*
	 * Multiple threads may be adding ops to the queue.  We need to wait
	 * our turn to make our slot visible to workers.
	 */
	WT_ORDERED_READ(cur_head, async->head);
	while (cur_head != (my_alloc - 1)) {
		__wt_yield();
		WT_ORDERED_READ(cur_head, async->head);
	}
	WT_PUBLISH(async->head, my_alloc);
	return (ret);
}
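
Every example on this page leans on the WT_PUBLISH/WT_ORDERED_READ pair. As a rough sketch of the contract they provide (the definitions below are simplified assumptions using a GCC full barrier, not WiredTiger's actual platform-specific macros): a publish fences before the store so earlier writes are visible first, and an ordered read fences after the load so later reads can't be hoisted above it.

/*
 * Simplified sketch of the publish/ordered-read contract, assuming
 * GCC's __sync_synchronize() full barrier; the real macros are
 * per-platform and may use lighter fences.
 */
#define	MY_WRITE_BARRIER()	__sync_synchronize()
#define	MY_READ_BARRIER()	__sync_synchronize()

/* Fence, then store: all earlier writes are visible before the store. */
#define	MY_PUBLISH(v, val) do {						\
	MY_WRITE_BARRIER();						\
	(v) = (val);							\
} while (0)

/* Load, then fence: no later read can be reordered above this load. */
#define	MY_ORDERED_READ(v, val) do {					\
	(v) = (val);							\
	MY_READ_BARRIER();						\
} while (0)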
Example #2
/*
 * __wt_txn_get_oldest --
 *	Update the current transaction's cached copy of the oldest snap_min
 *	value.
 */
void
__wt_txn_get_oldest(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	wt_txnid_t id, oldest_snap_min;
	uint32_t i, session_cnt;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;

	/* If nothing has changed since last time, we're done. */
	if (txn->last_oldest_gen == txn_global->gen)
		return;
	txn->last_oldest_gen = txn_global->gen;

	oldest_snap_min =
	    (txn->id != WT_TXN_NONE) ? txn->id : txn_global->current;

	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states;
	    i < session_cnt;
	    i++, s++) {
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    TXNID_LT(id, oldest_snap_min))
			oldest_snap_min = id;
	}

	txn->oldest_snap_min = oldest_snap_min;
}
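
These scans order transaction IDs with TXNID_LT/TXNID_LE (spelled WT_TXNID_LT/WT_TXNID_LE in the later examples). The exact definitions aren't shown in this source; as an assumption, with a monotonically increasing 64-bit counter that never wraps in practice, plain integer comparisons suffice:

/*
 * Assumed definitions of the ID-ordering helpers used in these scans;
 * if IDs never wrap in practice, plain comparison works.
 */
#define	TXNID_LT(t1, t2)	((t1) < (t2))
#define	TXNID_LE(t1, t2)	((t1) <= (t2))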
Example #3
/*
 * __wt_txn_get_snapshot --
 *	Set up a snapshot in the current transaction, without allocating an ID.
 */
void
__wt_txn_get_snapshot(
    WT_SESSION_IMPL *session, wt_txnid_t my_id, wt_txnid_t max_id, int force)
{
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	wt_txnid_t current_id, id, oldest_snap_min;
	uint32_t i, n, session_cnt;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = &txn_global->states[session->id];

	/* If nothing has changed since last time, we're done. */
	if (!force && txn->last_id == txn_global->current &&
	    txn->last_gen == txn_global->gen) {
		txn_state->snap_min = txn->snap_min;
		return;
	}

	do {
		/* Take a copy of the current generation and transaction ID. */
		txn->last_gen = txn->last_oldest_gen = txn_global->gen;
		txn->last_id = oldest_snap_min = current_id =
		    txn_global->current;

		/* Copy the array of concurrent transactions. */
		WT_ORDERED_READ(session_cnt, conn->session_cnt);
		for (i = n = 0, s = txn_global->states;
		    i < session_cnt;
		    i++, s++) {
			/* Ignore the session's own transaction. */
			if (i == session->id)
				continue;
			if ((id = s->snap_min) != WT_TXN_NONE)
				if (TXNID_LT(id, oldest_snap_min))
					oldest_snap_min = id;
			if ((id = s->id) == WT_TXN_NONE)
				continue;
			else if (max_id == WT_TXN_NONE || TXNID_LT(id, max_id))
				txn->snapshot[n++] = id;
		}

		/*
		 * Ensure the snapshot reads are scheduled before re-checking
		 * the global current ID.
		 */
		WT_READ_BARRIER();
	} while (current_id != txn_global->current);

	__txn_sort_snapshot(session, n,
	    (max_id != WT_TXN_NONE) ? max_id : current_id,
	    oldest_snap_min);
	txn_state->snap_min =
	    (my_id == WT_TXN_NONE || TXNID_LT(txn->snap_min, my_id)) ?
	    txn->snap_min : my_id;
}
Example #4
/*
 * __split_oldest_gen --
 *	Calculate the oldest active split generation.
 */
static uint64_t
__split_oldest_gen(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *s;
	uint64_t gen, oldest;
	u_int i, session_cnt;

	conn = S2C(session);
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = conn->sessions, oldest = conn->split_gen + 1;
	    i < session_cnt;
	    i++, s++)
		if (((gen = s->split_gen) != 0) && gen < oldest)
			oldest = gen;

	return (oldest);
}
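
For the sweep above to be safe, each session must publish the generation it is reading under before touching structures a split might free. A minimal sketch of that reader side, with hypothetical helper names (the real code wraps this in its own enter/leave macros):

/*
 * Hypothetical reader-side helpers for the generation scheme swept by
 * __split_oldest_gen: publish the global split generation on entry so
 * the sweep sees it, clear it on exit.  Memory retired by a split may
 * only be freed once its generation is older than every published one.
 */
static inline void
my_split_enter(WT_SESSION_IMPL *session)
{
	WT_PUBLISH(session->split_gen, S2C(session)->split_gen);
}

static inline void
my_split_leave(WT_SESSION_IMPL *session)
{
	WT_PUBLISH(session->split_gen, 0);
}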
Example #5
File: txn.c Project: To4e/mongo
/*
 * __wt_txn_update_oldest --
 *	Sweep the running transactions to update the oldest ID required.
 * !!!
 * If a data-source is calling the WT_EXTENSION_API.transaction_oldest
 * method (for the oldest transaction ID not yet visible to a running
 * transaction), and then comparing that oldest ID against committed
 * transactions to see if updates for a committed transaction are still
 * visible to running transactions, the oldest transaction ID may be
 * the same as the last committed transaction ID, if the transaction
 * state wasn't refreshed after the last transaction committed.  Push
 * past the last committed transaction.
 */
void
__wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t current_id, id, last_running, oldest_id, prev_oldest_id;
	uint32_t i, session_cnt;
	int32_t count;
	int last_running_moved;

	conn = S2C(session);
	txn_global = &conn->txn_global;

	current_id = last_running = txn_global->current;
	oldest_session = NULL;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * For pure read-only workloads, or if the update isn't forced and the
	 * oldest ID isn't too far behind, avoid scanning.
	 */
	if (prev_oldest_id == current_id ||
	    (!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
		return;

	/*
	 * We're going to scan.  Increment the count of scanners to prevent the
	 * oldest ID from moving forwards.  Spin if the count is negative,
	 * which indicates that some thread is moving the oldest ID forwards.
	 */
	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1));

	/* The oldest ID cannot change until the scan count goes to zero. */
	prev_oldest_id = txn_global->oldest_id;
	current_id = oldest_id = last_running = txn_global->current;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Update the oldest ID.
		 *
		 * Ignore: IDs older than the oldest ID we saw. This can happen
		 * if we race with a thread that is allocating an ID -- the ID
		 * will not be used because the thread will keep spinning until
		 * it gets a valid one.
		 */
		if ((id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id) &&
		    WT_TXNID_LT(id, last_running))
			last_running = id;

		/*
		 * !!!
		 * Note: Don't ignore snap_min values older than the previous
		 * oldest ID.  Read-uncommitted operations publish snap_min
		 * values without incrementing scan_count to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for
		 * more details.
		 */
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, oldest_id)) {
			oldest_id = id;
			oldest_session = &conn->sessions[i];
		}
	}

	if (WT_TXNID_LT(last_running, oldest_id))
		oldest_id = last_running;

	/* The oldest ID can't move past any named snapshots. */
	if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
	    WT_TXNID_LT(id, oldest_id))
		oldest_id = id;

	/* Update the last running ID. */
	last_running_moved =
	    WT_TXNID_LT(txn_global->last_running, last_running);

	/* Update the oldest ID. */
	if ((WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) &&
	    WT_ATOMIC_CAS4(txn_global->scan_count, 1, -1)) {
		WT_ORDERED_READ(session_cnt, conn->session_cnt);
		for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
			if ((id = s->id) != WT_TXN_NONE &&
			    WT_TXNID_LT(id, last_running))
				last_running = id;
			if ((id = s->snap_min) != WT_TXN_NONE &&
			    WT_TXNID_LT(id, oldest_id))
				oldest_id = id;
		}

		if (WT_TXNID_LT(last_running, oldest_id))
			oldest_id = last_running;

#ifdef HAVE_DIAGNOSTIC
		/*
		 * Make sure the ID doesn't move past any named snapshots.
		 *
		 * Don't include the read/assignment in the assert statement.
		 * Coverity complains if there are assignments only done in
		 * diagnostic builds, and when the read is from a volatile.
		 */
		id = txn_global->nsnap_oldest_id;
		WT_ASSERT(session,
		    id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
#endif
		if (WT_TXNID_LT(txn_global->last_running, last_running))
			txn_global->last_running = last_running;
		if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
			txn_global->oldest_id = oldest_id;
		WT_ASSERT(session, txn_global->scan_count == -1);
		txn_global->scan_count = 0;
	} else {
		if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
		    current_id - oldest_id > 10000 && last_running_moved &&
		    oldest_session != NULL) {
			(void)__wt_verbose(session, WT_VERB_TRANSACTION,
			    "old snapshot %" PRIu64
			    " pinned in session %d [%s]"
			    " with snap_min %" PRIu64 "\n",
			    oldest_id, oldest_session->id,
			    oldest_session->lastop,
			    oldest_session->txn.snap_min);
		}
		WT_ASSERT(session, txn_global->scan_count > 0);
		(void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);
	}
}
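
The scan_count handshake in this example is a small reader/writer protocol: positive values count concurrent scanners, -1 marks one thread holding exclusive access so it can move the oldest ID forward. A sketch of it factored into helpers (hypothetical names, same macros as the example):

/* Enter a scan: spin out any exclusive holder, then add ourselves. */
static inline void
my_scan_enter(WT_TXN_GLOBAL *txn_global)
{
	int32_t count;

	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1));
}

/* Leave a scan; a lone scanner may instead CAS 1 -> -1 for exclusivity. */
static inline void
my_scan_leave(WT_TXN_GLOBAL *txn_global)
{
	(void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);
}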
Example #6
File: txn.c Project: To4e/mongo
/*
 * __wt_txn_get_snapshot --
 *	Allocate a snapshot.
 */
void
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	uint64_t current_id, id;
	uint64_t prev_oldest_id, snap_min;
	uint32_t i, n, session_cnt;
	int32_t count;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = WT_SESSION_TXN_STATE(session);

	/*
	 * We're going to scan.  Increment the count of scanners to prevent the
	 * oldest ID from moving forwards.  Spin if the count is negative,
	 * which indicates that some thread is moving the oldest ID forwards.
	 */
	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !WT_ATOMIC_CAS4(txn_global->scan_count, count, count + 1));

	current_id = snap_min = txn_global->current;
	prev_oldest_id = txn_global->oldest_id;

	/* For pure read-only workloads, avoid scanning. */
	if (prev_oldest_id == current_id) {
		txn_state->snap_min = current_id;
		__txn_sort_snapshot(session, 0, current_id);

		/* Check that the oldest ID has not moved in the meantime. */
		if (prev_oldest_id == txn_global->oldest_id) {
			WT_ASSERT(session, txn_global->scan_count > 0);
			(void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);
			return;
		}
	}

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Build our snapshot of any concurrent transaction IDs.
		 *
		 * Ignore:
		 *  - Our own ID: we always read our own updates.
		 *  - The ID if it is older than the oldest ID we saw. This
		 *    can happen if we race with a thread that is allocating
		 *    an ID -- the ID will not be used because the thread will
		 *    keep spinning until it gets a valid one.
		 */
		if (s != txn_state &&
		    (id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id)) {
			txn->snapshot[n++] = id;
			if (WT_TXNID_LT(id, snap_min))
				snap_min = id;
		}
	}

	/*
	 * If we got a new snapshot, update the published snap_min for this
	 * session.
	 */
	WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min));
	WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
	txn_state->snap_min = snap_min;

	WT_ASSERT(session, txn_global->scan_count > 0);
	(void)WT_ATOMIC_SUB4(txn_global->scan_count, 1);

	__txn_sort_snapshot(session, n, current_id);
}
Example #7
/*
 * __wt_btcur_prev --
 *	Move to the previous record in the tree.
 */
int
__wt_btcur_prev(WT_CURSOR_BTREE *cbt, int discard)
{
	WT_DECL_RET;
	WT_PAGE *page;
	WT_SESSION_IMPL *session;
	uint32_t flags;
	int newpage;

	session = (WT_SESSION_IMPL *)cbt->iface.session;
	WT_DSTAT_INCR(session, cursor_prev);

	flags = WT_TREE_SKIP_INTL | WT_TREE_PREV;	/* Tree walk flags. */
	if (discard)
		LF_SET(WT_TREE_DISCARD);

retry:	WT_RET(__cursor_func_init(cbt, 0));
	__cursor_position_clear(cbt);

	/*
	 * If we aren't already iterating in the right direction, there's
	 * some setup to do.
	 */
	if (!F_ISSET(cbt, WT_CBT_ITERATE_PREV))
		__wt_btcur_iterate_setup(cbt, 0);

	/*
	 * If this is a modification, we're about to read information from the
	 * page, save the write generation.
	 */
	page = cbt->page;
	if (discard && page != NULL) {
		WT_ERR(__wt_page_modify_init(session, page));
		WT_ORDERED_READ(cbt->write_gen, page->modify->write_gen);
	}

	/*
	 * Walk any page we're holding until the underlying call returns not-
	 * found.  Then, move to the previous page, until we reach the start
	 * of the file.
	 */
	for (newpage = 0;; newpage = 1) {
		if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_append_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_append_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret == 0)
				break;
			F_CLR(cbt, WT_CBT_ITERATE_APPEND);
			if (ret != WT_NOTFOUND)
				break;
			newpage = 1;
		}
		if (page != NULL) {
			switch (page->type) {
			case WT_PAGE_COL_FIX:
				ret = __cursor_fix_prev(cbt, newpage);
				break;
			case WT_PAGE_COL_VAR:
				ret = __cursor_var_prev(cbt, newpage);
				break;
			case WT_PAGE_ROW_LEAF:
				ret = __cursor_row_prev(cbt, newpage);
				break;
			WT_ILLEGAL_VALUE_ERR(session);
			}
			if (ret != WT_NOTFOUND)
				break;
		}

		cbt->page = NULL;
		WT_ERR(__wt_tree_walk(session, &page, flags));
		WT_ERR_TEST(page == NULL, WT_NOTFOUND);
		WT_ASSERT(session,
		    page->type != WT_PAGE_COL_INT &&
		    page->type != WT_PAGE_ROW_INT);
		cbt->page = page;

		/* Initialize the page's modification information */
		if (discard) {
			WT_ERR(__wt_page_modify_init(session, page));
			WT_ORDERED_READ(
			    cbt->write_gen, page->modify->write_gen);
		}

		/*
		 * The last page in a column-store has appended entries.
		 * We handle it separately from the usual cursor code:
		 * it's only that one page and it's in a simple format.
		 */
		if (page->type != WT_PAGE_ROW_LEAF &&
		    (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
			F_SET(cbt, WT_CBT_ITERATE_APPEND);
	}

err:	if (ret == WT_RESTART)
		goto retry;
	WT_TRET(__cursor_func_resolve(cbt, ret));
	return (ret);
}
Example #8
/*
 * __txn_oldest_scan --
 *	Sweep the running transactions to calculate the oldest ID required.
 */
static void
__txn_oldest_scan(WT_SESSION_IMPL *session,
    uint64_t *oldest_idp, uint64_t *last_runningp,
    WT_SESSION_IMPL **oldest_sessionp)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t id, last_running, oldest_id, prev_oldest_id;
	uint32_t i, session_cnt;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	oldest_session = NULL;

	/* The oldest ID cannot change while we are holding the scan lock. */
	prev_oldest_id = txn_global->oldest_id;
	oldest_id = last_running = txn_global->current;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Update the oldest ID.
		 *
		 * Ignore: IDs older than the oldest ID we saw. This can happen
		 * if we race with a thread that is allocating an ID -- the ID
		 * will not be used because the thread will keep spinning until
		 * it gets a valid one.
		 */
		if ((id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id) &&
		    WT_TXNID_LT(id, last_running))
			last_running = id;

		/*
		 * !!!
		 * Note: Don't ignore snap_min values older than the previous
		 * oldest ID.  Read-uncommitted operations publish snap_min
		 * values without acquiring the scan lock to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for
		 * more details.
		 */
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, oldest_id)) {
			oldest_id = id;
			oldest_session = &conn->sessions[i];
		}
	}

	if (WT_TXNID_LT(last_running, oldest_id))
		oldest_id = last_running;

	/* The oldest ID can't move past any named snapshots. */
	if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
	    WT_TXNID_LT(id, oldest_id))
		oldest_id = id;

	*oldest_idp = oldest_id;
	*oldest_sessionp = oldest_session;
	*last_runningp = last_running;
}
Example #9
/*
 * __wt_txn_get_snapshot --
 *	Allocate a snapshot.
 */
int
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	uint64_t current_id, id;
	uint64_t prev_oldest_id, snap_min;
	uint32_t i, n, session_cnt;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = WT_SESSION_TXN_STATE(session);
	n = 0;

	/*
	 * Spin waiting for the lock: the sleeps in our blocking readlock
	 * implementation are too slow for scanning the transaction table.
	 */
	while ((ret =
	    __wt_try_readlock(session, txn_global->scan_rwlock)) == EBUSY)
		WT_PAUSE();
	WT_RET(ret);

	current_id = snap_min = txn_global->current;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * Include the checkpoint transaction, if one is running: we should
	 * ignore any uncommitted changes the checkpoint has written to the
	 * metadata.  We don't have to keep the checkpoint's changes pinned, so
	 * don't include it in the published snap_min.
	 */
	if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE)
		txn->snapshot[n++] = id;

	/* For pure read-only workloads, avoid scanning. */
	if (prev_oldest_id == current_id) {
		txn_state->snap_min = current_id;
		/* Check that the oldest ID has not moved in the meantime. */
		WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
		goto done;
	}

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Build our snapshot of any concurrent transaction IDs.
		 *
		 * Ignore:
		 *  - Our own ID: we always read our own updates.
		 *  - The ID if it is older than the oldest ID we saw. This
		 *    can happen if we race with a thread that is allocating
		 *    an ID -- the ID will not be used because the thread will
		 *    keep spinning until it gets a valid one.
		 */
		if (s != txn_state &&
		    (id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id)) {
			txn->snapshot[n++] = id;
			if (WT_TXNID_LT(id, snap_min))
				snap_min = id;
		}
	}

	/*
	 * If we got a new snapshot, update the published snap_min for this
	 * session.
	 */
	WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min));
	WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
	txn_state->snap_min = snap_min;

done:	__wt_readunlock(session, txn_global->scan_rwlock);
	__txn_sort_snapshot(session, n, current_id);
	return (0);
}
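
The lock acquisition at the top of this version is its own idiom: busy-wait on the try-lock with a pause rather than taking the blocking readlock, because the scan is short and the blocking implementation's sleeps would dominate. Factored into a helper (hypothetical name):

/* Spin-acquire a read lock for short critical sections. */
static inline int
my_spin_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *lock)
{
	WT_DECL_RET;

	while ((ret = __wt_try_readlock(session, lock)) == EBUSY)
		WT_PAUSE();
	return (ret);
}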
Example #10
/*
 * __wt_txn_refresh --
 *	Allocate a transaction ID and/or a snapshot.
 */
void
__wt_txn_refresh(WT_SESSION_IMPL *session, uint64_t max_id, int get_snapshot)
{
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	uint64_t current_id, id, snap_min, oldest_id, prev_oldest_id;
	uint32_t i, n, session_cnt;
	int32_t count;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = &txn_global->states[session->id];

	prev_oldest_id = txn_global->oldest_id;
	current_id = snap_min = txn_global->current;

	/* For pure read-only workloads, avoid updates to shared state. */
	if (!get_snapshot) {
		/*
		 * If we are trying to update the oldest ID and it is already
		 * equal to the current ID, there is no point scanning.
		 */
		if (prev_oldest_id == current_id)
			return;
	} else if (txn->id == max_id &&
	    txn->snapshot_count == 0 &&
	    txn->snap_min == snap_min &&
	    TXNID_LE(prev_oldest_id, snap_min)) {
		txn_state->snap_min = txn->snap_min;
		/* If nothing has changed in the meantime, we're done. */
		if (txn_global->scan_count == 0 &&
		    txn_global->oldest_id == prev_oldest_id)
			return;
	}

	/*
	 * We're going to scan.  Increment the count of scanners to prevent the
	 * oldest ID from moving forwards.  Spin if the count is negative,
	 * which indicates that some thread is moving the oldest ID forwards.
	 */
	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !WT_ATOMIC_CAS(txn_global->scan_count, count, count + 1));

	/* The oldest ID cannot change until the scan count goes to zero. */
	prev_oldest_id = txn_global->oldest_id;
	current_id = snap_min = txn_global->current;

	/* If the maximum ID is constrained, so is the oldest. */
	oldest_id = (max_id != WT_TXN_NONE) ? max_id : snap_min;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Ignore the ID if we are committing (indicated by max_id
		 * being set): it is about to be released.
		 *
		 * Also ignore the ID if it is older than the oldest ID we saw.
		 * This can happen if we race with a thread that is allocating
		 * an ID -- the ID will not be used because the thread will
		 * keep spinning until it gets a valid one.
		 */
		if ((id = s->id) != WT_TXN_NONE && id + 1 != max_id &&
		    TXNID_LE(prev_oldest_id, id)) {
			if (get_snapshot)
				txn->snapshot[n++] = id;
			if (TXNID_LT(id, snap_min))
				snap_min = id;
		}

		/*
		 * Ignore the session's own snap_min if we are in the process
		 * of updating it.
		 */
		if (get_snapshot && s == txn_state)
			continue;

		/*
		 * !!!
		 * Note: Don't ignore snap_min values older than the previous
		 * oldest ID.  Read-uncommitted operations publish snap_min
		 * values without incrementing scan_count to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for
		 * more details.
		 */
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    TXNID_LT(id, oldest_id))
			oldest_id = id;
	}

	if (TXNID_LT(snap_min, oldest_id))
		oldest_id = snap_min;

	if (get_snapshot) {
		WT_ASSERT(session, TXNID_LE(prev_oldest_id, snap_min));
		WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
		txn_state->snap_min = snap_min;
	}

	/*
	 * Update the last running ID if we have a much newer value or we are
	 * forcing an update.
	 */
	if (!get_snapshot || snap_min > txn_global->last_running + 100)
		txn_global->last_running = snap_min;

	/*
	 * Update the oldest ID if we have a newer ID and we can get exclusive
	 * access.  During normal snapshot refresh, only do this if we have a
	 * much newer value.  Once we get exclusive access, do another pass to
	 * make sure nobody else is using an earlier ID.
	 */
	if (max_id == WT_TXN_NONE &&
	    TXNID_LT(prev_oldest_id, oldest_id) &&
	    (!get_snapshot || oldest_id - prev_oldest_id > 100) &&
	    WT_ATOMIC_CAS(txn_global->scan_count, 1, -1)) {
		WT_ORDERED_READ(session_cnt, conn->session_cnt);
		for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
			if ((id = s->id) != WT_TXN_NONE &&
			    TXNID_LT(id, oldest_id))
				oldest_id = id;
			if ((id = s->snap_min) != WT_TXN_NONE &&
			    TXNID_LT(id, oldest_id))
				oldest_id = id;
		}
		if (TXNID_LT(txn_global->oldest_id, oldest_id))
			txn_global->oldest_id = oldest_id;
		txn_global->scan_count = 0;
	} else {
		WT_ASSERT(session, txn_global->scan_count > 0);
		(void)WT_ATOMIC_SUB(txn_global->scan_count, 1);
	}

	if (get_snapshot)
		__txn_sort_snapshot(session, n, current_id);
}
Example #11
/*
 * __txn_oldest_scan --
 *	Sweep the running transactions to calculate the oldest ID required.
 */
static void
__txn_oldest_scan(WT_SESSION_IMPL *session,
    uint64_t *oldest_idp, uint64_t *last_runningp, uint64_t *metadata_pinnedp,
    WT_SESSION_IMPL **oldest_sessionp)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t id, last_running, metadata_pinned, oldest_id, prev_oldest_id;
	uint32_t i, session_cnt;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	oldest_session = NULL;

	/* The oldest ID cannot change while we are holding the scan lock. */
	prev_oldest_id = txn_global->oldest_id;
	last_running = oldest_id = txn_global->current;
	if ((metadata_pinned = txn_global->checkpoint_state.id) == WT_TXN_NONE)
		metadata_pinned = oldest_id;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/* Update the last running transaction ID. */
		if ((id = s->id) != WT_TXN_NONE &&
		    WT_TXNID_LE(prev_oldest_id, id) &&
		    WT_TXNID_LT(id, last_running))
			last_running = id;

		/* Update the metadata pinned ID. */
		if ((id = s->metadata_pinned) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, metadata_pinned))
			metadata_pinned = id;

		/*
		 * !!!
		 * Note: Don't ignore pinned ID values older than the previous
		 * oldest ID.  Read-uncommitted operations publish pinned ID
		 * values without acquiring the scan lock to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for more
		 * details.
		 */
		if ((id = s->pinned_id) != WT_TXN_NONE &&
		    WT_TXNID_LT(id, oldest_id)) {
			oldest_id = id;
			oldest_session = &conn->sessions[i];
		}
	}

	if (WT_TXNID_LT(last_running, oldest_id))
		oldest_id = last_running;

	/* The oldest ID can't move past any named snapshots. */
	if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
	    WT_TXNID_LT(id, oldest_id))
		oldest_id = id;

	/* The metadata pinned ID can't move past the oldest ID. */
	if (WT_TXNID_LT(oldest_id, metadata_pinned))
		metadata_pinned = oldest_id;

	*last_runningp = last_running;
	*metadata_pinnedp = metadata_pinned;
	*oldest_idp = oldest_id;
	*oldest_sessionp = oldest_session;
}
Example #12
/*
 * __wt_verbose_dump_txn --
 *	Output diagnostic information about the global transaction state.
 */
int
__wt_verbose_dump_txn(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_SESSION_IMPL *sess;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s;
	uint64_t id;
	uint32_t i, session_cnt;
#ifdef HAVE_TIMESTAMPS
	char hex_timestamp[3][2 * WT_TIMESTAMP_SIZE + 1];
#endif

	conn = S2C(session);
	txn_global = &conn->txn_global;

	WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
	WT_RET(__wt_msg(session, "transaction state dump"));

	WT_RET(__wt_msg(session, "current ID: %" PRIu64, txn_global->current));
	WT_RET(__wt_msg(session,
	    "last running ID: %" PRIu64, txn_global->last_running));
	WT_RET(__wt_msg(session, "oldest ID: %" PRIu64, txn_global->oldest_id));

#ifdef HAVE_TIMESTAMPS
	WT_RET(__wt_timestamp_to_hex_string(
	    session, hex_timestamp[0], &txn_global->commit_timestamp));
	WT_RET(__wt_msg(session, "commit timestamp: %s", hex_timestamp[0]));
	WT_RET(__wt_timestamp_to_hex_string(
	    session, hex_timestamp[0], &txn_global->oldest_timestamp));
	WT_RET(__wt_msg(session, "oldest timestamp: %s", hex_timestamp[0]));
	WT_RET(__wt_timestamp_to_hex_string(
	    session, hex_timestamp[0], &txn_global->pinned_timestamp));
	WT_RET(__wt_msg(session, "pinned timestamp: %s", hex_timestamp[0]));
	WT_RET(__wt_timestamp_to_hex_string(
	    session, hex_timestamp[0], &txn_global->stable_timestamp));
	WT_RET(__wt_msg(session, "stable timestamp: %s", hex_timestamp[0]));
	WT_RET(__wt_msg(session, "has_commit_timestamp: %s",
	    txn_global->has_commit_timestamp ? "yes" : "no"));
	WT_RET(__wt_msg(session, "has_oldest_timestamp: %s",
	    txn_global->has_oldest_timestamp ? "yes" : "no"));
	WT_RET(__wt_msg(session, "has_pinned_timestamp: %s",
	    txn_global->has_pinned_timestamp ? "yes" : "no"));
	WT_RET(__wt_msg(session, "has_stable_timestamp: %s",
	    txn_global->has_stable_timestamp ? "yes" : "no"));
	WT_RET(__wt_msg(session, "oldest_is_pinned: %s",
	    txn_global->oldest_is_pinned ? "yes" : "no"));
	WT_RET(__wt_msg(session, "stable_is_pinned: %s",
	    txn_global->stable_is_pinned ? "yes" : "no"));
#endif

	WT_RET(__wt_msg(session, "checkpoint running: %s",
	    txn_global->checkpoint_running ? "yes" : "no"));
	WT_RET(__wt_msg(session, "checkpoint generation: %" PRIu64,
	    __wt_gen(session, WT_GEN_CHECKPOINT)));
	WT_RET(__wt_msg(session, "checkpoint pinned ID: %" PRIu64,
	    txn_global->checkpoint_state.pinned_id));
	WT_RET(__wt_msg(session, "checkpoint txn ID: %" PRIu64,
	    txn_global->checkpoint_state.id));

	WT_RET(__wt_msg(session,
	    "oldest named snapshot ID: %" PRIu64, txn_global->nsnap_oldest_id));

	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	WT_RET(__wt_msg(session, "session count: %" PRIu32, session_cnt));
	WT_RET(__wt_msg(session, "Transaction state of active sessions:"));

	/*
	 * Walk each session transaction state and dump information. Accessing
	 * the content of session handles is not thread safe, so some
	 * information may change while traversing if other threads are active
	 * at the same time, which is OK since this is diagnostic code.
	 */
	for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/* Skip sessions with no active transaction */
		if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE)
			continue;
		sess = &conn->sessions[i];
		WT_RET(__wt_msg(session,
		    "ID: %" PRIu64
		    ", pinned ID: %" PRIu64
		    ", metadata pinned ID: %" PRIu64
		    ", name: %s",
		    id, s->pinned_id, s->metadata_pinned,
		    sess->name == NULL ?
		    "EMPTY" : sess->name));
		WT_RET(__wt_verbose_dump_txn_one(sess, &sess->txn));
	}

	return (0);
}
Example #13
/*
 * __wt_txn_begin --
 *	Begin a transaction.
 */
int
__wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	wt_txnid_t id, oldest_snap_min;
	uint32_t i, n, session_cnt;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = &txn_global->states[session->id];

	WT_ASSERT(session, txn_state->id == WT_TXN_NONE);

	WT_RET(__wt_config_gets_defno(session, cfg, "isolation", &cval));
	if (cval.len == 0)
		txn->isolation = session->isolation;
	else
		txn->isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    TXN_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
		    TXN_ISO_READ_COMMITTED : TXN_ISO_READ_UNCOMMITTED;

	F_SET(txn, TXN_RUNNING);

	do {
		/*
		 * Allocate a transaction ID.
		 *
		 * We use an atomic increment to ensure that we get a unique
		 * ID, then publish that to the global state table.
		 *
		 * If two threads race to allocate an ID, only the latest ID
		 * will proceed.  The winning thread can be sure its snapshot
		 * contains all of the earlier active IDs.  Threads that race
		 * and get an earlier ID may not appear in the snapshot,
		 * but they will loop and allocate a new ID before proceeding
		 * to make any updates.
		 *
		 * This potentially wastes transaction IDs when threads race to
		 * begin transactions, but that is the price we pay to keep
		 * this path latch free.
		 */
		do {
			txn->id = WT_ATOMIC_ADD(txn_global->current, 1);
		} while (txn->id == WT_TXN_NONE || txn->id == WT_TXN_ABORTED);
		WT_PUBLISH(txn_state->id, txn->id);

		/*
		 * If we are starting a snapshot isolation transaction, get
		 * a snapshot of the running transactions.
		 *
		 * If we already have a snapshot (e.g., for an auto-commit
		 * operation), update it so that the newly-allocated ID is
		 * visible.
		 */
		if (txn->isolation == TXN_ISO_SNAPSHOT) {
			txn->last_gen = txn->last_oldest_gen = txn_global->gen;
			oldest_snap_min = txn->id;

			/* Copy the array of concurrent transactions. */
			WT_ORDERED_READ(session_cnt, conn->session_cnt);
			for (i = n = 0, s = txn_global->states;
			    i < session_cnt;
			    i++, s++) {
				if ((id = s->snap_min) != WT_TXN_NONE)
					if (TXNID_LT(id, oldest_snap_min))
						oldest_snap_min = id;
				if ((id = s->id) == WT_TXN_NONE)
					continue;
				else
					txn->snapshot[n++] = id;
			}

			__txn_sort_snapshot(
			    session, n, txn->id, oldest_snap_min);
			txn_state->snap_min = txn->snap_min;
		}

		/*
		 * Ensure the snapshot reads are complete before re-checking
		 * the global current ID.
		 */
		WT_READ_BARRIER();
	} while (txn->id != txn_global->current);

	return (0);
}
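
The allocation loop above is the heart of the latch-free design: an atomic increment hands out unique IDs, and the reserved sentinel values are simply skipped. As a standalone sketch (helper name hypothetical, assuming WT_ATOMIC_ADD returns the incremented value as in the example):

/* Allocate a unique transaction ID, skipping reserved sentinels. */
static inline wt_txnid_t
my_txn_id_alloc(WT_TXN_GLOBAL *txn_global)
{
	wt_txnid_t id;

	do {
		id = WT_ATOMIC_ADD(txn_global->current, 1);
	} while (id == WT_TXN_NONE || id == WT_TXN_ABORTED);
	return (id);
}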