Example #1
/*
 * __wt_spin_lock_register_lock --
 *	Add a lock to the connection's list.
 */
int
__wt_spin_lock_register_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
{
	WT_CONNECTION_IMPL *conn;
	u_int i;

	/*
	 * There is a spinlock we initialize before we have a connection, the
	 * global library lock.  In that case, the session will be NULL and
	 * we can't track the lock.
	 */
	if (session == NULL)
		return (0);

	conn = S2C(session);

	for (i = 0; i < WT_SPINLOCK_MAX; i++)
		if (conn->spinlock_list[i] == NULL &&
		    WT_ATOMIC_CAS(conn->spinlock_list[i], NULL, t))
			return (0);

	WT_RET_MSG(session, ENOMEM,
	    "spinlock connection registry failed, increase the connection's "
	    "spinlock list size");
}
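
The registration loop above claims the first free slot in conn->spinlock_list by CAS-ing it from NULL to the new lock, so two racing registrations can never overwrite each other. Below is a minimal standalone sketch of the same slot-claiming pattern using C11 <stdatomic.h> instead of WiredTiger's WT_ATOMIC_CAS macro; the names (slot_list, register_item, SLOT_MAX) are illustrative, not from the library.

#include <stdatomic.h>
#include <stddef.h>

#define SLOT_MAX 32

/* Illustrative registry: each slot is an atomic pointer, NULL when free. */
static _Atomic(void *) slot_list[SLOT_MAX];

/* Claim the first free slot for "item"; return 0 on success, -1 if full. */
static int
register_item(void *item)
{
	void *expected;
	size_t i;

	for (i = 0; i < SLOT_MAX; i++) {
		expected = NULL;
		/*
		 * The CAS only succeeds if the slot is still NULL, so two
		 * threads racing on the same slot cannot both win it.
		 */
		if (atomic_compare_exchange_strong(
		    &slot_list[i], &expected, item))
			return (0);
	}
	return (-1);	/* Registry full. */
}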
Example #2
/*
 * __spin_lock_next_id --
 *	Return the next spinlock caller ID.
 */
static int
__spin_lock_next_id(WT_SESSION_IMPL *session, int *idp)
{
	static int lock_id = 0, next_id = 0;
	WT_DECL_RET;

	/* If we've ever registered this location, we already have an ID. */
	if (*idp != WT_SPINLOCK_REGISTER)
		return (0);

	/*
	 * We can't use the global spinlock to lock the ID allocation (duh!);
	 * instead, use a CAS instruction to serialize access to a local
	 * variable.  This work only gets done once per library instantiation,
	 * so there isn't a performance concern.
	 */
	while (!WT_ATOMIC_CAS(lock_id, 0, 1))
		__wt_yield();

	/* Allocate a blocking ID for this location. */
	if (*idp == WT_SPINLOCK_REGISTER) {
		if (next_id < WT_SPINLOCK_MAX_LOCATION_ID)
			*idp = next_id++;
		else
			WT_ERR_MSG(session, ENOMEM,
			    "spinlock caller location registry failed, "
			    "increase the connection's blocking matrix size");
	}

err:	WT_PUBLISH(lock_id, 0);
	return (ret);
}
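
The lock_id variable above is a tiny CAS-built spin guard protecting a once-per-process allocation. A minimal sketch of the same guard, assuming C11 atomics and sched_yield() as stand-ins for WT_ATOMIC_CAS, WT_PUBLISH and __wt_yield; the names here are illustrative only.

#include <sched.h>
#include <stdatomic.h>

static atomic_int lock_id;	/* 0 = unlocked, 1 = locked */
static int next_id;		/* protected by lock_id */

/* Allocate the next caller ID under a tiny CAS-based spin guard. */
static int
alloc_next_id(void)
{
	int expected, id;

	/* Spin until we flip the guard from 0 to 1. */
	for (;;) {
		expected = 0;
		if (atomic_compare_exchange_weak(&lock_id, &expected, 1))
			break;
		sched_yield();
	}

	id = next_id++;

	/* Release the guard; the store is ordered after the allocation. */
	atomic_store_explicit(&lock_id, 0, memory_order_release);
	return (id);
}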
Example #3
/*
 * __wt_cond_signal --
 *	Signal a waiting thread.
 */
int
__wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond)
{
	WT_DECL_RET;
	int locked;

	locked = 0;

	/*
	 * !!!
	 * This function MUST handle a NULL session handle.
	 */
	if (session != NULL && WT_VERBOSE_ISSET(session, mutex))
		WT_RET(__wt_verbose(
		    session, "signal %s cond (%p)", cond->name, cond));

	/* Fast path if already signalled. */
	if (cond->waiters == -1)
		return (0);

	if (cond->waiters > 0 || !WT_ATOMIC_CAS(cond->waiters, 0, -1)) {
		WT_ERR(pthread_mutex_lock(&cond->mtx));
		locked = 1;
		WT_ERR(pthread_cond_broadcast(&cond->cond));
	}

err:	if (locked)
		WT_TRET(pthread_mutex_unlock(&cond->mtx));
	if (ret == 0)
		return (0);
	WT_RET_MSG(session, ret, "pthread_cond_broadcast");
}
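
The signal path avoids the mutex entirely when nobody is waiting: CAS-ing the waiter count from 0 to -1 records a pending signal, and only when waiters exist does it fall back to pthread_cond_broadcast under the mutex. A rough standalone sketch of that fast path, assuming a C11 atomic waiter count; the struct and function names are illustrative, not WiredTiger's.

#include <pthread.h>
#include <stdatomic.h>

struct condvar {
	pthread_mutex_t mtx;
	pthread_cond_t cond;
	atomic_int waiters;	/* -1 = signal pending, >= 0 = waiter count */
};

/* Signal waiting threads, skipping the mutex when nobody is waiting. */
static int
condvar_signal(struct condvar *cv)
{
	int expected, ret;

	/* Fast path: a signal is already pending. */
	if (atomic_load(&cv->waiters) == -1)
		return (0);

	/* Try to record a pending signal without taking the mutex. */
	expected = 0;
	if (atomic_compare_exchange_strong(&cv->waiters, &expected, -1))
		return (0);

	/* Slow path: waiters exist, wake them all under the mutex. */
	if ((ret = pthread_mutex_lock(&cv->mtx)) != 0)
		return (ret);
	ret = pthread_cond_broadcast(&cv->cond);
	(void)pthread_mutex_unlock(&cv->mtx);
	return (ret);
}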
Example #4
/*
 * __wt_update_obsolete_check --
 *	Check for obsolete updates.
 */
WT_UPDATE *
__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
	WT_UPDATE *first, *next;

	/*
	 * This function identifies obsolete updates, and truncates them from
	 * the rest of the chain; because this routine is called from inside
	 * a serialization function, the caller has responsibility for actually
	 * freeing the memory.
	 *
	 * Walk the list of updates, looking for obsolete updates at the end.
	 */
	for (first = NULL; upd != NULL; upd = upd->next)
		if (__wt_txn_visible_all(session, upd->txnid)) {
			if (first == NULL)
				first = upd;
		} else if (upd->txnid != WT_TXN_ABORTED)
			first = NULL;

	/*
	 * We cannot discard this WT_UPDATE structure: we can only discard
	 * WT_UPDATE structures subsequent to it, because other threads of
	 * control will terminate their walk in this element.  Save a
	 * reference to the list we will discard, and terminate the list.
	 */
	if (first != NULL &&
	    (next = first->next) != NULL &&
	    WT_ATOMIC_CAS(first->next, next, NULL))
		return (next);

	return (NULL);
}
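
The truncation relies on a single CAS on first->next: if another thread has already changed the link, the CAS fails and nothing is discarded. A standalone sketch of CAS-truncating a singly-linked list with C11 atomics; the node type and names are illustrative.

#include <stdatomic.h>
#include <stddef.h>

struct node {
	int value;
	_Atomic(struct node *) next;
};

/*
 * Detach everything after "keep" and return the detached tail, or NULL if
 * there is nothing to detach or another thread changed the link first.
 * The caller owns (and must eventually free) the returned sublist.
 */
static struct node *
truncate_after(struct node *keep)
{
	struct node *tail;

	if (keep == NULL)
		return (NULL);

	tail = atomic_load(&keep->next);
	if (tail == NULL)
		return (NULL);

	/* Only one thread can win the race to terminate the list here. */
	if (atomic_compare_exchange_strong(&keep->next, &tail, NULL))
		return (tail);
	return (NULL);
}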
Example #5
/*
 * __hazard_exclusive --
 *	Request exclusive access to a page.
 */
static int
__hazard_exclusive(WT_SESSION_IMPL *session, WT_REF *ref, int top)
{
	/*
	 * Make sure there is space to track exclusive access so we can unlock
	 * to clean up.
	 */
	WT_RET(__wt_realloc_def(session, &session->excl_allocated,
	    session->excl_next + 1, &session->excl));

	/*
	 * Hazard pointers are acquired down the tree, which means we can't
	 * deadlock.
	 *
	 * Request exclusive access to the page.  The top-level page should
	 * already be in the locked state; lock child pages in memory.
	 * If another thread already has this page, give up.
	 */
	if (!top && !WT_ATOMIC_CAS(ref->state, WT_REF_MEM, WT_REF_LOCKED))
		return (EBUSY);	/* We couldn't change the state. */
	WT_ASSERT(session, ref->state == WT_REF_LOCKED);

	session->excl[session->excl_next++] = ref;

	/* Check for a matching hazard pointer. */
	if (__wt_page_hazard_check(session, ref->page) == NULL)
		return (0);

	WT_STAT_FAST_DATA_INCR(session, cache_eviction_hazard);
	WT_STAT_FAST_CONN_INCR(session, cache_eviction_hazard);

	WT_VERBOSE_RET(
	    session, evict, "page %p hazard request failed", ref->page);
	return (EBUSY);
}
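
Gaining exclusive access is a single CAS on the reference's state word: only the thread that changes WT_REF_MEM to WT_REF_LOCKED proceeds, and everyone else backs off with EBUSY. A minimal sketch of that state-transition idiom with a C11 atomic enum; the enum and names are illustrative, not the library's.

#include <errno.h>
#include <stdatomic.h>

enum ref_state { REF_MEM, REF_LOCKED, REF_EVICTED };

struct ref {
	_Atomic(enum ref_state) state;
};

/* Try to move a reference from "in memory" to "locked"; EBUSY on failure. */
static int
ref_lock_exclusive(struct ref *ref)
{
	enum ref_state expected = REF_MEM;

	/* Exactly one thread can perform the MEM -> LOCKED transition. */
	if (!atomic_compare_exchange_strong(
	    &ref->state, &expected, REF_LOCKED))
		return (EBUSY);
	return (0);
}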
Example #6
/*
 * __wt_txn_begin --
 *	Begin a transaction.
 */
int
__wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *txn_state;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = &txn_global->states[session->id];

	WT_ASSERT(session, txn_state->id == WT_TXN_NONE);

	WT_RET(__wt_config_gets_def(session, cfg, "isolation", 0, &cval));
	if (cval.len == 0)
		txn->isolation = session->isolation;
	else
		txn->isolation =
		    WT_STRING_MATCH("snapshot", cval.str, cval.len) ?
		    TXN_ISO_SNAPSHOT :
		    WT_STRING_MATCH("read-committed", cval.str, cval.len) ?
		    TXN_ISO_READ_COMMITTED : TXN_ISO_READ_UNCOMMITTED;

	/*
	 * Allocate a transaction ID.
	 *
	 * We use an atomic compare and swap to ensure that we get a
	 * unique ID that is published before the global counter is
	 * updated.
	 *
	 * If two threads race to allocate an ID, only the latest ID
	 * will proceed.  The winning thread can be sure its snapshot
	 * contains all of the earlier active IDs.  Threads that race
	 * and get an earlier ID may not appear in the snapshot, but
	 * they will loop and allocate a new ID before proceeding to
	 * make any updates.
	 *
	 * This potentially wastes transaction IDs when threads race to
	 * begin transactions: that is the price we pay to keep this
	 * path latch free.
	 */
	do {
		txn_state->id = txn->id = txn_global->current;
	} while (!WT_ATOMIC_CAS(txn_global->current, txn->id, txn->id + 1));

	/*
	 * If we have used all 64 bits of transaction IDs, there is nothing
	 * more we can do.
	 */
	if (txn->id == WT_TXN_ABORTED)
		WT_RET_MSG(session, ENOMEM, "Out of transaction IDs");

	F_SET(txn, TXN_RUNNING);
	if (txn->isolation == TXN_ISO_SNAPSHOT)
		__wt_txn_refresh(session, WT_TXN_NONE, 1);
	return (0);
}
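
ID allocation here is a fetch-and-bump written as a CAS loop: read the current counter, publish it as our own ID, and only advance the shared counter if nobody else got there first. A minimal sketch of the same loop with C11 atomics; the variable and function names are illustrative, not WiredTiger's.

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t global_current = 1;	/* Next ID to hand out. */

/*
 * Allocate a unique, monotonically increasing ID.  Publishing the candidate
 * ID in *my_id before advancing the counter lets other threads see it when
 * they scan for concurrent transactions, mirroring what __wt_txn_begin does.
 */
static uint64_t
alloc_txn_id(_Atomic uint64_t *my_id)
{
	uint64_t id;

	do {
		id = atomic_load(&global_current);
		atomic_store(my_id, id);	/* Publish before the CAS. */
	} while (!atomic_compare_exchange_weak(&global_current, &id, id + 1));

	return (id);
}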
Example #7
/*
 * __lsm_bloom_work --
 *	Try to create a Bloom filter for the newest on-disk chunk.
 */
static int
__lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_WORKER_COOKIE cookie;
	u_int i;

	WT_CLEAR(cookie);
	/* If no work is done, tell our caller by returning WT_NOTFOUND. */
	ret = WT_NOTFOUND;

	WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, 0));

	/* Create bloom filters in all checkpointed chunks. */
	for (i = 0; i < cookie.nchunks; i++) {
		chunk = cookie.chunk_array[i];

		/*
		 * Skip if a thread is still active in the chunk or it
		 * isn't suitable.
		 */
		if (!F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK) ||
		    F_ISSET_ATOMIC(chunk,
			WT_LSM_CHUNK_BLOOM | WT_LSM_CHUNK_MERGING) ||
		    chunk->generation > 0 ||
		    chunk->count == 0)
			continue;

		/* See if we win the race to switch on the "busy" flag. */
		if (WT_ATOMIC_CAS(chunk->bloom_busy, 0, 1)) {
			ret = __lsm_bloom_create(
			    session, lsm_tree, chunk, (u_int)i);
			chunk->bloom_busy = 0;
			break;
		}
	}

	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);
	return (ret);
}
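
The bloom_busy flag acts as a per-chunk trylock: whoever CAS-es it from 0 to 1 owns the chunk for the duration of the Bloom filter build, and a plain store releases it. A small sketch of that try-lock pattern with C11 atomics; the struct and names are illustrative.

#include <stdatomic.h>

struct chunk {
	atomic_int busy;	/* 0 = free, 1 = a thread is working on it */
};

/* Return 1 if we won the race to work on this chunk, 0 otherwise. */
static int
chunk_try_claim(struct chunk *chunk)
{
	int expected = 0;

	return (atomic_compare_exchange_strong(&chunk->busy, &expected, 1));
}

/* Release the chunk once the work is done. */
static void
chunk_release(struct chunk *chunk)
{
	atomic_store(&chunk->busy, 0);
}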
Example #8
/*
 * __wt_update_obsolete_check --
 *	Check for obsolete updates.
 */
WT_UPDATE *
__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
	WT_TXN *txn;
	WT_UPDATE *next;

	/*
	 * This function identifies obsolete updates, and truncates them from
	 * the rest of the chain; because this routine is called from inside
	 * a serialization function, the caller has responsibility for actually
	 * freeing the memory.
	 */
	txn = &session->txn;
	if (txn->isolation != TXN_ISO_SNAPSHOT &&
	    txn->isolation != TXN_ISO_READ_COMMITTED)
		return (NULL);

	/*
	 * Walk the list of updates, looking for obsolete updates.  If we find
	 * an update no session will ever move past, we can discard any updates
	 * that appear after it.
	 */
	for (; upd != NULL; upd = upd->next)
		if (__wt_txn_visible_all(session, upd->txnid)) {
			/*
			 * We cannot discard this WT_UPDATE structure: we can
			 * only discard WT_UPDATE structures subsequent to it,
			 * because other threads of control will terminate
			 * their walk in this element.  Save a reference to
			 * the list we will discard, and terminate the list.
			 */
			if ((next = upd->next) == NULL)
				return (NULL);
			if (!WT_ATOMIC_CAS(upd->next, next, NULL))
				return (NULL);

			return (next);
		}
	return (NULL);
}
Example #9
/*
 * __wt_txn_refresh --
 *	Allocate a transaction ID and/or a snapshot.
 */
void
__wt_txn_refresh(WT_SESSION_IMPL *session, uint64_t max_id, int get_snapshot)
{
	WT_CONNECTION_IMPL *conn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_STATE *s, *txn_state;
	uint64_t current_id, id, snap_min, oldest_id, prev_oldest_id;
	uint32_t i, n, session_cnt;
	int32_t count;

	conn = S2C(session);
	txn = &session->txn;
	txn_global = &conn->txn_global;
	txn_state = &txn_global->states[session->id];

	prev_oldest_id = txn_global->oldest_id;
	current_id = snap_min = txn_global->current;

	/* For pure read-only workloads, avoid updates to shared state. */
	if (!get_snapshot) {
		/*
		 * If we are trying to update the oldest ID and it is already
		 * equal to the current ID, there is no point scanning.
		 */
		if (prev_oldest_id == current_id)
			return;
	} else if (txn->id == max_id &&
	    txn->snapshot_count == 0 &&
	    txn->snap_min == snap_min &&
	    TXNID_LE(prev_oldest_id, snap_min)) {
		txn_state->snap_min = txn->snap_min;
		/* If nothing has changed in the meantime, we're done. */
		if (txn_global->scan_count == 0 &&
		    txn_global->oldest_id == prev_oldest_id)
			return;
	}

	/*
	 * We're going to scan.  Increment the count of scanners to prevent the
	 * oldest ID from moving forwards.  Spin if the count is negative,
	 * which indicates that some thread is moving the oldest ID forwards.
	 */
	do {
		if ((count = txn_global->scan_count) < 0)
			WT_PAUSE();
	} while (count < 0 ||
	    !WT_ATOMIC_CAS(txn_global->scan_count, count, count + 1));

	/* The oldest ID cannot change until the scan count goes to zero. */
	prev_oldest_id = txn_global->oldest_id;
	current_id = snap_min = txn_global->current;

	/* If the maximum ID is constrained, so is the oldest. */
	oldest_id = (max_id != WT_TXN_NONE) ? max_id : snap_min;

	/* Walk the array of concurrent transactions. */
	WT_ORDERED_READ(session_cnt, conn->session_cnt);
	for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) {
		/*
		 * Ignore the ID if we are committing (indicated by max_id
		 * being set): it is about to be released.
		 *
		 * Also ignore the ID if it is older than the oldest ID we saw.
		 * This can happen if we race with a thread that is allocating
		 * an ID -- the ID will not be used because the thread will
		 * keep spinning until it gets a valid one.
		 */
		if ((id = s->id) != WT_TXN_NONE && id + 1 != max_id &&
		    TXNID_LE(prev_oldest_id, id)) {
			if (get_snapshot)
				txn->snapshot[n++] = id;
			if (TXNID_LT(id, snap_min))
				snap_min = id;
		}

		/*
		 * Ignore the session's own snap_min if we are in the process
		 * of updating it.
		 */
		if (get_snapshot && s == txn_state)
			continue;

		/*
		 * !!!
		 * Note: Don't ignore snap_min values older than the previous
		 * oldest ID.  Read-uncommitted operations publish snap_min
		 * values without incrementing scan_count to protect the global
		 * table.  See the comment in __wt_txn_cursor_op for
		 * more details.
		 */
		if ((id = s->snap_min) != WT_TXN_NONE &&
		    TXNID_LT(id, oldest_id))
			oldest_id = id;
	}

	if (TXNID_LT(snap_min, oldest_id))
		oldest_id = snap_min;

	if (get_snapshot) {
		WT_ASSERT(session, TXNID_LE(prev_oldest_id, snap_min));
		WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
		txn_state->snap_min = snap_min;
	}

	/*
	 * Update the last running ID if we have a much newer value or we are
	 * forcing an update.
	 */
	if (!get_snapshot || snap_min > txn_global->last_running + 100)
		txn_global->last_running = snap_min;

	/*
	 * Update the oldest ID if we have a newer ID and we can get exclusive
	 * access.  During normal snapshot refresh, only do this if we have a
	 * much newer value.  Once we get exclusive access, do another pass to
	 * make sure nobody else is using an earlier ID.
	 */
	if (max_id == WT_TXN_NONE &&
	    TXNID_LT(prev_oldest_id, oldest_id) &&
	    (!get_snapshot || oldest_id - prev_oldest_id > 100) &&
	    WT_ATOMIC_CAS(txn_global->scan_count, 1, -1)) {
		WT_ORDERED_READ(session_cnt, conn->session_cnt);
		for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
			if ((id = s->id) != WT_TXN_NONE &&
			    TXNID_LT(id, oldest_id))
				oldest_id = id;
			if ((id = s->snap_min) != WT_TXN_NONE &&
			    TXNID_LT(id, oldest_id))
				oldest_id = id;
		}
		if (TXNID_LT(txn_global->oldest_id, oldest_id))
			txn_global->oldest_id = oldest_id;
		txn_global->scan_count = 0;
	} else {
		WT_ASSERT(session, txn_global->scan_count > 0);
		(void)WT_ATOMIC_SUB(txn_global->scan_count, 1);
	}

	if (get_snapshot)
		__txn_sort_snapshot(session, n, current_id);
}
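
scan_count works as a tiny reader/writer latch built from CAS: scanners increment it (spinning while it is negative), and a thread that wants to move the oldest ID forward CAS-es it from 1 to -1, i.e. it must be the only scanner to get exclusive access. A compact sketch of that protocol with C11 atomics, using sched_yield() where the original pauses; all names are illustrative.

#include <sched.h>
#include <stdatomic.h>

static atomic_int scan_count;	/* > 0: scanners, -1: exclusive writer */

/* Enter as a scanner; spin while a writer holds the count at -1. */
static void
scan_enter(void)
{
	int count;

	do {
		while ((count = atomic_load(&scan_count)) < 0)
			sched_yield();
	} while (!atomic_compare_exchange_weak(&scan_count, &count, count + 1));
}

/*
 * Try to upgrade to exclusive access: this only succeeds if we are the sole
 * scanner (count == 1).  On success, the caller later resets the count to 0.
 */
static int
scan_try_exclusive(void)
{
	int expected = 1;

	return (atomic_compare_exchange_strong(&scan_count, &expected, -1));
}

/* Leave as a scanner. */
static void
scan_leave(void)
{
	atomic_fetch_sub(&scan_count, 1);
}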
Example #10
/*
 * __clsm_enter --
 *	Start an operation on an LSM cursor, update if the tree has changed.
 */
static inline int
__clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update)
{
	WT_CURSOR *c;
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_SESSION_IMPL *session;
	uint64_t *txnid_maxp;
	uint64_t id, myid, snap_min;

	session = (WT_SESSION_IMPL *)clsm->iface.session;

	/* Merge cursors never update. */
	if (F_ISSET(clsm, WT_CLSM_MERGE))
		return (0);

	if (reset) {
		c = &clsm->iface;
		/* Copy out data before resetting chunk cursors. */
		if (F_ISSET(c, WT_CURSTD_KEY_INT) &&
		    !WT_DATA_IN_ITEM(&c->key))
			WT_RET(__wt_buf_set(
			    session, &c->key, c->key.data, c->key.size));
		if (F_ISSET(c, WT_CURSTD_VALUE_INT) &&
		    !WT_DATA_IN_ITEM(&c->value))
			WT_RET(__wt_buf_set(
			    session, &c->value, c->value.data, c->value.size));
		WT_RET(__clsm_reset_cursors(clsm, NULL));
	}

	for (;;) {
		/*
		 * If the cursor looks up-to-date, check if the cache is full.
		 * In case this call blocks, the check will be repeated before
		 * proceeding.
		 */
		if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen)
			goto open;

		WT_RET(__wt_cache_full_check(session));

		if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen)
			goto open;

		/* Update the maximum transaction ID in the primary chunk. */
		if (update && (chunk = clsm->primary_chunk) != NULL) {
			WT_RET(__wt_txn_autocommit_check(session));
			for (id = chunk->txnid_max, myid = session->txn.id;
			    !TXNID_LE(myid, id);
			    id = chunk->txnid_max) {
				WT_ASSERT(session, myid != WT_TXN_NONE);
				(void)WT_ATOMIC_CAS(
				    chunk->txnid_max, id, myid);
			}
		}

		/*
		 * Figure out how many updates are required for snapshot
		 * isolation.
		 *
		 * This is not a normal visibility check on the maximum
		 * transaction ID in each chunk: any transaction ID that
		 * overlaps with our snapshot is a potential conflict.
		 */
		clsm->nupdates = 1;
		if (session->txn.isolation == TXN_ISO_SNAPSHOT &&
		    F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
			snap_min = session->txn.snap_min;
			for (txnid_maxp = &clsm->txnid_max[clsm->nchunks - 2];
			    clsm->nupdates < clsm->nchunks;
			    clsm->nupdates++, txnid_maxp--)
				if (TXNID_LT(*txnid_maxp, snap_min))
					break;
		}

		/*
		 * Stop when we are up-to-date, as long as this is:
		 *   - a snapshot isolation update and the cursor is set up for
		 *     that;
		 *   - an update operation with a primary chunk, or
		 *   - a read operation and the cursor is open for reading.
		 */
		if ((!update ||
		    session->txn.isolation != TXN_ISO_SNAPSHOT ||
		    F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) &&
		    ((update && clsm->primary_chunk != NULL) ||
		    (!update && F_ISSET(clsm, WT_CLSM_OPEN_READ))))
			break;

open:		WT_WITH_SCHEMA_LOCK(session,
		    ret = __clsm_open_cursors(clsm, update, 0, 0));
		WT_RET(ret);
	}

	if (!F_ISSET(clsm, WT_CLSM_ACTIVE)) {
		WT_RET(__cursor_enter(session));
		F_SET(clsm, WT_CLSM_ACTIVE);
	}

	return (0);
}
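
Updating the chunk's txnid_max is a CAS-maximize loop: keep retrying until either our ID is no longer greater than the stored maximum or our CAS lands. A standalone sketch of the same monotonic-maximum update with C11 atomics; the function name is illustrative.

#include <stdatomic.h>
#include <stdint.h>

/*
 * Raise *maxp to at least "id".  Losing a CAS race is fine: we only retry
 * while our value is still larger than what is currently stored.
 */
static void
atomic_store_max(_Atomic uint64_t *maxp, uint64_t id)
{
	uint64_t cur;

	cur = atomic_load(maxp);
	while (cur < id &&
	    !atomic_compare_exchange_weak(maxp, &cur, id))
		;	/* cur is refreshed by the failed CAS. */
}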