Code example #1
File: lsm_work_unit.c  Project: 7segments/mongo-1
/*
 * __wt_lsm_work_bloom --
 *	Try to create a Bloom filter for the newest on-disk chunk that doesn't
 *	have one.
 */
int
__wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_WORKER_COOKIE cookie;
	u_int i, merge;

	WT_CLEAR(cookie);

	WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, 0));

	/* Create a bloom filter in the first suitable checkpointed chunk. */
	merge = 0;
	for (i = 0; i < cookie.nchunks; i++) {
		chunk = cookie.chunk_array[i];

		/*
		 * Skip if a thread is still active in the chunk or it
		 * isn't suitable.
		 */
		if (!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
		    F_ISSET(chunk, WT_LSM_CHUNK_BLOOM | WT_LSM_CHUNK_MERGING) ||
		    chunk->generation > 0 ||
		    chunk->count == 0)
			continue;

		/*
		 * See if we win the race to switch on the "busy" flag and
		 * recheck that the chunk still needs a Bloom filter.
		 */
		if (WT_ATOMIC_CAS4(chunk->bloom_busy, 0, 1)) {
			if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
				ret = __lsm_bloom_create(
				    session, lsm_tree, chunk, (u_int)i);
				/*
				 * Record if we were successful so that we can
				 * later push a merge work unit.
				 */
				if (ret == 0)
					merge = 1;
			}
			chunk->bloom_busy = 0;
			break;
		}
	}
	/*
	 * If we created any bloom filters, we push a merge work unit now.
	 */
	if (merge)
		WT_ERR(__wt_lsm_manager_push_entry(
		    session, WT_LSM_WORK_MERGE, 0, lsm_tree));

err:
	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);
	return (ret);
}
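The concurrency idiom worth noting here is the compare-and-swap on chunk->bloom_busy: whichever worker flips the flag from 0 to 1 owns the chunk, rechecks WT_LSM_CHUNK_BLOOM under that ownership, and clears the flag when it is done. Below is a minimal stand-alone sketch of the same claim/recheck/release pattern, written with C11 <stdatomic.h> rather than WiredTiger's WT_ATOMIC_CAS4 macro; the struct and function names are hypothetical.

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical stand-in for a chunk guarded by a busy flag. */
struct chunk {
	atomic_uint bloom_busy;		/* 0 = idle, 1 = claimed by a worker */
};

/*
 * try_claim_chunk --
 *	Return true if this thread won the race to set the busy flag,
 *	false if another worker already owns the chunk.
 */
static bool
try_claim_chunk(struct chunk *c)
{
	unsigned int expected = 0;

	/* Same effect as WT_ATOMIC_CAS4(chunk->bloom_busy, 0, 1). */
	return (atomic_compare_exchange_strong(&c->bloom_busy, &expected, 1));
}

/*
 * release_chunk --
 *	Drop the busy flag once the work (or the recheck) is finished.
 */
static void
release_chunk(struct chunk *c)
{
	atomic_store(&c->bloom_busy, 0);
}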
Code example #2
/*
 * __lsm_bloom_work --
 *	Try to create a Bloom filter for the newest on-disk chunk.
 */
static int
__lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_WORKER_COOKIE cookie;
	u_int i;

	WT_CLEAR(cookie);
	/* If no work is done, tell our caller by returning WT_NOTFOUND. */
	ret = WT_NOTFOUND;

	WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, 0));

	/* Create a bloom filter in the first suitable checkpointed chunk. */
	for (i = 0; i < cookie.nchunks; i++) {
		chunk = cookie.chunk_array[i];

		/*
		 * Skip if a thread is still active in the chunk or it
		 * isn't suitable.
		 */
		if (!F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK) ||
		    F_ISSET_ATOMIC(chunk,
			WT_LSM_CHUNK_BLOOM | WT_LSM_CHUNK_MERGING) ||
		    chunk->generation > 0 ||
		    chunk->count == 0)
			continue;

		/* See if we win the race to switch on the "busy" flag. */
		if (WT_ATOMIC_CAS(chunk->bloom_busy, 0, 1)) {
			ret = __lsm_bloom_create(
			    session, lsm_tree, chunk, (u_int)i);
			chunk->bloom_busy = 0;
			break;
		}
	}

	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);
	return (ret);
}
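Unlike the first variant, this one initializes ret to WT_NOTFOUND so a pass that finds nothing to do reports that fact to its caller. The sketch below shows how a caller might consume that convention; bloom_pass is a hypothetical wrapper, while WT_NOTFOUND and the session and tree types come from the surrounding WiredTiger code.

static int
bloom_pass(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	int ret;

	ret = __lsm_bloom_work(session, lsm_tree);
	if (ret == WT_NOTFOUND)
		ret = 0;	/* No suitable chunk: not an error. */
	return (ret);
}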
Code example #3
File: lsm_work_unit.c  Project: brianleepzx/mongo
/*
 * __wt_lsm_free_chunks --
 *	Try to drop chunks from the tree that are no longer required.
 */
int
__wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_WORKER_COOKIE cookie;
	u_int i, skipped;
	int drop_ret;
	bool flush_metadata;

	flush_metadata = false;

	if (lsm_tree->nold_chunks == 0)
		return (0);

	/*
	 * Make sure only a single thread is freeing the old chunk array
	 * at any time.
	 */
	if (!__wt_atomic_cas32(&lsm_tree->freeing_old_chunks, 0, 1))
		return (0);
	/*
	 * Take a copy of the current state of the LSM tree and look for chunks
	 * to drop.  We do it this way to avoid holding the LSM tree lock while
	 * doing I/O or waiting on the schema lock.
	 *
	 * This is safe because only one thread will be in this function at a
	 * time.  Merges may complete concurrently, and the old_chunks array
	 * may be extended, but we shuffle down the pointers each time we free
	 * one to keep the non-NULL slots at the beginning of the array.
	 */
	WT_CLEAR(cookie);
	WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, true));
	for (i = skipped = 0; i < cookie.nchunks; i++) {
		chunk = cookie.chunk_array[i];
		WT_ASSERT(session, chunk != NULL);
		/* Skip the chunk if another worker is using it. */
		if (chunk->refcnt > 1) {
			++skipped;
			continue;
		}

		/*
		 * Don't remove files if a hot backup is in progress.
		 *
		 * The schema lock protects the set of live files, this check
		 * prevents us from removing a file that hot backup already
		 * knows about.
		 */
		if (S2C(session)->hot_backup)
			break;

		/*
		 * Drop any bloom filters and chunks we can.  Don't try to
		 * drop a chunk if the bloom filter drop fails.
		 *
		 * An EBUSY return indicates that a cursor is still open in
		 * the tree - move to the next chunk in that case.
		 *
		 * An ENOENT return indicates that the LSM tree metadata was
		 * out of sync with the on-disk state.  Update the metadata
		 * to match in that case.
		 */
		if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) {
			drop_ret = __lsm_drop_file(session, chunk->bloom_uri);
			if (drop_ret == EBUSY) {
				++skipped;
				continue;
			} else if (drop_ret != ENOENT)
				WT_ERR(drop_ret);

			flush_metadata = true;
			F_CLR(chunk, WT_LSM_CHUNK_BLOOM);
		}
		if (chunk->uri != NULL) {
			drop_ret = __lsm_drop_file(session, chunk->uri);
			if (drop_ret == EBUSY) {
				++skipped;
				continue;
			} else if (drop_ret != ENOENT)
				WT_ERR(drop_ret);
			flush_metadata = true;
		}

		/* Lock the tree to clear out the old chunk information. */
		WT_ERR(__wt_lsm_tree_writelock(session, lsm_tree));

		/*
		 * The chunk we are looking at should be the first one in the
		 * tree that we haven't already skipped over.
		 */
		WT_ASSERT(session, lsm_tree->old_chunks[skipped] == chunk);
		__wt_free(session, chunk->bloom_uri);
		__wt_free(session, chunk->uri);
		__wt_free(session, lsm_tree->old_chunks[skipped]);

		/* Shuffle down to keep all occupied slots at the beginning. */
		if (--lsm_tree->nold_chunks > skipped) {
			memmove(lsm_tree->old_chunks + skipped,
			    lsm_tree->old_chunks + skipped + 1,
			    (lsm_tree->nold_chunks - skipped) *
			    sizeof(WT_LSM_CHUNK *));
			lsm_tree->old_chunks[lsm_tree->nold_chunks] = NULL;
		}

		WT_ERR(__wt_lsm_tree_writeunlock(session, lsm_tree));

		/*
		 * Clear the chunk in the cookie so we don't attempt to
		 * decrement the reference count.
		 */
		cookie.chunk_array[i] = NULL;
	}

err:	/* Flush the metadata unless the system is in panic */
	if (flush_metadata && ret != WT_PANIC) {
		WT_TRET(__wt_lsm_tree_writelock(session, lsm_tree));
		WT_TRET(__wt_lsm_meta_write(session, lsm_tree));
		WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
	}
	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);
	lsm_tree->freeing_old_chunks = 0;

	/* Returning non-zero means there is no work to do. */
	if (!flush_metadata)
		WT_TRET(WT_NOTFOUND);

	return (ret);
}
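The shuffle-down at the end of the loop is the part that keeps old_chunks consistent: the freed slot is closed up with memmove so the non-NULL pointers stay contiguous at the front of the array. Here is a self-contained sketch of the same pattern on a generic pointer array; remove_slot is a hypothetical helper, not part of WiredTiger.

#include <stddef.h>
#include <string.h>

/*
 * remove_slot --
 *	Remove the entry at position "slot" from an array of "n" pointers,
 *	shifting the later entries down one place and clearing the vacated
 *	tail slot.  Returns the new element count.
 */
static size_t
remove_slot(void **array, size_t n, size_t slot)
{
	if (slot >= n)
		return (n);

	if (--n > slot)
		memmove(array + slot, array + slot + 1,
		    (n - slot) * sizeof(void *));
	array[n] = NULL;
	return (n);
}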
Code example #4
/*
 * __lsm_free_chunks --
 *	Try to drop chunks from the tree that are no longer required.
 */
static int
__lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_WORKER_COOKIE cookie;
	u_int i, skipped;
	int progress;

	/*
	 * Take a copy of the current state of the LSM tree and look for chunks
	 * to drop.  We do it this way to avoid holding the LSM tree lock while
	 * doing I/O or waiting on the schema lock.
	 *
	 * This is safe because only one thread will be in this function at a
	 * time (the first merge thread).  Merges may complete concurrently,
	 * and the old_chunks array may be extended, but we shuffle down the
	 * pointers each time we free one to keep the non-NULL slots at the
	 * beginning of the array.
	 */
	WT_CLEAR(cookie);
	WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, 1));
	for (i = skipped = 0, progress = 0; i < cookie.nchunks; i++) {
		chunk = cookie.chunk_array[i];
		WT_ASSERT(session, chunk != NULL);
		/* Skip the chunk if another worker is using it. */
		if (chunk->refcnt > 1) {
			++skipped;
			continue;
		}

		if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_BLOOM)) {
			/*
			 * An EBUSY return is acceptable - a cursor may still
			 * be positioned on this old chunk.
			 */
			if ((ret = __lsm_drop_file(
			    session, chunk->bloom_uri)) == EBUSY) {
				WT_VERBOSE_ERR(session, lsm,
				    "LSM worker bloom drop busy: %s.",
				    chunk->bloom_uri);
				++skipped;
				continue;
			} else
				WT_ERR(ret);

			F_CLR_ATOMIC(chunk, WT_LSM_CHUNK_BLOOM);
		}
		if (chunk->uri != NULL) {
			/*
			 * An EBUSY return is acceptable - a cursor may still
			 * be positioned on this old chunk.
			 */
			if ((ret = __lsm_drop_file(
			    session, chunk->uri)) == EBUSY) {
				WT_VERBOSE_ERR(session, lsm,
				    "LSM worker drop busy: %s.",
				    chunk->uri);
				++skipped;
				continue;
			} else
				WT_ERR(ret);
		}

		progress = 1;

		/* Lock the tree to clear out the old chunk information. */
		WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1));

		/*
		 * The chunk we are looking at should be the first one in the
		 * tree that we haven't already skipped over.
		 */
		WT_ASSERT(session, lsm_tree->old_chunks[skipped] == chunk);
		__wt_free(session, chunk->bloom_uri);
		__wt_free(session, chunk->uri);
		__wt_free(session, lsm_tree->old_chunks[skipped]);

		/* Shuffle down to keep all occupied slots at the beginning. */
		if (--lsm_tree->nold_chunks > skipped) {
			memmove(lsm_tree->old_chunks + skipped,
			    lsm_tree->old_chunks + skipped + 1,
			    (lsm_tree->nold_chunks - skipped) *
			    sizeof(WT_LSM_CHUNK *));
			lsm_tree->old_chunks[lsm_tree->nold_chunks] = NULL;
		}
		/*
		 * Clear the chunk in the cookie so we don't attempt to
		 * decrement the reference count.
		 */
		cookie.chunk_array[i] = NULL;

		/*
		 * Update the metadata.  We used to try to optimize by only
		 * updating the metadata once at the end, but the error
		 * handling is not straightforward.
		 */
		WT_TRET(__wt_lsm_meta_write(session, lsm_tree));
		WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
	}

err:	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);

	/* Returning non-zero means there is no work to do. */
	if (!progress)
		WT_TRET(WT_NOTFOUND);

	return (ret);
}
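Every example here leans on WiredTiger's error-handling macros: WT_RET returns immediately on failure, WT_ERR jumps to the local err: label so cleanup still runs, and WT_TRET folds a secondary failure into ret without losing the first error. The sketches below show the shape assumed in these examples; they are simplified, and the real definitions live in WiredTiger's error headers and handle additional cases such as WT_PANIC taking priority.

/* Simplified sketches of the error macros used throughout these examples. */
#define WT_RET(a) do {						\
	int __r = (a);						\
	if (__r != 0)						\
		return (__r);					\
} while (0)

#define WT_ERR(a) do {						\
	if ((ret = (a)) != 0)					\
		goto err;					\
} while (0)

#define WT_TRET(a) do {						\
	int __r = (a);						\
	if (__r != 0 && (ret == 0 || ret == WT_NOTFOUND))	\
		ret = __r;					\
} while (0)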
Code example #5
/*
 * __wt_lsm_checkpoint_worker --
 *	A worker thread for an LSM tree, responsible for flushing new chunks to
 *	disk.
 */
void *
__wt_lsm_checkpoint_worker(void *arg)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_TREE *lsm_tree;
	WT_LSM_WORKER_COOKIE cookie;
	WT_SESSION_IMPL *session;
	WT_TXN_ISOLATION saved_isolation;
	u_int i, j;
	int locked;

	lsm_tree = arg;
	session = lsm_tree->ckpt_session;

	WT_CLEAR(cookie);

	while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
		if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
			WT_WITH_SCHEMA_LOCK(session, ret =
			    __wt_lsm_tree_switch(session, lsm_tree));
			WT_ERR(ret);
		}

		WT_ERR(__lsm_copy_chunks(session, lsm_tree, &cookie, 0));

		/* Write checkpoints in all completed files. */
		for (i = 0, j = 0; i < cookie.nchunks - 1; i++) {
			if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING))
				goto err;

			if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
				break;

			chunk = cookie.chunk_array[i];

			/* Stop if a running transaction needs the chunk. */
			__wt_txn_update_oldest(session);
			if (!__wt_txn_visible_all(session, chunk->txnid_max))
				break;

			/*
			 * If the chunk is already checkpointed, make sure it
			 * is also evicted.  Either way, there is no point
			 * trying to checkpoint it again.
			 */
			if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK)) {
				if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_EVICTED))
					continue;

				if ((ret = __lsm_discard_handle(
				    session, chunk->uri, NULL)) == 0)
					F_SET_ATOMIC(
					    chunk, WT_LSM_CHUNK_EVICTED);
				else if (ret == EBUSY)
					ret = 0;
				else
					WT_ERR_MSG(session, ret,
					    "discard handle");
				continue;
			}

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker flushing %u", i);

			/*
			 * Flush the file before checkpointing: this is the
			 * expensive part in terms of I/O: do it without
			 * holding the schema lock.
			 *
			 * Use the special eviction isolation level to avoid
			 * interfering with an application checkpoint: we have
			 * already checked that all of the updates in this
			 * chunk are globally visible.
			 *
			 * !!! We can wait here for checkpoints and fsyncs to
			 * complete, which can be a long time.
			 *
			 * Don't keep waiting for the lock if application
			 * threads are waiting for a switch.  Don't skip
			 * flushing the leaves either: that just means we'll
			 * hold the schema lock for (much) longer, which blocks
			 * the world.
			 */
			WT_ERR(__wt_session_get_btree(
			    session, chunk->uri, NULL, NULL, 0));
			for (locked = 0;
			    !locked && ret == 0 &&
			    !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);) {
				if ((ret = __wt_spin_trylock(session,
				    &S2C(session)->checkpoint_lock)) == 0)
					locked = 1;
				else if (ret == EBUSY) {
					__wt_yield();
					ret = 0;
				}
			}
			if (locked) {
				saved_isolation = session->txn.isolation;
				session->txn.isolation = TXN_ISO_EVICTION;
				ret = __wt_bt_cache_op(
				    session, NULL, WT_SYNC_WRITE_LEAVES);
				session->txn.isolation = saved_isolation;
				__wt_spin_unlock(
				    session, &S2C(session)->checkpoint_lock);
			}
			WT_TRET(__wt_session_release_btree(session));
			WT_ERR(ret);

			if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
				break;

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker checkpointing %u", i);

			WT_WITH_SCHEMA_LOCK(session,
			    ret = __wt_schema_worker(session, chunk->uri,
			    __wt_checkpoint, NULL, NULL, 0));

			if (ret != 0) {
				__wt_err(session, ret, "LSM checkpoint");
				break;
			}

			WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
			/*
			 * Clear the "cache resident" flag so the primary can
			 * be evicted and eventually closed.  Only do this once
			 * the checkpoint has succeeded: otherwise, accessing
			 * the leaf page during the checkpoint can trigger
			 * forced eviction.
			 */
			WT_ERR(__wt_session_get_btree(
			    session, chunk->uri, NULL, NULL, 0));
			__wt_btree_evictable(session, 1);
			WT_ERR(__wt_session_release_btree(session));

			++j;
			WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1));
			F_SET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK);
			ret = __wt_lsm_meta_write(session, lsm_tree);
			++lsm_tree->dsk_gen;

			/* Update the throttle time. */
			__wt_lsm_tree_throttle(session, lsm_tree);
			WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree));

			/* Make sure we aren't pinning a transaction ID. */
			__wt_txn_release_snapshot(session);

			if (ret != 0) {
				__wt_err(session, ret,
				    "LSM checkpoint metadata write");
				break;
			}

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker checkpointed %u", i);
		}
		__lsm_unpin_chunks(session, &cookie);
		if (j == 0 && F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) &&
		    !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
			WT_ERR_TIMEDOUT_OK(__wt_cond_wait(
			    session, lsm_tree->work_cond, 100000));
	}
err:	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);
	/*
	 * The thread will only exit with failure if we run out of memory or
	 * there is some other system driven failure. We can't keep going
	 * after such a failure - ensure WiredTiger shuts down.
	 */
	if (ret != 0 && ret != WT_NOTFOUND)
		WT_PANIC_ERR(session, ret,
		    "Shutting down LSM checkpoint utility thread");
	return (NULL);
}