Code example #1
File: conn_log.c Project: pgmarchenko/mongo
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = arg;
	conn = S2C(session);
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Write out any log record buffers.
		 */
		WT_ERR(__wt_log_wrlsn(session));
		WT_ERR(__wt_cond_wait(session, conn->log_wrlsn_cond, 10000));
	}
	/*
	 * On close we need to do this one more time because there could
	 * be straggling log writes that need to be written.
	 */
	WT_ERR(__wt_log_force_write(session, 1));
	WT_ERR(__wt_log_wrlsn(session));
	if (0) {
err:		__wt_err(session, ret, "log wrlsn server error");
	}
	return (WT_THREAD_RET_VALUE);
}
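
Every example in this listing follows the same WiredTiger server-thread skeleton: loop while a run flag on the connection is set, do one unit of work, then block on a condition variable with a timeout. On failure, WT_ERR stores the error code in ret and jumps to the err: label; the `if (0) { err: ... }` idiom places that handler after the loop while keeping it unreachable on the success path. Below is a minimal, self-contained sketch of the idiom; the stub functions and the ERR macro are stand-ins for the WT_* internals, not part of WiredTiger:

#include <stdio.h>

static volatile int server_run = 1;	/* Stand-in for F_ISSET(conn, ..._RUN). */

static int do_work(void) { return (0); }	/* Stand-in unit of work. */
static int wait_usecs(long us) { (void)us; server_run = 0; return (0); }

/* WT_ERR-style macro: on failure, record the code and jump to err. */
#define	ERR(call) do { if ((ret = (call)) != 0) goto err; } while (0)

static void *
server(void *arg)
{
	int ret = 0;

	(void)arg;
	while (server_run) {
		ERR(do_work());			/* One unit of work. */
		ERR(wait_usecs(10000));		/* Sleep until the next event. */
	}

	if (0) {				/* Unreachable on success. */
err:		(void)fprintf(stderr, "server error: %d\n", ret);
	}
	return (NULL);
}

int
main(void)
{
	(void)server(NULL);
	return (0);
}
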
Code example #2
File: conn_ckpt.c Project: EaseTech/wiredtiger
/*
 * __ckpt_server --
 *	The checkpoint server thread.
 */
static void *
__ckpt_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION *wt_session;
	WT_SESSION_IMPL *session;

	session = arg;
	conn = S2C(session);
	wt_session = (WT_SESSION *)session;

	while (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
	    F_ISSET(conn, WT_CONN_SERVER_CHECKPOINT)) {
		/* Checkpoint the database. */
		WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config));

		/* Wait... */
		WT_ERR_TIMEDOUT_OK(
		    __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs));
	}

	if (0) {
err:		__wt_err(session, ret, "checkpoint server error");
	}
	return (NULL);
}
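
The wait interval conn->ckpt_usecs in example #2 is derived from the connection's checkpoint configuration. Through the public API, this server is enabled with an open-time configuration string; a minimal sketch follows ("WT_HOME" is a placeholder path, and the mapping to the internal field is inferred from the loop above):

#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
	WT_CONNECTION *conn;

	/*
	 * "checkpoint=(wait=120)" starts the checkpoint server and runs a
	 * database checkpoint roughly every 120 seconds; that interval is
	 * what the server loop above waits on.
	 */
	if (wiredtiger_open("WT_HOME", NULL,
	    "create,checkpoint=(wait=120)", &conn) != 0)
		return (EXIT_FAILURE);

	/* ... normal operations; checkpoints happen in the background ... */

	return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
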
Code example #3
/*
 * __sweep_server --
 *	The handle sweep server thread.
 */
static void *
__sweep_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = arg;
	conn = S2C(session);

	/*
	 * Sweep for dead handles.
	 */
	while (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
	    F_ISSET(conn, WT_CONN_SERVER_SWEEP)) {

		/* Wait until the next event. */
		WT_ERR_TIMEDOUT_OK(
		    __wt_cond_wait(session, conn->sweep_cond, 30 * WT_MILLION));

		/* Sweep the handles. */
		WT_ERR(__sweep(session));
	}

	if (0) {
err:		__wt_err(session, ret, "handle sweep server error");
	}
	return (NULL);
}
Code example #4
File: conn_log.c Project: alabid/mongo
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	int locked, yield;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = yield = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		__wt_spin_lock(session, &log->log_slot_lock);
		locked = 1;
		WT_ERR(__wt_log_wrlsn(session, NULL, &yield));
		locked = 0;
		__wt_spin_unlock(session, &log->log_slot_lock);
		if (++yield < 1000)
			__wt_yield();
		else
			WT_ERR(__wt_cond_wait(session,
			    conn->log_wrlsn_cond, 100000));
	}
	if (0) {
err:		__wt_err(session, ret, "log wrlsn server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_slot_lock);
	return (WT_THREAD_RET_VALUE);
}
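
Example #4 (and example #10 below) uses an adaptive backoff: while recent iterations found work, the thread only yields the processor; after roughly 1000 idle iterations it falls back to a timed condition wait. The same pattern distilled into portable C, with sched_yield and usleep standing in for __wt_yield and __wt_cond_wait:

#include <sched.h>
#include <unistd.h>

/*
 * Yield cheaply while work is likely in flight; once the loop has been
 * idle for ~1000 iterations, sleep for up to 100ms instead.  (In the
 * real code the sleep is a condition wait, so another thread can cut
 * it short with a signal.)
 */
static void
backoff(int *yieldp)
{
	if (++*yieldp < 1000)
		(void)sched_yield();
	else
		(void)usleep(100000);
}
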
Code example #5
/*
 * __ckpt_server --
 *	The checkpoint server thread.
 */
static void *
__ckpt_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION *wt_session;
	WT_SESSION_IMPL *session;

	session = arg;
	conn = S2C(session);
	wt_session = (WT_SESSION *)session;

	while (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
	    F_ISSET(conn, WT_CONN_SERVER_CHECKPOINT)) {
		/* Checkpoint the database. */
		WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config));

		/* Reset. */
		if (conn->ckpt_logsize) {
			__wt_log_written_reset(session);
			conn->ckpt_signalled = 0;

			/*
			 * In case we crossed the log limit during the
			 * checkpoint and the condition variable was already
			 * signalled, do a tiny wait to clear it so we don't do
			 * another checkpoint immediately.
			 */
			WT_ERR(__wt_cond_wait(session, conn->ckpt_cond, 1));
		}

		/*
		 * Wait...
		 * NOTE: If the user only configured logsize, then usecs
		 * will be 0 and this wait won't return until signalled.
		 */
		WT_ERR(
		    __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs));
	}

	if (0) {
err:		WT_PANIC_MSG(session, ret, "checkpoint server error");
	}
	return (NULL);
}
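
Example #5 extends the checkpoint server with a log-size trigger: ckpt_logsize and ckpt_signalled handle checkpoints scheduled after a given amount of log growth, and the comment notes that with only log_size configured the final wait blocks until signalled. A sketch of the corresponding open-time configuration (placeholder path; configuration strings per the WiredTiger documentation):

#include <wiredtiger.h>

/*
 * Checkpoint every 300 seconds or after roughly 2GB of log data,
 * whichever comes first.  The log_size trigger drives the
 * ckpt_logsize/ckpt_signalled handling in the server loop above.
 */
static int
open_with_checkpoint_triggers(WT_CONNECTION **connp)
{
	return (wiredtiger_open("WT_HOME", NULL,
	    "create,log=(enabled),checkpoint=(wait=300,log_size=2GB)",
	    connp));
}
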
Code example #6
File: bt_sync.c Project: niumowm/wiredtiger
/*
 * __wt_bt_cache_op --
 *	Cache operations: compaction, discard, sync/checkpoint.
 */
int
__wt_bt_cache_op(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, int op)
{
	WT_DECL_RET;
	WT_BTREE *btree;

	btree = session->btree;

	/*
	 * Compaction and sync/checkpoint reconcile dirty pages from the cache
	 * to the backing block manager.  Reconciliation is just another reader
	 * of the page, so with some care, it can be done in the current thread,
	 * leaving the eviction thread to keep freeing spaces if the cache is
	 * full.  Sync and eviction cannot operate on the same page at the same
	 * time, and there are different modes inside __wt_tree_walk to make
	 * sure they don't trip over each other.
	 *
	 * The current thread cannot evict pages from the cache, so discard is
	 * done by calling the eviction server for service.
	 *
	 * XXX
	 * Set the checkpoint reference for reconciliation -- this is ugly, but
	 * there's no data structure path from here to reconciliation.
	 *
	 * Publish: there must be a barrier to ensure the structure fields are
	 * set before the eviction thread can see the request.
	 */
	WT_PUBLISH(btree->ckpt, ckptbase);

	switch (op) {
	case WT_SYNC_CHECKPOINT:
	case WT_SYNC_COMPACT:
	case WT_SYNC_WRITE_LEAVES:
		WT_ERR(__wt_sync_file(session, op));
		break;
	case WT_SYNC_DISCARD:
	case WT_SYNC_DISCARD_NOWRITE:
		/*
		 * Schedule and wake the eviction server, then wait for the
		 * eviction server to wake us.
		 */
		WT_ERR(__wt_sync_file_serial(session, op));
		WT_ERR(__wt_evict_server_wake(session));
		WT_ERR(__wt_cond_wait(session, session->cond, 0));
		ret = session->syncop_ret;

		/* If discarding the tree, the root page should be gone. */
		WT_ASSERT(session, ret != 0 || btree->root_page == NULL);
		break;
	WT_ILLEGAL_VALUE_ERR(session);
	}

err:	btree->ckpt = NULL;
	return (ret);
}
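
The WT_PUBLISH macro in example #6 pairs a write barrier with the assignment, so a thread that observes the new btree->ckpt value also observes every store made before it. Its definition in WiredTiger's headers is approximately:

/*
 * Publish a value: issue a write barrier first so all earlier stores
 * become visible before the assignment itself does.
 */
#define	WT_PUBLISH(v, val) do {						\
	WT_WRITE_BARRIER();						\
	(v) = (val);							\
} while (0)
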
Code example #7
File: conn_log.c Project: christkv/mongo
/*
 * __log_server --
 *	The log server thread.
 */
static WT_THREAD_RET
__log_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	u_int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Perform log pre-allocation.
		 */
		if (conn->log_prealloc > 0)
			WT_ERR(__log_prealloc_once(session));

		/*
		 * Perform the archive.
		 */
		if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
			if (__wt_try_writelock(
			    session, log->log_archive_lock) == 0) {
				locked = 1;
				WT_ERR(__log_archive_once(session, 0));
				WT_ERR(__wt_writeunlock(
				    session, log->log_archive_lock));
				locked = 0;
			} else
				WT_ERR(__wt_verbose(session, WT_VERB_LOG,
				    "log_archive: Blocked due to open log "
				    "cursor holding archive lock"));
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(session, conn->log_cond, WT_MILLION));
	}

	if (0) {
err:		__wt_err(session, ret, "log server error");
	}
	if (locked)
		(void)__wt_writeunlock(session, log->log_archive_lock);
	return (WT_THREAD_RET_VALUE);
}
Code example #8
File: conn_log.c Project: pgmarchenko/mongo
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint32_t filenum;
	int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * We update the close file handle before updating the
			 * close LSN when changing files.  It is possible we
			 * could see mismatched settings.  If we do, yield
			 * until it is set.  This should rarely happen.
			 */
			while (log->log_close_lsn.file < filenum)
				__wt_yield();

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set the close_end_lsn to the LSN immediately
				 * after ours.  That is, the beginning of the
				 * next log file.   We need to know the LSN
				 * file number of our own close in case earlier
				 * calls are still in progress and ours is the
				 * next one to move the sync_lsn into the next
				 * file for later syncs.
				 */
				close_end_lsn.file++;
				close_end_lsn.offset = 0;
				WT_ERR(__wt_fsync(session, close_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_sync_cond));
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't, signal the wrlsn thread
			 * to get it written.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				WT_ERR(__wt_fsync(session, log->log_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					log->sync_lsn = min_lsn;
					WT_ERR(__wt_cond_signal(
					    session, log->log_sync_cond));
				}
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				WT_ERR(__wt_cond_signal(
				    session, conn->log_wrlsn_cond));
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				__wt_yield();
				continue;
			}
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(
		    session, conn->log_file_cond, WT_MILLION));
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
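
The LSN comparisons throughout these examples operate on a (file, offset) pair ordered lexicographically: file number first, then byte offset. For the versions most of these snippets come from, the structure is approximately the following (example #14 is from a later version where the fields moved into a union member, hence close_end_lsn.l.file):

/*
 * A log sequence number: a log file number plus a byte offset within
 * that file.  __wt_log_cmp-style comparisons order by file, then offset.
 */
typedef struct {
	uint32_t file;		/* Log file number. */
	wt_off_t offset;	/* Byte offset within the file. */
} WT_LSN;
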
Code example #9
File: lsm_worker.c Project: 7segments/mongo-1
/*
 * __lsm_worker --
 *	A thread that executes work units for all open LSM trees.
 */
static WT_THREAD_RET
__lsm_worker(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LSM_WORK_UNIT *entry;
	WT_LSM_WORKER_ARGS *cookie;
	WT_SESSION_IMPL *session;
	int progress, ran;

	cookie = (WT_LSM_WORKER_ARGS *)arg;
	session = cookie->session;
	conn = S2C(session);

	entry = NULL;
	while (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
	    F_ISSET(cookie, WT_LSM_WORKER_RUN)) {
		progress = 0;

		/*
		 * Workers process the different LSM work queues.  Some workers
		 * can handle several or all work unit types.  So the code is
		 * prioritized so important operations happen first.
		 * Switches are the highest priority.
		 */
		while (FLD_ISSET(cookie->type, WT_LSM_WORK_SWITCH) &&
		    (ret = __wt_lsm_manager_pop_entry(
		    session, WT_LSM_WORK_SWITCH, &entry)) == 0 &&
		    entry != NULL)
			WT_ERR(
			    __wt_lsm_work_switch(session, &entry, &progress));
		/* Flag an error if the pop failed. */
		WT_ERR(ret);

		/*
		 * Next the general operations.
		 */
		ret = __lsm_worker_general_op(session, cookie, &ran);
		if (ret == EBUSY || ret == WT_NOTFOUND)
			ret = 0;
		WT_ERR(ret);
		progress = progress || ran;

		/*
		 * Finally see if there is any merge work we can do.  This is
		 * last because the earlier operations may result in adding
		 * merge work to the queue.
		 */
		if (FLD_ISSET(cookie->type, WT_LSM_WORK_MERGE) &&
		    (ret = __wt_lsm_manager_pop_entry(
		    session, WT_LSM_WORK_MERGE, &entry)) == 0 &&
		    entry != NULL) {
			WT_ASSERT(session, entry->type == WT_LSM_WORK_MERGE);
			ret = __wt_lsm_merge(session,
			    entry->lsm_tree, cookie->id);
			if (ret == WT_NOTFOUND) {
				F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING);
				ret = 0;
			} else if (ret == EBUSY)
				ret = 0;

			/* Paranoia: clear session state. */
			session->dhandle = NULL;

			__wt_lsm_manager_free_work_unit(session, entry);
			entry = NULL;
			progress = 1;
		}
		/* Flag an error if the pop failed. */
		WT_ERR(ret);

		/* Don't busy wait if there was any work to do. */
		if (!progress) {
			WT_ERR(
			    __wt_cond_wait(session, cookie->work_cond, 10000));
			continue;
		}
	}

	if (ret != 0) {
err:		__wt_lsm_manager_free_work_unit(session, entry);
		WT_PANIC_MSG(session, ret,
		    "Error in LSM worker thread %d", cookie->id);
	}
	return (WT_THREAD_RET_VALUE);
}
Code example #10
File: conn_log.c Project: christkv/mongo
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
	WT_LOGSLOT *slot;
	WT_SESSION_IMPL *session;
	size_t written_i;
	uint32_t i, save_i;
	int yield;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	yield = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * No need to use the log_slot_lock because the slot pool
		 * is statically allocated and any slot in the
		 * WT_LOG_SLOT_WRITTEN state is exclusively ours for now.
		 */
		i = 0;
		written_i = 0;
		/*
		 * Walk the array once saving any slots that are in the
		 * WT_LOG_SLOT_WRITTEN state.
		 */
		while (i < WT_SLOT_POOL) {
			save_i = i;
			slot = &log->slot_pool[i++];
			if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
				continue;
			written[written_i].slot_index = save_i;
			written[written_i++].lsn = slot->slot_release_lsn;
		}
		/*
		 * If we found any written slots process them.  We sort them
		 * based on the release LSN, and then look for them in order.
		 */
		if (written_i > 0) {
			yield = 0;
			WT_INSERTION_SORT(written, written_i,
			    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);

			/*
			 * We know the written array is sorted by LSN.  Go
			 * through them either advancing write_lsn or stop
			 * as soon as one is not in order.
			 */
			for (i = 0; i < written_i; i++) {
				if (WT_LOG_CMP(&log->write_lsn,
				    &written[i].lsn) != 0)
					break;
				/*
				 * If we get here we have a slot to process.
				 * Advance the LSN and process the slot.
				 */
				slot = &log->slot_pool[written[i].slot_index];
				WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn,
				    &slot->slot_release_lsn) == 0);
				log->write_start_lsn = slot->slot_start_lsn;
				log->write_lsn = slot->slot_end_lsn;
				WT_ERR(__wt_cond_signal(session,
				    log->log_write_cond));
				WT_STAT_FAST_CONN_INCR(session, log_write_lsn);

				/*
				 * Signal the close thread if needed.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					WT_ERR(__wt_cond_signal(session,
					    conn->log_file_cond));
				WT_ERR(__wt_log_slot_free(session, slot));
			}
		}
		/*
		 * If we saw a later write, we always want to yield because
		 * we know something is in progress.
		 */
		if (yield++ < 1000)
			__wt_yield();
		else
			/* Wait until the next event. */
			WT_ERR(__wt_cond_wait(session,
			    conn->log_wrlsn_cond, 100000));
	}

	if (0) {
err:		__wt_err(session, ret, "log wrlsn server error");
	}
	return (WT_THREAD_RET_VALUE);
}
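
The WT_WRLSN_ENTRY_CMP_LT comparator passed to WT_INSERTION_SORT in example #10 orders the written slots by release LSN; its definition is approximately:

/*
 * Order two WT_LOG_WRLSN_ENTRY items by release LSN: file number first,
 * byte offset within the same file second.
 */
#define	WT_WRLSN_ENTRY_CMP_LT(entry1, entry2)				\
	((entry1).lsn.file < (entry2).lsn.file ||			\
	((entry1).lsn.file == (entry2).lsn.file &&			\
	(entry1).lsn.offset < (entry2).lsn.offset))
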
Code example #11
File: conn_log.c Project: christkv/mongo
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, close_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL &&
		    (ret = __wt_log_extract_lognum(session, close_fh->name,
		    &close_lsn.file)) == 0 &&
		    close_lsn.file < log->write_lsn.file) {
			/*
			 * We've copied the file handle, clear out the one in
			 * log structure to allow it to be set again.
			 */
			log->log_close_fh = NULL;
			/*
			 * Set the close_end_lsn to the LSN immediately after
			 * ours.  That is, the beginning of the next log file.
			 * We need to know the LSN file number of our own close
			 * in case earlier calls are still in progress and
			 * ours is the next one to move the sync_lsn into the
			 * next file for later syncs.
			 */
			close_lsn.offset = 0;
			close_end_lsn = close_lsn;
			close_end_lsn.file++;
			WT_ERR(__wt_fsync(session, close_fh));
			__wt_spin_lock(session, &log->log_sync_lock);
			locked = 1;
			WT_ERR(__wt_close(session, &close_fh));
			WT_ASSERT(session,
			    WT_LOG_CMP(&close_end_lsn, &log->sync_lsn) >= 0);
			log->sync_lsn = close_end_lsn;
			WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
			locked = 0;
			__wt_spin_unlock(session, &log->log_sync_lock);
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (WT_LOG_CMP(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * The sync LSN we asked for better be smaller than
			 * the current written LSN.
			 */
			WT_ASSERT(session,
			    WT_LOG_CMP(&log->bg_sync_lsn, &min_lsn) <= 0);
			WT_ERR(__wt_fsync(session, log->log_fh));
			__wt_spin_lock(session, &log->log_sync_lock);
			locked = 1;
			/*
			 * The sync LSN could have advanced while we were
			 * writing to disk.
			 */
			if (WT_LOG_CMP(&log->sync_lsn, &min_lsn) <= 0) {
				log->sync_lsn = min_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_sync_cond));
			}
			locked = 0;
			__wt_spin_unlock(session, &log->log_sync_lock);
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(
		    session, conn->log_file_cond, WT_MILLION));
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
Code example #12
File: log.c Project: EaseTech/wiredtiger
/*
 * __wt_log_write --
 *	Write a record into the log.
 */
int
__wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
    uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_RECORD *logrec;
	WT_LSN lsn;
	WT_MYSLOT myslot;
	uint32_t rdup_len;
	int locked;

	conn = S2C(session);
	log = conn->log;
	locked = 0;
	INIT_LSN(&lsn);
	myslot.slot = NULL;
	/*
	 * Assume the WT_ITEM the user passed is a WT_LOG_RECORD, which has
	 * a header at the beginning for us to fill in.
	 *
	 * If using direct_io, the caller should pass us an aligned record.
	 * But we need to make sure it is big enough and zero-filled so
	 * that we can write the full amount.  Do this whether or not
	 * direct_io is in use because it makes the reading code cleaner.
	 */
	WT_STAT_FAST_CONN_INCRV(session, log_bytes_user, record->size);
	rdup_len = __wt_rduppo2((uint32_t)record->size, log->allocsize);
	WT_ERR(__wt_buf_grow(session, record, rdup_len));
	WT_ASSERT(session, record->data == record->mem);
	/*
	 * If the caller's record only partially fills the necessary
	 * space, we need to zero-fill the remainder.
	 */
	if (record->size != rdup_len) {
		memset((uint8_t *)record->mem + record->size, 0,
		    rdup_len - record->size);
		record->size = rdup_len;
	}
	logrec = (WT_LOG_RECORD *)record->mem;
	logrec->len = (uint32_t)record->size;
	logrec->checksum = 0;
	logrec->checksum = __wt_cksum(logrec, record->size);

	WT_STAT_FAST_CONN_INCR(session, log_writes);

	if (!F_ISSET(log, WT_LOG_FORCE_CONSOLIDATE)) {
		ret = __log_direct_write(session, record, lsnp, flags);
		if (ret == 0)
			return (0);
		if (ret != EAGAIN)
			WT_ERR(ret);
		/*
		 * An EAGAIN return means we failed to get the try lock -
		 * fall through to the consolidation code in that case.
		 */
	}

	/*
	 * As soon as we see contention for the log slot, disable direct
	 * log writes. We get better performance by forcing writes through
	 * the consolidation code. This is because individual writes flood
	 * the I/O system faster than they contend on the log slot lock.
	 */
	F_SET(log, WT_LOG_FORCE_CONSOLIDATE);
	if ((ret = __wt_log_slot_join(
	    session, rdup_len, flags, &myslot)) == ENOMEM) {
		/*
		 * If we couldn't find a consolidated slot for this record
		 * write the record directly.
		 */
		while ((ret = __log_direct_write(
		    session, record, lsnp, flags)) == EAGAIN)
			;
		WT_ERR(ret);
		/*
		 * Increase the buffer size of any slots we can get access
		 * to, so future consolidations are likely to succeed.
		 */
		WT_ERR(__wt_log_slot_grow_buffers(session, 4 * rdup_len));
		return (0);
	}
	WT_ERR(ret);
	if (myslot.offset == 0) {
		__wt_spin_lock(session, &log->log_slot_lock);
		locked = 1;
		WT_ERR(__wt_log_slot_close(session, myslot.slot));
		WT_ERR(__log_acquire(
		    session, myslot.slot->slot_group_size, myslot.slot));
		__wt_spin_unlock(session, &log->log_slot_lock);
		locked = 0;
		WT_ERR(__wt_log_slot_notify(session, myslot.slot));
	} else
		WT_ERR(__wt_log_slot_wait(session, myslot.slot));
	WT_ERR(__log_fill(session, &myslot, 0, record, &lsn));
	if (__wt_log_slot_release(myslot.slot, rdup_len) == WT_LOG_SLOT_DONE) {
		WT_ERR(__log_release(session, myslot.slot));
		WT_ERR(__wt_log_slot_free(myslot.slot));
	} else if (LF_ISSET(WT_LOG_FSYNC)) {
		/* Wait for our writes to reach disk */
		while (LOG_CMP(&log->sync_lsn, &lsn) <= 0 &&
		    myslot.slot->slot_error == 0)
			(void)__wt_cond_wait(
			    session, log->log_sync_cond, 10000);
	}
err:
	if (locked)
		__wt_spin_unlock(session, &log->log_slot_lock);
	if (ret == 0 && lsnp != NULL)
		*lsnp = lsn;
	/*
	 * If we're synchronous and some thread had an error, we don't know
	 * if our write made it out to the file or not.  The error could be
	 * before or after us.  So, if anyone got an error, we report it.
	 * If we're not synchronous, only report if our own operation got
	 * an error.
	 */
	if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC) && ret == 0 &&
	    myslot.slot != NULL)
		ret = myslot.slot->slot_error;
	return (ret);
}
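
Example #12 computes the record checksum with the checksum field itself zeroed (logrec->checksum = 0 before calling __wt_cksum). A reader verifies a record by reversing those steps; a sketch, where LOG_RECORD mirrors the two header fields used above and cksum stands in for __wt_cksum:

#include <stddef.h>
#include <stdint.h>

/* Stand-in for the record header fields used above. */
typedef struct {
	uint32_t len;		/* Record length, including this header. */
	uint32_t checksum;	/* Checksum, computed with this field zero. */
} LOG_RECORD;

uint32_t cksum(const void *, size_t);	/* Stand-in for __wt_cksum. */

/*
 * Verify a record the same way the write path creates it: zero the
 * checksum field, recompute over the whole record, compare.
 */
static int
record_verify(LOG_RECORD *logrec)
{
	uint32_t saved;

	saved = logrec->checksum;
	logrec->checksum = 0;
	if (cksum(logrec, logrec->len) != saved)
		return (-1);		/* Corrupt or torn record. */
	logrec->checksum = saved;	/* Restore for later readers. */
	return (0);
}
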
Code example #13
File: log.c Project: EaseTech/wiredtiger
/*
 * __log_release --
 *	Release a log slot.
 */
static int
__log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN sync_lsn;
	size_t write_size;
	WT_DECL_SPINLOCK_ID(id);			/* Must appear last */

	conn = S2C(session);
	log = conn->log;
	/*
	 * If we're going to have to close our log file, make a local copy
	 * of the file handle structure.
	 */
	close_fh = NULL;
	if (F_ISSET(slot, SLOT_CLOSEFH)) {
		close_fh = log->log_close_fh;
		log->log_close_fh = NULL;
		F_CLR(slot, SLOT_CLOSEFH);
	}

	/* Write the buffered records */
	if (F_ISSET(slot, SLOT_BUFFERED)) {
		write_size = (size_t)
		    (slot->slot_end_lsn.offset - slot->slot_start_offset);
		WT_ERR(__wt_write(session, slot->slot_fh,
		    slot->slot_start_offset, write_size, slot->slot_buf.mem));
	}

	/*
	 * Wait for earlier groups to finish, otherwise there could be holes
	 * in the log file.
	 */
	while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0)
		__wt_yield();
	log->write_lsn = slot->slot_end_lsn;
	/*
	 * Try to consolidate calls to fsync to wait less.  Acquire a spin lock
	 * so that threads finishing writing to the log will wait while the
	 * current fsync completes and advance log->write_lsn.
	 */
	while (F_ISSET(slot, SLOT_SYNC) &&
	    LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
		if (__wt_spin_trylock(session, &log->log_sync_lock, &id) != 0) {
			(void)__wt_cond_wait(
			    session, log->log_sync_cond, 10000);
			continue;
		}
		/*
		 * Record the current end of log after we grabbed the lock.
		 * That is how far our fsync call will guarantee durability.
		 */
		sync_lsn = log->write_lsn;
		if (LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
			WT_STAT_FAST_CONN_INCR(session, log_sync);
			ret = __wt_fsync(session, log->log_fh);
			if (ret == 0) {
				F_CLR(slot, SLOT_SYNC);
				log->sync_lsn = sync_lsn;
				ret = __wt_cond_signal(
				    session, log->log_sync_cond);
			}
		}
		__wt_spin_unlock(session, &log->log_sync_lock);
		WT_ERR(ret);
	}
	if (F_ISSET(slot, SLOT_BUF_GROW)) {
		WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
		F_CLR(slot, SLOT_BUF_GROW);
		WT_STAT_FAST_CONN_INCRV(session,
		    log_buffer_size, slot->slot_buf.memsize);
		WT_ERR(__wt_buf_grow(session,
		    &slot->slot_buf, slot->slot_buf.memsize * 2));
	}
	/*
	 * If we have a file to close, close it now.
	 */
	if (close_fh)
		WT_ERR(__wt_close(session, close_fh));

err:	if (ret != 0 && slot->slot_error == 0)
		slot->slot_error = ret;
	return (ret);
}
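
The SLOT_SYNC loop in example #13 consolidates fsync calls: a thread that needs durability up to its slot's end LSN either wins log_sync_lock and syncs everything written so far, or waits on log_sync_cond for the current winner to advance sync_lsn past it. The shape of the pattern, distilled with stand-in primitives (none of these names are WiredTiger's):

/* Stand-ins for the locking and I/O primitives used above. */
extern int try_lock(void);		/* Returns 0 on success. */
extern void unlock(void), cond_wait_ms(int), cond_broadcast(void);
extern void do_fsync(void);
extern long synced_to, written_to;	/* Monotonic LSN stand-ins. */

/*
 * Callers needing durability up to `target` either perform one fsync
 * covering everything written so far, or wait for a concurrent winner
 * to advance synced_to past their target.
 */
static void
sync_to(long target)
{
	long end;

	while (synced_to < target) {
		if (try_lock() != 0) {
			cond_wait_ms(10);	/* A sync is in progress. */
			continue;
		}
		end = written_to;	/* This fsync covers this much. */
		if (synced_to < target) {
			do_fsync();
			synced_to = end;
			cond_broadcast();	/* Release the waiters. */
		}
		unlock();
	}
}
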
Code example #14
File: conn_log.c Project: judahschvimer/mongo
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint32_t filenum;
	bool locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = false;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * We update the close file handle before updating the
			 * close LSN when changing files.  It is possible we
			 * could see mismatched settings.  If we do, yield
			 * until it is set.  This should rarely happen.
			 */
			while (log->log_close_lsn.l.file < filenum)
				__wt_yield();

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set the close_end_lsn to the LSN immediately
				 * after ours.  That is, the beginning of the
				 * next log file.   We need to know the LSN
				 * file number of our own close in case earlier
				 * calls are still in progress and ours is the
				 * next one to move the sync_lsn into the next
				 * file for later syncs.
				 */
				WT_ERR(__wt_fsync(session, close_fh, true));

				/*
				 * We want to have the file size reflect actual
				 * data with minimal pre-allocated zeroed space.
				 * We can't truncate the file during hot backup,
				 * or the underlying file system may not support
				 * truncate: both are OK, it's just more work
				 * during cursor traversal.
				 */
				if (!conn->hot_backup) {
					__wt_readlock(
					    session, conn->hot_backup_lock);
					if (!conn->hot_backup)
						WT_ERR_ERROR_OK(
						    __wt_ftruncate(session,
						    close_fh,
						    close_end_lsn.l.offset),
						    ENOTSUP);
					__wt_readunlock(
					    session, conn->hot_backup_lock);
				}
				WT_SET_LSN(&close_end_lsn,
				    close_end_lsn.l.file + 1, 0);
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				__wt_cond_signal(session, log->log_sync_cond);
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't, signal the wrlsn thread
			 * to get it written.
			 *
			 * We also have to wait for the written LSN and the
			 * sync LSN to be in the same file so that we know we
			 * have synchronized all earlier log files.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				/*
				 * If the sync file is behind either the one
				 * wanted for a background sync or the write LSN
				 * has moved to another file continue to let
				 * this worker thread process that older file
				 * immediately.
				 */
				if ((log->sync_lsn.l.file <
				    log->bg_sync_lsn.l.file) ||
				    (log->sync_lsn.l.file < min_lsn.l.file))
					continue;
				WT_ERR(__wt_fsync(session, log->log_fh, true));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					WT_ASSERT(session,
					    min_lsn.l.file ==
					    log->sync_lsn.l.file);
					log->sync_lsn = min_lsn;
					__wt_cond_signal(
					    session, log->log_sync_cond);
				}
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				__wt_cond_auto_signal(
				    session, conn->log_wrlsn_cond);
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				__wt_yield();
				continue;
			}
		}
		/* Wait until the next event. */
		__wt_cond_wait(session, conn->log_file_cond, WT_MILLION / 10);
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
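
Example #14's truncate call tolerates file systems without truncate support through WT_ERR_ERROR_OK, which treats exactly one error code (here ENOTSUP) as success. Its form is approximately:

/*
 * Run the call; treat one specific error code as success, jump to err
 * for anything else.
 */
#define	WT_ERR_ERROR_OK(a, e)						\
	do {								\
		if ((ret = (a)) != 0 && ret != (e))			\
			goto err;					\
		ret = 0;						\
	} while (0)
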
Code example #15
/*
 * __wt_lsm_merge_worker --
 *	The merge worker thread for an LSM tree, responsible for merging
 *	on-disk trees.
 */
void *
__wt_lsm_merge_worker(void *vargs)
{
	WT_DECL_RET;
	WT_LSM_WORKER_ARGS *args;
	WT_LSM_TREE *lsm_tree;
	WT_SESSION_IMPL *session;
	u_int aggressive, chunk_wait, id, old_aggressive, stallms;
	int progress;

	args = vargs;
	lsm_tree = args->lsm_tree;
	id = args->id;
	session = lsm_tree->worker_sessions[id];
	__wt_free(session, args);

	aggressive = stallms = 0;

	while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
		/*
		 * Help out with switching chunks in case the checkpoint worker
		 * is busy.
		 */
		if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
			WT_WITH_SCHEMA_LOCK(session, ret =
			    __wt_lsm_tree_switch(session, lsm_tree));
			WT_ERR(ret);
		}

		progress = 0;

		/* Clear any state from previous worker thread iterations. */
		session->dhandle = NULL;

		/* Try to create a Bloom filter. */
		if (__lsm_bloom_work(session, lsm_tree) == 0)
			progress = 1;

		/* If we didn't create a Bloom filter, try to merge. */
		if (progress == 0 &&
		    __wt_lsm_merge(session, lsm_tree, id, aggressive) == 0)
			progress = 1;

		/* Clear any state from previous worker thread iterations. */
		WT_CLEAR_BTREE_IN_SESSION(session);

		/*
		 * Only have one thread freeing old chunks, and only if there
		 * are chunks to free.
		 */
		if (id == 0 && lsm_tree->nold_chunks > 0 &&
		    __lsm_free_chunks(session, lsm_tree) == 0)
			progress = 1;

		if (progress)
			stallms = 0;
		else if (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) &&
		    !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
			/* Poll 10 times per second. */
			WT_ERR_TIMEDOUT_OK(__wt_cond_wait(
			    session, lsm_tree->work_cond, 100000));
			stallms += 100;

			/*
			 * Get aggressive if more than enough chunks for a
			 * merge should have been created while we waited.
			 * Use 10 seconds as a default if we don't have an
			 * estimate.
			 */
			chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ?
			    10000 : lsm_tree->chunk_fill_ms);
			old_aggressive = aggressive;
			aggressive = chunk_wait / lsm_tree->merge_min;

			if (aggressive > old_aggressive)
				WT_VERBOSE_ERR(session, lsm,
				     "LSM merge got aggressive (%u), "
				     "%u / %" PRIu64,
				     aggressive, stallms,
				     lsm_tree->chunk_fill_ms);
		}
	}

	if (0) {
err:		__wt_err(session, ret, "LSM merge worker failed");
	}

	return (NULL);
}
Code example #16
/*
 * __wt_lsm_checkpoint_worker --
 *	A worker thread for an LSM tree, responsible for flushing new chunks to
 *	disk.
 */
void *
__wt_lsm_checkpoint_worker(void *arg)
{
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_TREE *lsm_tree;
	WT_LSM_WORKER_COOKIE cookie;
	WT_SESSION_IMPL *session;
	WT_TXN_ISOLATION saved_isolation;
	u_int i, j;
	int locked;

	lsm_tree = arg;
	session = lsm_tree->ckpt_session;

	WT_CLEAR(cookie);

	while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
		if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
			WT_WITH_SCHEMA_LOCK(session, ret =
			    __wt_lsm_tree_switch(session, lsm_tree));
			WT_ERR(ret);
		}

		WT_ERR(__lsm_copy_chunks(session, lsm_tree, &cookie, 0));

		/* Write checkpoints in all completed files. */
		for (i = 0, j = 0; i < cookie.nchunks - 1; i++) {
			if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING))
				goto err;

			if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
				break;

			chunk = cookie.chunk_array[i];

			/* Stop if a running transaction needs the chunk. */
			__wt_txn_update_oldest(session);
			if (!__wt_txn_visible_all(session, chunk->txnid_max))
				break;

			/*
			 * If the chunk is already checkpointed, make sure it
			 * is also evicted.  Either way, there is no point
			 * trying to checkpoint it again.
			 */
			if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK)) {
				if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_EVICTED))
					continue;

				if ((ret = __lsm_discard_handle(
				    session, chunk->uri, NULL)) == 0)
					F_SET_ATOMIC(
					    chunk, WT_LSM_CHUNK_EVICTED);
				else if (ret == EBUSY)
					ret = 0;
				else
					WT_ERR_MSG(session, ret,
					    "discard handle");
				continue;
			}

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker flushing %u", i);

			/*
			 * Flush the file before checkpointing: this is the
			 * expensive part in terms of I/O: do it without
			 * holding the schema lock.
			 *
			 * Use the special eviction isolation level to avoid
			 * interfering with an application checkpoint: we have
			 * already checked that all of the updates in this
			 * chunk are globally visible.
			 *
			 * !!! We can wait here for checkpoints and fsyncs to
			 * complete, which can be a long time.
			 *
			 * Don't keep waiting for the lock if application
			 * threads are waiting for a switch.  Don't skip
			 * flushing the leaves either: that just means we'll
			 * hold the schema lock for (much) longer, which blocks
			 * the world.
			 */
			WT_ERR(__wt_session_get_btree(
			    session, chunk->uri, NULL, NULL, 0));
			for (locked = 0;
			    !locked && ret == 0 &&
			    !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);) {
				if ((ret = __wt_spin_trylock(session,
				    &S2C(session)->checkpoint_lock)) == 0)
					locked = 1;
				else if (ret == EBUSY) {
					__wt_yield();
					ret = 0;
				}
			}
			if (locked) {
				saved_isolation = session->txn.isolation;
				session->txn.isolation = TXN_ISO_EVICTION;
				ret = __wt_bt_cache_op(
				    session, NULL, WT_SYNC_WRITE_LEAVES);
				session->txn.isolation = saved_isolation;
				__wt_spin_unlock(
				    session, &S2C(session)->checkpoint_lock);
			}
			WT_TRET(__wt_session_release_btree(session));
			WT_ERR(ret);

			if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
				break;

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker checkpointing %u", i);

			WT_WITH_SCHEMA_LOCK(session,
			    ret = __wt_schema_worker(session, chunk->uri,
			    __wt_checkpoint, NULL, NULL, 0));

			if (ret != 0) {
				__wt_err(session, ret, "LSM checkpoint");
				break;
			}

			WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
			/*
			 * Clear the "cache resident" flag so the primary can
			 * be evicted and eventually closed.  Only do this once
			 * the checkpoint has succeeded: otherwise, accessing
			 * the leaf page during the checkpoint can trigger
			 * forced eviction.
			 */
			WT_ERR(__wt_session_get_btree(
			    session, chunk->uri, NULL, NULL, 0));
			__wt_btree_evictable(session, 1);
			WT_ERR(__wt_session_release_btree(session));

			++j;
			WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1));
			F_SET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK);
			ret = __wt_lsm_meta_write(session, lsm_tree);
			++lsm_tree->dsk_gen;

			/* Update the throttle time. */
			__wt_lsm_tree_throttle(session, lsm_tree);
			WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree));

			/* Make sure we aren't pinning a transaction ID. */
			__wt_txn_release_snapshot(session);

			if (ret != 0) {
				__wt_err(session, ret,
				    "LSM checkpoint metadata write");
				break;
			}

			WT_VERBOSE_ERR(session, lsm,
			     "LSM worker checkpointed %u", i);
		}
		__lsm_unpin_chunks(session, &cookie);
		if (j == 0 && F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) &&
		    !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
			WT_ERR_TIMEDOUT_OK(__wt_cond_wait(
			    session, lsm_tree->work_cond, 100000));
	}
err:	__lsm_unpin_chunks(session, &cookie);
	__wt_free(session, cookie.chunk_array);
	/*
	 * The thread will only exit with failure if we run out of memory or
	 * there is some other system driven failure. We can't keep going
	 * after such a failure - ensure WiredTiger shuts down.
	 */
	if (ret != 0 && ret != WT_NOTFOUND)
		WT_PANIC_ERR(session, ret,
		    "Shutting down LSM checkpoint utility thread");
	return (NULL);
}
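
Examples #15 and #16 wrap LSM chunk switches and checkpoint scheduling in WT_WITH_SCHEMA_LOCK, which serializes schema-changing operations across the connection. In WiredTiger versions of this era the macro is approximately the following (the real macro also handles the case where the session already holds the lock):

/*
 * Run one statement while holding the connection-wide schema lock, so
 * schema operations (here, the LSM chunk switch) are serialized.
 */
#define	WT_WITH_SCHEMA_LOCK(session, op) do {				\
	__wt_spin_lock(session, &S2C(session)->schema_lock);		\
	op;								\
	__wt_spin_unlock(session, &S2C(session)->schema_lock);		\
} while (0)
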