예제 #1
0
파일: conn_sweep.c 프로젝트: mdkhaled/mongo
/*
 * __sweep_server --
 *	Thread entry point for the handle sweep server.
 *
 * The argument is the server's session.  The thread alternates between
 * waiting on the connection's sweep condition variable and sweeping
 * handles, until either the connection's WT_CONN_SERVER_RUN flag or its
 * WT_CONN_SERVER_SWEEP flag is cleared.  A failure in the wait or in the
 * sweep is reported through WT_PANIC_MSG.  The return value is always NULL.
 */
static void *
__sweep_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	session = arg;
	conn = S2C(session);

	for (;;) {
		/* Leave once the connection or this server is told to stop. */
		if (!F_ISSET(conn, WT_CONN_SERVER_RUN) ||
		    !F_ISSET(conn, WT_CONN_SERVER_SWEEP))
			break;

		/* Block on the sweep condition variable until the next event. */
		WT_ERR(
		    __wt_cond_wait(session, conn->sweep_cond, 30 * WT_MILLION));

		/* Look for dead handles. */
		WT_ERR(__sweep(session));
	}
	return (NULL);

err:	WT_PANIC_MSG(session, ret, "handle sweep server error");
	return (NULL);
}
예제 #2
0
/*
 * __wt_thread_run --
 *	Common entry point used to start any utility thread.
 *
 * The argument is the WT_THREAD describing the thread; its run_func is
 * called with the thread's session and the thread itself.  If run_func
 * fails and the thread is flagged WT_THREAD_PANIC_FAIL, the failure is
 * reported through WT_PANIC_MSG.  Always returns WT_THREAD_RET_VALUE.
 */
WT_THREAD_RET
__wt_thread_run(void *arg)
{
	WT_DECL_RET;
	WT_SESSION_IMPL *session;
	WT_THREAD *thread;

	thread = arg;
	session = thread->session;

	/* Run the thread's body; only some threads treat failure as fatal. */
	if ((ret = thread->run_func(session, thread)) != 0) {
		if (F_ISSET(thread, WT_THREAD_PANIC_FAIL))
			WT_PANIC_MSG(session, ret,
			    "Unrecoverable utility thread error");
	}

	/*
	 * A thread is only expected to get here in one of three situations:
	 * recovery has finished, the connection is closing, or a shutdown
	 * was requested by clearing the thread's run flag.
	 */
	WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_RUN) ||
	    F_ISSET(S2C(session), WT_CONN_CLOSING | WT_CONN_RECOVERING));

	return (WT_THREAD_RET_VALUE);
}
예제 #3
0
파일: conn_log.c 프로젝트: mpobrien/mongo
/*
 * __log_wrlsn_server --
 *	Thread entry point for the log write-LSN server.
 *
 * The argument is the server's session.  While the connection's
 * WT_CONN_SERVER_LOG flag is set, the thread calls __wt_log_wrlsn whenever
 * the log looks active, then either yields or waits on the wrlsn condition
 * variable.  When the flag is cleared it forces out buffered log writes and
 * calls __wt_log_wrlsn one last time.  A failure of that final forced write
 * is reported through WT_PANIC_MSG.  Always returns WT_THREAD_RET_VALUE.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LSN last_alloc_lsn;
	WT_SESSION_IMPL *session;
	int yield_count;
	bool did_work, idle, keep_yielding;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	WT_INIT_LSN(&last_alloc_lsn);
	yield_count = 0;

	while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
		/*
		 * The system is considered idle when the allocation LSN has
		 * not moved since the previous pass and the written LSN has
		 * caught up with it.  Only walk the slots when that is not
		 * the case; otherwise just count the skipped pass.
		 */
		idle = __wt_log_cmp(&last_alloc_lsn, &log->alloc_lsn) == 0 &&
		    __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) == 0;
		if (idle) {
			WT_STAT_CONN_INCR(session, log_write_lsn_skip);
		} else {
			__wt_log_wrlsn(session, &yield_count);
		}
		last_alloc_lsn = log->alloc_lsn;

		/*
		 * A zero counter is treated as "work was done" (presumably
		 * __wt_log_wrlsn resets the counter it is handed when it
		 * makes progress -- confirm against that function).
		 */
		did_work = (yield_count == 0);

		/*
		 * Prefer yielding to sleeping while the counter is below the
		 * threshold; the counter advances on every pass either way.
		 */
		keep_yielding = yield_count < WT_THOUSAND;
		++yield_count;
		if (keep_yielding) {
			__wt_yield();
		} else {
			__wt_cond_auto_wait(
			    session, conn->log_wrlsn_cond, did_work, NULL);
		}
	}

	/*
	 * Shutting down: go around once more, there may be straggling log
	 * writes that still need to be written.
	 */
	WT_ERR(__wt_log_force_write(session, 1, NULL));
	__wt_log_wrlsn(session, NULL);
	return (WT_THREAD_RET_VALUE);

err:	WT_PANIC_MSG(session, ret, "log wrlsn server error");
	return (WT_THREAD_RET_VALUE);
}
예제 #4
0
/*
 * __ckpt_server --
 *	Thread entry point for the checkpoint server.
 *
 * The argument is the server's session.  While both WT_CONN_SERVER_RUN and
 * WT_CONN_SERVER_CHECKPOINT are set on the connection, the thread takes a
 * checkpoint using the connection's checkpoint configuration and then
 * waits on the checkpoint condition variable.  Any failure is reported
 * through WT_PANIC_MSG.  The return value is always NULL.
 */
static void *
__ckpt_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION *wt_session;
	WT_SESSION_IMPL *session;

	session = arg;
	wt_session = (WT_SESSION *)session;
	conn = S2C(session);

	for (;;) {
		/* Leave once the connection or this server is told to stop. */
		if (!F_ISSET(conn, WT_CONN_SERVER_RUN) ||
		    !F_ISSET(conn, WT_CONN_SERVER_CHECKPOINT))
			break;

		/* Take the checkpoint. */
		WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config));

		if (conn->ckpt_logsize != 0) {
			/* Start counting log bytes from scratch. */
			__wt_log_written_reset(session);
			conn->ckpt_signalled = 0;

			/*
			 * The log limit may have been crossed while the
			 * checkpoint was running, leaving the condition
			 * variable already signalled.  A minimal wait clears
			 * that so another checkpoint isn't started right away.
			 */
			WT_ERR(__wt_cond_wait(session, conn->ckpt_cond, 1));
		}

		/*
		 * Wait for the next checkpoint.  If only a log size was
		 * configured, ckpt_usecs is 0 and the wait does not return
		 * until the condition variable is signalled.
		 */
		WT_ERR(
		    __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs));
	}
	return (NULL);

err:	WT_PANIC_MSG(session, ret, "checkpoint server error");
	return (NULL);
}
예제 #5
0
/*
 * __wt_optrack_record_funcid --
 *	Assign an operation-tracking ID to a function and record it.
 *
 * If *func_idp is still zero once the map spinlock is held, the next ID
 * from a process-wide counter is stored in it and a "<id> <name>\n" line is
 * written at the current end of the connection's optrack map file.  A
 * non-zero *func_idp is left alone.  Any failure is reported through
 * WT_PANIC_MSG; the function itself returns nothing.
 */
void
__wt_optrack_record_funcid(
    WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp)
{
	static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	wt_off_t fsize;
	bool locked;

	locked = false;
	conn = S2C(session);

	/* Scratch space for the map line: the name plus room for the ID. */
	WT_ERR(__wt_scr_alloc(session, strlen(func) + 32, &tmp));

	__wt_spin_lock(session, &conn->optrack_map_spinlock);
	locked = true;

	/* The ID is tested under the lock; nothing to do if already set. */
	if (*func_idp != 0)
		goto cleanup;

	++optrack_uid;
	*func_idp = optrack_uid;

	/* Append the new mapping at the current end of the map file. */
	WT_ERR(__wt_buf_fmt(
	    session, tmp, "%" PRIu16 " %s\n", *func_idp, func));
	WT_ERR(__wt_filesize(session, conn->optrack_map_fh, &fsize));
	WT_ERR(__wt_write(session,
	    conn->optrack_map_fh, fsize, tmp->size, tmp->data));
	goto cleanup;

err:	WT_PANIC_MSG(session, ret,
	    "operation tracking initialization failure");

cleanup:
	/* The lock is not held if the scratch allocation failed. */
	if (locked)
		__wt_spin_unlock(session, &conn->optrack_map_spinlock);
	__wt_scr_free(session, &tmp);
}
예제 #6
0
/*
 * __lsm_worker --
 *	A thread that executes work units for all open LSM trees.
 *
 * The argument is a WT_LSM_WORKER_ARGS cookie supplying the worker's
 * session, the set of work types it handles (cookie->type), its id and the
 * condition variable it waits on when idle.  The thread runs while the
 * connection's WT_CONN_SERVER_RUN flag and the cookie's WT_LSM_WORKER_RUN
 * flag are both set.  Each pass handles switch work, then general
 * operations, then at most one merge.  Any error other than the EBUSY and
 * WT_NOTFOUND results filtered below frees the outstanding work unit and is
 * reported through WT_PANIC_MSG.  Always returns WT_THREAD_RET_VALUE.
 */
static WT_THREAD_RET
__lsm_worker(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LSM_WORK_UNIT *entry;
	WT_LSM_WORKER_ARGS *cookie;
	WT_SESSION_IMPL *session;
	int progress, ran;

	cookie = (WT_LSM_WORKER_ARGS *)arg;
	session = cookie->session;
	conn = S2C(session);

	/* No work unit is outstanding yet; the error path frees "entry". */
	entry = NULL;
	while (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
	    F_ISSET(cookie, WT_LSM_WORKER_RUN)) {
		/* Set when this pass did something, to skip the idle wait. */
		progress = 0;

		/*
		 * Workers process the different LSM work queues.  Some workers
		 * can handle several or all work unit types.  So the code is
		 * prioritized so important operations happen first.
		 * Switches are the highest priority.
		 *
		 * Keep popping switch entries until the queue is empty (a
		 * successful pop that leaves entry NULL) or the pop fails; the
		 * pop's return value is left in ret for the check below.
		 */
		while (FLD_ISSET(cookie->type, WT_LSM_WORK_SWITCH) &&
		    (ret = __wt_lsm_manager_pop_entry(
		    session, WT_LSM_WORK_SWITCH, &entry)) == 0 &&
		    entry != NULL)
			WT_ERR(
			    __wt_lsm_work_switch(session, &entry, &progress));
		/* Flag an error if the pop failed. */
		WT_ERR(ret);

		/*
		 * Next the general operations.  EBUSY and WT_NOTFOUND are not
		 * treated as errors here.
		 *
		 * NOTE(review): "ran" is only assigned by
		 * __lsm_worker_general_op; confirm it is always set, including
		 * when that function returns EBUSY or WT_NOTFOUND, since it is
		 * read unconditionally below.
		 */
		ret = __lsm_worker_general_op(session, cookie, &ran);
		if (ret == EBUSY || ret == WT_NOTFOUND)
			ret = 0;
		WT_ERR(ret);
		progress = progress || ran;

		/*
		 * Finally see if there is any merge work we can do.  This is
		 * last because the earlier operations may result in adding
		 * merge work to the queue.
		 */
		if (FLD_ISSET(cookie->type, WT_LSM_WORK_MERGE) &&
		    (ret = __wt_lsm_manager_pop_entry(
		    session, WT_LSM_WORK_MERGE, &entry)) == 0 &&
		    entry != NULL) {
			WT_ASSERT(session, entry->type == WT_LSM_WORK_MERGE);
			ret = __wt_lsm_merge(session,
			    entry->lsm_tree, cookie->id);
			/*
			 * WT_NOTFOUND from the merge clears the tree's
			 * compacting flag; it and EBUSY are not errors.
			 */
			if (ret == WT_NOTFOUND) {
				F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING);
				ret = 0;
			} else if (ret == EBUSY)
				ret = 0;

			/* Paranoia: clear session state. */
			session->dhandle = NULL;

			/*
			 * The work unit is released before the merge result is
			 * checked, so a failed merge does not leave it
			 * outstanding when control reaches the error path.
			 */
			__wt_lsm_manager_free_work_unit(session, entry);
			entry = NULL;
			progress = 1;
		}
		/* Flag an error if the pop failed. */
		WT_ERR(ret);

		/* Don't busy wait if there was any work to do. */
		if (!progress) {
			WT_ERR(
			    __wt_cond_wait(session, cookie->work_cond, 10000));
			continue;
		}
	}

	/*
	 * The err label sits inside this conditional so the cleanup and panic
	 * run for any WT_ERR failure above, and also if the loop is left with
	 * a non-zero ret.
	 */
	if (ret != 0) {
err:		__wt_lsm_manager_free_work_unit(session, entry);
		WT_PANIC_MSG(session, ret,
		    "Error in LSM worker thread %d", cookie->id);
	}
	return (WT_THREAD_RET_VALUE);
}
예제 #7
0
파일: os_mtx_cond.c 프로젝트: mongodb/mongo
/*
 * __wt_cond_wait_signal --
 *	Wait on a condition variable, optionally timing out, and tell the
 * caller whether the wait ended because of a wakeup.
 *
 * session: the calling session, used for verbose output, statistics and
 *	error reporting.
 * cond: the condition variable to wait on.
 * usecs: timeout in microseconds; 0 waits with no timeout (INFINITE).
 * run_func: optional callback checked once the mutex is held; if it returns
 *	false the wait is skipped and *signalled is set to false.
 * signalled: output; true if the fast path was taken or the sleep returned
 *	success, false on timeout or when run_func declined the wait.
 *
 * A sleep failure other than a timeout is logged and then reported through
 * WT_PANIC_MSG.
 */
void
__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
    uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
{
	BOOL sleepret;
	DWORD milliseconds, windows_error;
	bool locked;
	uint64_t milliseconds64;

	locked = false;

	/*
	 * Fast path if already signalled: when adding ourselves to the waiter
	 * count produces 0, return at once with *signalled left true.
	 */
	*signalled = true;
	if (__wt_atomic_addi32(&cond->waiters, 1) == 0)
		return;

	__wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
	WT_STAT_CONN_INCR(session, cond_wait);

	EnterCriticalSection(&cond->mtx);
	locked = true;

	/*
	 * It's possible to race with threads waking us up. That's not a problem
	 * if there are multiple wakeups because the next wakeup will get us, or
	 * if we're only pausing for a short period. It's a problem if there's
	 * only a single wakeup, our waker is likely waiting for us to exit.
	 * After acquiring the mutex (so we're guaranteed to be awakened by any
	 * future wakeup call), optionally check if we're OK to keep running.
	 * This won't ensure our caller won't just loop and call us again, but
	 * at least it's not our fault.
	 *
	 * Assert we're not waiting longer than a second if not checking the
	 * run status.
	 */
	WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);

	/*
	 * The skipping label is inside the timeout handling below, so declining
	 * to wait is reported exactly like a timeout.
	 */
	if (run_func != NULL && !run_func(session))
		goto skipping;

	if (usecs > 0) {
		/* The Windows API takes milliseconds, not microseconds. */
		milliseconds64 = usecs / WT_THOUSAND;

		/*
		 * Check for 32-bit unsigned integer overflow
		 * INFINITE is max unsigned int on Windows
		 */
		if (milliseconds64 >= INFINITE)
			milliseconds64 = INFINITE - 1;
		milliseconds = (DWORD)milliseconds64;

		/*
		 * 0 would mean the CV sleep becomes a TryCV which we do not
		 * want
		 */
		if (milliseconds == 0)
			milliseconds = 1;

		sleepret = SleepConditionVariableCS(
		    &cond->cond, &cond->mtx, milliseconds);
	} else
		sleepret = SleepConditionVariableCS(
		    &cond->cond, &cond->mtx, INFINITE);

	/*
	 * SleepConditionVariableCS returns non-zero on success, 0 on timeout
	 * or failure.
	 *
	 * The error code is captured immediately, before the calls below.  A
	 * timeout is converted into success with *signalled cleared; any other
	 * failure leaves sleepret at 0 for the error report at the end.
	 */
	if (sleepret == 0) {
		windows_error = __wt_getlasterror();
		if (windows_error == ERROR_TIMEOUT) {
skipping:		*signalled = false;
			sleepret = 1;
		}
	}

	/* Undo the waiter count taken at the top of the function. */
	(void)__wt_atomic_subi32(&cond->waiters, 1);

	/* Every path that reaches this point set locked after taking mtx. */
	if (locked)
		LeaveCriticalSection(&cond->mtx);

	if (sleepret != 0)
		return;

	/*
	 * Only reached when the sleep failed with something other than a
	 * timeout, so windows_error was assigned above.
	 */
	__wt_err(session,
	    __wt_map_windows_error(windows_error),
	    "SleepConditionVariableCS: %s: %s",
	    cond->name, __wt_formatmessage(session, windows_error));
	WT_PANIC_MSG(session, __wt_map_windows_error(windows_error),
	    "SleepConditionVariableCS: %s", cond->name);
}
예제 #8
0
파일: conn_log.c 프로젝트: mpobrien/mongo
/*
 * __log_server --
 *	Thread entry point for the log server.
 *
 * The argument is the server's session.  While the connection's
 * WT_CONN_SERVER_LOG flag is set, every pass forces out buffered log
 * writes.  When the previous wait lasted at least WT_THOUSAND milliseconds
 * or was ended by a signal, the pass also pre-allocates log files and
 * archives old ones.  Any error is reported through WT_PANIC_MSG.  Always
 * returns WT_THREAD_RET_VALUE.
 */
static WT_THREAD_RET
__log_server(void *arg)
{
	struct timespec wait_begin, wait_end;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	uint64_t elapsed_ms;
	bool did_work, housekeeping, signalled;

	session = arg;
	conn = S2C(session);
	log = conn->log;

	/*
	 * Seed the elapsed time at the housekeeping threshold so archive and
	 * pre-allocation run on the very first pass.
	 */
	elapsed_ms = WT_THOUSAND;
	signalled = false;
	did_work = true;

	/*
	 * This thread has several jobs: forcing out buffered log writes,
	 * pre-allocating log files and archiving.  Forcing writes is not left
	 * to the wrlsn thread because that thread should move the write_lsn
	 * forward as fast as it can, and not to the log file server thread
	 * because its fsync calls can be slow and log records should not sit
	 * in the buffer while an earlier file is being synced.
	 */
	while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
		/*
		 * Slots depend on future activity, so push out buffered
		 * writes in case the system is idle.  Doing this from the
		 * wrlsn thread could hang: a synchronous buffer may need the
		 * write_lsn to advance, which is that thread's own job.
		 */
		WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work));

		/*
		 * Archiving and pre-allocation are wanted far less often than
		 * forced writes: only after a long enough wait, or when the
		 * condition variable was signalled.
		 */
		housekeeping = elapsed_ms >= WT_THOUSAND || signalled;

		/*
		 * Log pre-allocation.  It is skipped while a hot backup cursor
		 * is open, because no files in the database directory may be
		 * renamed or removed during a backup.
		 */
		if (housekeeping && conn->log_prealloc > 0) {
			__wt_readlock(session, &conn->hot_backup_lock);
			if (!conn->hot_backup)
				ret = __log_prealloc_once(session);
			__wt_readunlock(session, &conn->hot_backup_lock);
			WT_ERR(ret);
		}

		/*
		 * Log archive.  It is attempted only if the archive lock can
		 * be taken without blocking.
		 */
		if (housekeeping &&
		    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
			if (__wt_try_writelock(
			    session, &log->log_archive_lock) != 0) {
				__wt_verbose(session, WT_VERB_LOG, "%s",
				    "log_archive: Blocked due to open "
				    "log cursor holding archive lock");
			} else {
				ret = __log_archive_once(session, 0);
				__wt_writeunlock(
				    session, &log->log_archive_lock);
				WT_ERR(ret);
			}
		}

		/* Wait for the next event and measure how long it took. */
		__wt_epoch(session, &wait_begin);
		__wt_cond_auto_wait_signal(
		    session, conn->log_cond, did_work, NULL, &signalled);
		__wt_epoch(session, &wait_end);
		elapsed_ms = WT_TIMEDIFF_MS(wait_end, wait_begin);
	}
	return (WT_THREAD_RET_VALUE);

err:	WT_PANIC_MSG(session, ret, "log server error");
	return (WT_THREAD_RET_VALUE);
}
예제 #9
0
파일: conn_log.c 프로젝트: mpobrien/mongo
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint64_t yield_count;
	uint32_t filenum;
	bool locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = false;
	yield_count = 0;
	while (F_ISSET(conn, WT_CONN_SERVER_LOG)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * The closing file handle should have a correct close
			 * LSN.
			 */
			WT_ASSERT(session,
			    log->log_close_lsn.l.file == filenum);

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set the close_end_lsn to the LSN immediately
				 * after ours.  That is, the beginning of the
				 * next log file.   We need to know the LSN
				 * file number of our own close in case earlier
				 * calls are still in progress and the next one
				 * to move the sync_lsn into the next file for
				 * later syncs.
				 */
				WT_ERR(__wt_fsync(session, close_fh, true));

				/*
				 * We want to have the file size reflect actual
				 * data with minimal pre-allocated zeroed space.
				 * We can't truncate the file during hot backup,
				 * or the underlying file system may not support
				 * truncate: both are OK, it's just more work
				 * during cursor traversal.
				 */
				if (!conn->hot_backup) {
					__wt_readlock(
					    session, &conn->hot_backup_lock);
					if (!conn->hot_backup)
						WT_ERR_ERROR_OK(
						    __wt_ftruncate(session,
						    close_fh,
						    close_end_lsn.l.offset),
						    ENOTSUP);
					__wt_readunlock(
					    session, &conn->hot_backup_lock);
				}
				WT_SET_LSN(&close_end_lsn,
				    close_end_lsn.l.file + 1, 0);
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				__wt_cond_signal(session, log->log_sync_cond);
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't signal the wrlsn thread
			 * to get it written.
			 *
			 * We also have to wait for the written LSN and the
			 * sync LSN to be in the same file so that we know we
			 * have synchronized all earlier log files.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				/*
				 * If the sync file is behind either the one
				 * wanted for a background sync or the write LSN
				 * has moved to another file continue to let
				 * this worker thread process that older file
				 * immediately.
				 */
				if ((log->sync_lsn.l.file <
				    log->bg_sync_lsn.l.file) ||
				    (log->sync_lsn.l.file < min_lsn.l.file))
					continue;
				WT_ERR(__wt_fsync(session, log->log_fh, true));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					WT_ASSERT(session,
					    min_lsn.l.file ==
					    log->sync_lsn.l.file);
					log->sync_lsn = min_lsn;
					__wt_cond_signal(
					    session, log->log_sync_cond);
				}
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				__wt_cond_signal(session, conn->log_wrlsn_cond);
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				yield_count++;
				__wt_yield();
				continue;
			}
		}

		/* Wait until the next event. */
		__wt_cond_wait(session, conn->log_file_cond, 100000, NULL);
	}

	if (0) {
err:		WT_PANIC_MSG(session, ret, "log close server error");
	}
	WT_STAT_CONN_INCRV(session, log_server_sync_blocked, yield_count);
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}