Example #1
/*
 * __wt_checkpoint_signal --
 *	Signal the checkpoint thread if sufficient log has been written.
 *	Returns 0 on success, or an error if signalling the thread fails.
 */
int
__wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize)
{
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);
	WT_ASSERT(session, WT_CKPT_LOGSIZE(conn));
	if (logsize >= conn->ckpt_logsize && !conn->ckpt_signalled) {
		WT_RET(__wt_cond_signal(session, conn->ckpt_cond));
		conn->ckpt_signalled = 1;
	}
	return (0);
}
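
The function above is a signal-once latch: the log path signals the
checkpoint condition only the first time the size threshold is crossed,
and the checkpoint thread re-arms the latch when it consumes the signal.
A minimal sketch of the same latch using POSIX threads follows; the
struct and names are illustrative, not WiredTiger's.

#include <pthread.h>
#include <stdbool.h>

struct ckpt_state {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	long logsize_trigger;	/* threshold set at configuration time */
	bool signalled;		/* latch: at most one wakeup per trigger */
};

/* Producer side: called from the log write path. */
static void
ckpt_signal(struct ckpt_state *s, long logsize)
{
	pthread_mutex_lock(&s->lock);
	if (logsize >= s->logsize_trigger && !s->signalled) {
		s->signalled = true;
		pthread_cond_signal(&s->cond);
	}
	pthread_mutex_unlock(&s->lock);
}

/* Consumer side: the checkpoint thread re-arms the latch on wakeup. */
static void
ckpt_wait(struct ckpt_state *s)
{
	pthread_mutex_lock(&s->lock);
	while (!s->signalled)
		pthread_cond_wait(&s->cond, &s->lock);
	s->signalled = false;
	pthread_mutex_unlock(&s->lock);
}
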
Example #2
/*
 * __wt_lsm_compact --
 *	Compact an LSM tree; called via __wt_schema_worker.
 */
int
__wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
{
	WT_DECL_RET;
	WT_LSM_TREE *lsm_tree;
	uint64_t last_merge_progressing;
	time_t begin, end;

	/*
	 * This function is applied to all matching sources: ignore anything
	 * that is not an LSM tree.
	 */
	if (!WT_PREFIX_MATCH(name, "lsm:"))
		return (0);

	/* Tell __wt_schema_worker not to look inside the LSM tree. */
	*skip = 1;

	WT_RET(__wt_lsm_tree_get(session, name, 0, &lsm_tree));

	if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE) ||
	    lsm_tree->merge_threads == 0)
		WT_RET_MSG(session, EINVAL,
		    "LSM compaction requires active merge threads");

	WT_RET(__wt_seconds(session, &begin));

	F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);

	/* Wake up the merge threads. */
	WT_ERR(__wt_cond_signal(session, lsm_tree->work_cond));

	/* Now wait for merge activity to stop. */
	do {
		last_merge_progressing = lsm_tree->merge_progressing;
		__wt_sleep(1, 0);
		WT_ERR(__wt_seconds(session, &end));
		if (session->compact->max_time > 0 &&
		    session->compact->max_time < (uint64_t)(end - begin))
			WT_ERR(ETIMEDOUT);
	} while (lsm_tree->merge_progressing != last_merge_progressing &&
	    lsm_tree->nchunks > 1);

err:	F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);

	return (ret);
}
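
The wait loop above is a poll-for-progress pattern: sample a progress
counter once per second and stop when either the counter stops moving
or the caller's time budget runs out.  Here it is in isolation as a
hedged sketch (hypothetical helper names; the real loop also stops once
the tree is down to a single chunk).

#include <errno.h>
#include <stdint.h>
#include <time.h>
#include <unistd.h>

static int
wait_for_progress(volatile uint64_t *progress, uint64_t max_seconds)
{
	uint64_t last;
	time_t begin, end;

	begin = time(NULL);
	do {
		last = *progress;
		sleep(1);
		end = time(NULL);
		if (max_seconds > 0 && (uint64_t)(end - begin) > max_seconds)
			return (ETIMEDOUT);
	} while (*progress != last);	/* stop once activity ceases */
	return (0);
}
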
Example #3
/*
 * __thread_group_shrink --
 *	Decrease the number of running threads in the group, and free any
 *	memory associated with slots larger than the new count.
 */
static int
__thread_group_shrink(WT_SESSION_IMPL *session,
    WT_THREAD_GROUP *group, uint32_t new_count)
{
	WT_DECL_RET;
	WT_SESSION *wt_session;
	WT_THREAD *thread;
	uint32_t current_slot;

	WT_ASSERT(session,
	    __wt_rwlock_islocked(session, group->lock));

	for (current_slot = group->alloc; current_slot > new_count; ) {
		/*
		 * The offset value is a counter, not an array index,
		 * so adjust it before finding the last thread in the group.
		 */
		thread = group->threads[--current_slot];

		if (thread == NULL)
			continue;

		/* Wake threads to ensure they notice the state change */
		if (thread->tid != 0) {
			__wt_verbose(session, WT_VERB_THREAD_GROUP,
			    "Stopping utility thread: %p:%" PRIu32,
			    (void *)group, thread->id);
			F_CLR(thread, WT_THREAD_RUN);
			__wt_cond_signal(session, group->wait_cond);
			WT_TRET(__wt_thread_join(session, thread->tid));
			thread->tid = 0;
		}

		if (thread->session != NULL) {
			wt_session = (WT_SESSION *)thread->session;
			WT_TRET(wt_session->close(wt_session, NULL));
			thread->session = NULL;
		}
		__wt_free(session, thread);
		group->threads[current_slot] = NULL;
	}

	/* Update the thread group state to match our changes */
	group->current_threads = current_slot;
	return (ret);
}
Example #4
/*
 * __conn_reconfigure --
 *	WT_CONNECTION->reconfigure method.
 */
static int
__conn_reconfigure(WT_CONNECTION *wt_conn, const char *config)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *session;

	/*
	 * Special version of cfg that doesn't include the default config: used
	 * to limit changes to values that the application sets explicitly.
	 * Note that any function using this value has to be prepared to handle
	 * not-found as a valid option return.
	 */
	const char *raw_cfg[] = { config, NULL };

	conn = (WT_CONNECTION_IMPL *)wt_conn;

	CONNECTION_API_CALL(conn, session, reconfigure, config, cfg);

	/* Turning on statistics clears any existing values. */
	if ((ret =
	    __wt_config_gets(session, raw_cfg, "statistics", &cval)) == 0) {
		conn->statistics = cval.val == 0 ? 0 : 1;
		if (conn->statistics)
			__wt_stat_clear_connection_stats(&conn->stats);
	}
	WT_ERR_NOTFOUND_OK(ret);

	WT_ERR(__wt_conn_cache_pool_config(session, cfg));
	WT_ERR(__wt_cache_config(conn, raw_cfg));

	WT_ERR(__conn_verbose_config(session, raw_cfg));

	/* Wake up the cache pool server so any changes are noticed. */
	if (F_ISSET(conn, WT_CONN_CACHE_POOL))
		WT_ERR(__wt_cond_signal(
		    session, __wt_process.cache_pool->cache_pool_cond));

err:	API_END(session);
	return (ret);
}
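
The raw_cfg array above deliberately omits the default configuration,
so lookups return not-found for any option the application didn't set
explicitly and the current value is left alone.  Below is a crude,
hypothetical sketch of that idea; it is not the WiredTiger config API,
and the strstr-based matching is only there to show the control flow
(it handles neither quoting nor nested keys).

#include <stdbool.h>
#include <string.h>

/*
 * Return true and set *val only if the application's config string
 * mentions the key; "not found" means "leave the current value alone".
 */
static bool
user_set(const char *config, const char *key, bool *val)
{
	const char *p;

	if (config == NULL || (p = strstr(config, key)) == NULL)
		return (false);
	p += strlen(key);
	/* Treat "key=false" and "key=0" as false, anything else as true. */
	*val = strncmp(p, "=false", 6) != 0 && strncmp(p, "=0", 2) != 0;
	return (true);
}

A caller mirrors the WT_ERR_NOTFOUND_OK dance above by changing state
only when user_set() returns true.
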
Example #5
/*
 * __wt_checkpoint_server_destroy --
 *	Destroy the checkpoint server thread.
 */
int
__wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION *wt_session;

	conn = S2C(session);

	F_CLR(conn, WT_CONN_SERVER_CHECKPOINT);
	if (conn->ckpt_tid_set) {
		WT_TRET(__wt_cond_signal(session, conn->ckpt_cond));
		WT_TRET(__wt_thread_join(session, conn->ckpt_tid));
		conn->ckpt_tid_set = 0;
	}
	WT_TRET(__wt_cond_destroy(session, &conn->ckpt_cond));

	__wt_free(session, conn->ckpt_config);

	/* Close the server thread's session. */
	if (conn->ckpt_session != NULL) {
		wt_session = &conn->ckpt_session->iface;
		WT_TRET(wt_session->close(wt_session, NULL));
	}

	/*
	 * Ensure checkpoint settings are cleared so that a later reconfigure
	 * doesn't get confused.
	 */
	conn->ckpt_session = NULL;
	conn->ckpt_tid_set = 0;
	conn->ckpt_cond = NULL;
	conn->ckpt_config = NULL;
	conn->ckpt_usecs = 0;

	return (ret);
}
Example #6
/*
 * __log_wrlsn_server --
 *	The log wrlsn server thread.
 */
static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
	WT_LOGSLOT *slot;
	WT_SESSION_IMPL *session;
	size_t written_i;
	uint32_t i, save_i;
	int yield;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	yield = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * No need to use the log_slot_lock because the slot pool
		 * is statically allocated and any slot in the
		 * WT_LOG_SLOT_WRITTEN state is exclusively ours for now.
		 */
		i = 0;
		written_i = 0;
		/*
		 * Walk the array once saving any slots that are in the
		 * WT_LOG_SLOT_WRITTEN state.
		 */
		while (i < WT_SLOT_POOL) {
			save_i = i;
			slot = &log->slot_pool[i++];
			if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
				continue;
			written[written_i].slot_index = save_i;
			written[written_i++].lsn = slot->slot_release_lsn;
		}
		/*
		 * If we found any written slots process them.  We sort them
		 * based on the release LSN, and then look for them in order.
		 */
		if (written_i > 0) {
			yield = 0;
			WT_INSERTION_SORT(written, written_i,
			    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);

			/*
			 * We know the written array is sorted by LSN.  Go
			 * through them either advancing write_lsn or stop
			 * as soon as one is not in order.
			 */
			for (i = 0; i < written_i; i++) {
				if (WT_LOG_CMP(&log->write_lsn,
				    &written[i].lsn) != 0)
					break;
				/*
				 * If we get here we have a slot to process.
				 * Advance the LSN and process the slot.
				 */
				slot = &log->slot_pool[written[i].slot_index];
				WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn,
				    &slot->slot_release_lsn) == 0);
				log->write_start_lsn = slot->slot_start_lsn;
				log->write_lsn = slot->slot_end_lsn;
				WT_ERR(__wt_cond_signal(session,
				    log->log_write_cond));
				WT_STAT_FAST_CONN_INCR(session, log_write_lsn);

				/*
				 * Signal the close thread if needed.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					WT_ERR(__wt_cond_signal(session,
					    conn->log_file_cond));
				WT_ERR(__wt_log_slot_free(session, slot));
			}
		}
		/*
		 * If we recently saw written slots, yield and recheck: more
		 * writes are likely in progress.  After enough empty passes,
		 * block on the condition variable instead.
		 */
		if (yield++ < 1000)
			__wt_yield();
		else
			/* Wait until the next event. */
			WT_ERR(__wt_cond_wait(session,
			    conn->log_wrlsn_cond, 100000));
	}

	if (0)
err:		__wt_err(session, ret, "log wrlsn server error");
	return (WT_THREAD_RET_VALUE);
}
Example #7
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, close_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL &&
		    (ret = __wt_log_extract_lognum(session, close_fh->name,
		    &close_lsn.file)) == 0 &&
		    close_lsn.file < log->write_lsn.file) {
			/*
			 * We've copied the file handle, clear out the one in
			 * log structure to allow it to be set again.
			 */
			log->log_close_fh = NULL;
			/*
			 * Set close_end_lsn to the LSN immediately after
			 * ours, that is, the beginning of the next log file.
			 * We need to know the file number of our own close in
			 * case earlier calls are still in progress and ours
			 * is the next one to move the sync_lsn into the next
			 * file for later syncs.
			 */
			close_lsn.offset = 0;
			close_end_lsn = close_lsn;
			close_end_lsn.file++;
			WT_ERR(__wt_fsync(session, close_fh));
			__wt_spin_lock(session, &log->log_sync_lock);
			locked = 1;
			WT_ERR(__wt_close(session, &close_fh));
			WT_ASSERT(session,
			    WT_LOG_CMP(&close_end_lsn, &log->sync_lsn) >= 0);
			log->sync_lsn = close_end_lsn;
			WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
			locked = 0;
			__wt_spin_unlock(session, &log->log_sync_lock);
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (WT_LOG_CMP(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * The sync LSN we asked for better be smaller than
			 * the current written LSN.
			 */
			WT_ASSERT(session,
			    WT_LOG_CMP(&log->bg_sync_lsn, &min_lsn) <= 0);
			WT_ERR(__wt_fsync(session, log->log_fh));
			__wt_spin_lock(session, &log->log_sync_lock);
			locked = 1;
			/*
			 * The sync LSN could have advanced while we were
			 * writing to disk.
			 */
			if (WT_LOG_CMP(&log->sync_lsn, &min_lsn) <= 0) {
				log->sync_lsn = min_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_sync_cond));
			}
			locked = 0;
			__wt_spin_unlock(session, &log->log_sync_lock);
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(
		    session, conn->log_file_cond, WT_MILLION));
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
Example #8
/*
 * __log_release --
 *	Release a log slot.
 */
static int
__log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN sync_lsn;
	size_t write_size;
	WT_DECL_SPINLOCK_ID(id);			/* Must appear last */

	conn = S2C(session);
	log = conn->log;
	/*
	 * If we're going to have to close our log file, make a local copy
	 * of the file handle structure.
	 */
	close_fh = NULL;
	if (F_ISSET(slot, SLOT_CLOSEFH)) {
		close_fh = log->log_close_fh;
		log->log_close_fh = NULL;
		F_CLR(slot, SLOT_CLOSEFH);
	}

	/* Write the buffered records */
	if (F_ISSET(slot, SLOT_BUFFERED)) {
		write_size = (size_t)
		    (slot->slot_end_lsn.offset - slot->slot_start_offset);
		WT_ERR(__wt_write(session, slot->slot_fh,
		    slot->slot_start_offset, write_size, slot->slot_buf.mem));
	}

	/*
	 * Wait for earlier groups to finish, otherwise there could be holes
	 * in the log file.
	 */
	while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0)
		__wt_yield();
	log->write_lsn = slot->slot_end_lsn;
	/*
	 * Try to consolidate calls to fsync to wait less.  Acquire a spin lock
	 * so that threads finishing writing to the log will wait while the
	 * current fsync completes and advance log->write_lsn.
	 */
	while (F_ISSET(slot, SLOT_SYNC) &&
	    LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
		if (__wt_spin_trylock(session, &log->log_sync_lock, &id) != 0) {
			(void)__wt_cond_wait(
			    session, log->log_sync_cond, 10000);
			continue;
		}
		/*
		 * Record the current end of log after we grabbed the lock.
		 * That is how far our fsync call will guarantee.
		 */
		sync_lsn = log->write_lsn;
		if (LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
			WT_STAT_FAST_CONN_INCR(session, log_sync);
			ret = __wt_fsync(session, log->log_fh);
			if (ret == 0) {
				F_CLR(slot, SLOT_SYNC);
				log->sync_lsn = sync_lsn;
				ret = __wt_cond_signal(
				    session, log->log_sync_cond);
			}
		}
		__wt_spin_unlock(session, &log->log_sync_lock);
		WT_ERR(ret);
	}
	if (F_ISSET(slot, SLOT_BUF_GROW)) {
		WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
		F_CLR(slot, SLOT_BUF_GROW);
		WT_STAT_FAST_CONN_INCRV(session,
		    log_buffer_size, slot->slot_buf.memsize);
		WT_ERR(__wt_buf_grow(session,
		    &slot->slot_buf, slot->slot_buf.memsize * 2));
	}
	/*
	 * If we have a file to close, close it now.
	 */
	if (close_fh)
		WT_ERR(__wt_close(session, close_fh));

err:	if (ret != 0 && slot->slot_error == 0)
		slot->slot_error = ret;
	return (ret);
}
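
The sync loop above consolidates fsync calls: whichever thread wins the
lock syncs to the current end of log, covering every thread whose end
LSN is older, and the rest wait on the condition variable to be
covered.  A self-contained sketch of that protocol with POSIX threads
and illustrative names:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>

struct log_sync {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int fd;			/* log file descriptor */
	bool syncing;		/* a thread is currently in fsync */
	uint64_t write_end;	/* end of written log */
	uint64_t sync_end;	/* end of durable log */
};

/*
 * Make the log durable up to "target", consolidating concurrent
 * callers into as few fsync calls as possible.
 */
static int
log_sync_to(struct log_sync *ls, uint64_t target)
{
	uint64_t end;
	int ret;

	ret = 0;
	pthread_mutex_lock(&ls->lock);
	while (ls->sync_end < target && ret == 0) {
		if (ls->syncing) {
			/* An in-flight fsync may cover us: wait for it. */
			pthread_cond_wait(&ls->cond, &ls->lock);
			continue;
		}
		/* Become the syncer: fsync covers the current end of log. */
		ls->syncing = true;
		end = ls->write_end;
		pthread_mutex_unlock(&ls->lock);
		if (fsync(ls->fd) != 0)
			ret = errno;
		pthread_mutex_lock(&ls->lock);
		ls->syncing = false;
		if (ret == 0 && end > ls->sync_end)
			ls->sync_end = end;
		pthread_cond_broadcast(&ls->cond);
	}
	pthread_mutex_unlock(&ls->lock);
	return (ret);
}
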
Example #9
/*
 * __wt_log_wrlsn --
 *	Process written log slots and attempt to coalesce them if the LSNs
 *	are contiguous.  If free_i is non-NULL, set it to the index of the
 *	first free slot found; if yield is non-NULL, clear it when written
 *	slots are found.  Must be called with the log slot lock held.
 */
int
__wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
	WT_LOGSLOT *coalescing, *slot;
	size_t written_i;
	uint32_t i, save_i;

	conn = S2C(session);
	log = conn->log;
	coalescing = NULL;
	written_i = 0;
	i = 0;
	if (free_i != NULL)
		*free_i = WT_SLOT_POOL;

	/*
	 * Walk the array once saving any slots that are in the
	 * WT_LOG_SLOT_WRITTEN state.
	 */
	while (i < WT_SLOT_POOL) {
		save_i = i;
		slot = &log->slot_pool[i++];
		if (free_i != NULL && *free_i == WT_SLOT_POOL &&
		    slot->slot_state == WT_LOG_SLOT_FREE)
			*free_i = save_i;
		if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
			continue;
		written[written_i].slot_index = save_i;
		written[written_i++].lsn = slot->slot_release_lsn;
	}
	/*
	 * If we found any written slots process them.  We sort them
	 * based on the release LSN, and then look for them in order.
	 */
	if (written_i > 0) {
		/*
		 * If wanted, reset the yield variable to indicate that we
		 * have found written slots.
		 */
		if (yield != NULL)
			*yield = 0;
		WT_INSERTION_SORT(written, written_i,
		    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);

		/*
		 * We know the written array is sorted by LSN.  Go
		 * through them either advancing write_lsn or coalesce
		 * contiguous ranges of written slots.
		 */
		for (i = 0; i < written_i; i++) {
			slot = &log->slot_pool[written[i].slot_index];
			if (coalescing != NULL) {
				if (WT_LOG_CMP(&coalescing->slot_end_lsn,
				    &written[i].lsn) != 0) {
					coalescing = slot;
					continue;
				}
				/*
				 * If we get here we have a slot to coalesce
				 * and free.
				 */
				coalescing->slot_end_lsn = slot->slot_end_lsn;
				WT_STAT_FAST_CONN_INCR(
				    session, log_slot_coalesced);
				/*
				 * Copy the flag for later closing.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					F_SET(coalescing, WT_SLOT_CLOSEFH);
			} else {
				/*
				 * If this written slot is not the next LSN,
				 * try to start coalescing with later slots.
				 */
				if (WT_LOG_CMP(
				    &log->write_lsn, &written[i].lsn) != 0) {
					coalescing = slot;
					continue;
				}
				/*
				 * If we get here we have a slot to process.
				 * Advance the LSN and process the slot.
				 */
				WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn,
				    &slot->slot_release_lsn) == 0);
				log->write_start_lsn = slot->slot_start_lsn;
				log->write_lsn = slot->slot_end_lsn;
				WT_RET(__wt_cond_signal(
				    session, log->log_write_cond));
				WT_STAT_FAST_CONN_INCR(session, log_write_lsn);
				/*
				 * Signal the close thread if needed.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					WT_RET(__wt_cond_signal(
					    session, conn->log_file_cond));
			}
			WT_RET(__wt_log_slot_free(session, slot));
			if (free_i != NULL && *free_i == WT_SLOT_POOL &&
			    slot->slot_state == WT_LOG_SLOT_FREE)
				*free_i = written[i].slot_index;
		}
	}
	return (0);
}
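
Stripped of slots and statistics, the advancing half of the algorithm
above works on sorted ranges: move a "done" point across every range
that starts exactly at it, and stop at the first gap, which means an
earlier write is still in flight.  A hedged sketch over plain integer
ranges (the real code additionally coalesces contiguous ranges sitting
beyond the gap so they can be consumed in one step later):

#include <stddef.h>
#include <stdint.h>

struct range {
	uint64_t start, end;	/* half-open [start, end) */
};

/*
 * Given ranges sorted by start, advance *done across every range that
 * begins exactly at it; a gap means an earlier slot is still being
 * written.  Returns how many ranges were consumed.
 */
static size_t
advance_done(uint64_t *done, const struct range *r, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (r[i].start != *done)
			break;
		*done = r[i].end;
	}
	return (i);
}
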
Example #10
/*
 * __wt_logmgr_open --
 *	Start the log service threads.
 */
int
__wt_logmgr_open(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;

	conn = S2C(session);

	/* If no log thread services are configured, we're done. */ 
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
		return (0);

	/*
	 * Start the log close thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	WT_RET(__wt_open_internal_session(
	    conn, "log-close-server", 0, 0, &conn->log_file_session));
	WT_RET(__wt_cond_alloc(conn->log_file_session,
	    "log close server", 0, &conn->log_file_cond));

	/*
	 * Start the log file close thread.
	 */
	WT_RET(__wt_thread_create(conn->log_file_session,
	    &conn->log_file_tid, __log_file_server, conn->log_file_session));
	conn->log_file_tid_set = 1;

	/*
	 * Start the log write LSN thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	WT_RET(__wt_open_internal_session(
	    conn, "log-wrlsn-server", 0, 0, &conn->log_wrlsn_session));
	WT_RET(__wt_cond_alloc(conn->log_wrlsn_session,
	    "log write lsn server", 0, &conn->log_wrlsn_cond));
	WT_RET(__wt_thread_create(conn->log_wrlsn_session,
	    &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session));
	conn->log_wrlsn_tid_set = 1;

	/* If neither archiving nor pre-allocation is configured, we're done. */
	if (!FLD_ISSET(conn->log_flags,
	    (WT_CONN_LOG_ARCHIVE | WT_CONN_LOG_PREALLOC)))
		return (0);

	/*
	 * If a log server thread exists, the user may have reconfigured
	 * archiving or pre-allocation.  Signal the thread.  Otherwise the
	 * user wants archiving and/or pre-allocation and we need to start up
	 * the thread.
	 */
	if (conn->log_session != NULL) {
		WT_ASSERT(session, conn->log_cond != NULL);
		WT_ASSERT(session, conn->log_tid_set != 0);
		WT_RET(__wt_cond_signal(session, conn->log_cond));
	} else {
		/* The log server gets its own session. */
		WT_RET(__wt_open_internal_session(
		    conn, "log-server", 0, 0, &conn->log_session));
		WT_RET(__wt_cond_alloc(conn->log_session,
		    "log server", 0, &conn->log_cond));

		/*
		 * Start the thread.
		 */
		WT_RET(__wt_thread_create(conn->log_session,
		    &conn->log_tid, __log_server, conn->log_session));
		conn->log_tid_set = 1;
	}

	return (0);
}
Example #11
/*
 * __log_slot_new --
 *	Find a free slot and switch it as the new active slot.
 *	Must be called holding the slot lock.
 */
static int
__log_slot_new(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	WT_LOGSLOT *slot;
	int32_t i, pool_i;
#ifdef	HAVE_DIAGNOSTIC
	uint64_t time_start, time_stop;
	int count;
#endif

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
	conn = S2C(session);
	log = conn->log;
	/*
	 * Although this function runs single threaded (the slot lock is
	 * held), multiple threads may call it in turn trying to set a new
	 * active slot.  If we find an active slot that is valid, return.
	 */
	if ((slot = log->active_slot) != NULL &&
	    WT_LOG_SLOT_OPEN(slot->slot_state))
		return (0);

#ifdef	HAVE_DIAGNOSTIC
	count = 0;
	time_start = __wt_clock(session);
#endif
	/*
	 * Keep trying until we can find a free slot.
	 */
	for (;;) {
		/*
		 * Rotate among the slots to lessen collisions.
		 */
		WT_RET(WT_SESSION_CHECK_PANIC(session));
		for (i = 0, pool_i = log->pool_index; i < WT_SLOT_POOL;
		    i++, pool_i++) {
			if (pool_i >= WT_SLOT_POOL)
				pool_i = 0;
			slot = &log->slot_pool[pool_i];
			if (slot->slot_state == WT_LOG_SLOT_FREE) {
				/*
				 * Acquire our starting position in the
				 * log file.  Assume the full buffer size.
				 */
				WT_RET(__wt_log_acquire(session,
				    log->slot_buf_size, slot));
				/*
				 * We have a new, initialized slot to use.
				 * Set it as the active slot.
				 */
				log->active_slot = slot;
				log->pool_index = pool_i;
				return (0);
			}
		}
		/*
		 * If we didn't find any free slots, signal the worker thread.
		 */
		WT_STAT_CONN_INCR(session, log_slot_no_free_slots);
		__wt_cond_signal(session, conn->log_wrlsn_cond);
		__wt_yield();
#ifdef	HAVE_DIAGNOSTIC
		++count;
		if (count > WT_MILLION) {
			time_stop = __wt_clock(session);
			if (WT_CLOCKDIFF_SEC(time_stop, time_start) > 10) {
				__wt_errx(session,
				    "SLOT_NEW: Timeout free slot");
				__log_slot_dump(session);
				__wt_abort(session);
			}
			count = 0;
		}
#endif
	}
	/* NOTREACHED */
}
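
The slot search above always starts from pool_index, where the previous
search succeeded, so successive scans probe different parts of the pool
and find free slots sooner.  The same rotation in minimal form; calls
are assumed serialized, as the slot lock guarantees in the original:

#include <stdbool.h>
#include <stdint.h>

#define POOL_SIZE	16

struct pool {
	bool busy[POOL_SIZE];
	uint32_t next;		/* where the last successful scan ended */
};

/* Return a free slot index, or -1 if every slot is busy. */
static int
pool_find_free(struct pool *p)
{
	uint32_t i, slot;

	for (i = 0, slot = p->next; i < POOL_SIZE; i++, slot++) {
		if (slot >= POOL_SIZE)
			slot = 0;
		if (!p->busy[slot]) {
			p->busy[slot] = true;
			p->next = slot;
			return ((int)slot);
		}
	}
	return (-1);	/* caller signals the worker thread and retries */
}
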
Example #12
/*
 * __logmgr_config --
 *	Parse and setup the logging server options.
 */
static int
__logmgr_config(
    WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	bool enabled;

	/*
	 * A note on reconfiguration: the standard "is this configuration string
	 * allowed" checks should fail if reconfiguration has invalid strings,
	 * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
	 * the connection reconfiguration method doesn't allow those strings.
	 * Additionally, the base configuration values during reconfiguration
	 * are the currently configured values (so we don't revert to default
	 * values when repeatedly reconfiguring), and configuration processing
	 * of a currently set value should not change the currently set value.
	 *
	 * In this code path, log server reconfiguration does not stop/restart
	 * the log server, so there's no point in re-evaluating configuration
	 * strings that cannot be reconfigured, risking bugs in configuration
	 * setup, and depending on evaluation of currently set values to always
	 * result in the currently set value. Skip tests for any configuration
	 * strings which don't make sense during reconfiguration, but don't
	 * worry about error reporting because it should never happen.
	 */

	conn = S2C(session);

	WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
	enabled = cval.val != 0;

	/*
	 * If we're reconfiguring, enabled must match the already
	 * existing setting.
	 *
	 * If it is off and the user is turning it on, or it is on
	 * and the user is turning it off, return an error.
	 *
	 * See above: should never happen.
	 */
	if (reconfig &&
	    ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
	    (!enabled && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))))
		WT_RET_MSG(session, EINVAL,
		    "log manager reconfigure: enabled mismatch with existing "
		    "setting");

	/* Logging is incompatible with in-memory */
	if (enabled) {
		WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
		if (cval.val != 0)
			WT_RET_MSG(session, EINVAL,
			    "In-memory configuration incompatible with "
			    "log=(enabled=true)");
	}

	*runp = enabled;

	/*
	 * Set up a log path and compression even if logging is disabled, in
	 * case we are going to print a log.  Only do this on creation.  Once
	 * a compressor or log path is set, it cannot be changed.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		conn->log_compressor = NULL;
		WT_RET(__wt_config_gets_none(
		    session, cfg, "log.compressor", &cval));
		WT_RET(__wt_compressor_config(
		    session, &cval, &conn->log_compressor));

		WT_RET(__wt_config_gets(session, cfg, "log.path", &cval));
		WT_RET(__wt_strndup(
		    session, cval.str, cval.len, &conn->log_path));
	}

	/* We are done if logging isn't enabled. */
	if (!*runp)
		return (0);

	WT_RET(__wt_config_gets(session, cfg, "log.archive", &cval));
	if (cval.val != 0)
		FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);

	/*
	 * The file size cannot be reconfigured. The amount of memory allocated
	 * to the log slots may be based on the log file size at creation and we
	 * don't want to re-allocate that memory while running.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
		conn->log_file_max = (wt_off_t)cval.val;
		WT_STAT_CONN_SET(session, log_max_filesize, conn->log_file_max);
	}

	/*
	 * If pre-allocation is configured, set the initial number to a few.
	 * We'll adapt as load dictates.
	 */
	WT_RET(__wt_config_gets(session, cfg, "log.prealloc", &cval));
	if (cval.val != 0)
		conn->log_prealloc = 1;

	/*
	 * Note it's meaningless to reconfigure this value during runtime, it
	 * only matters on create before recovery runs.
	 *
	 * See above: should never happen.
	 */
	if (!reconfig) {
		WT_RET(__wt_config_gets_def(
		    session, cfg, "log.recover", 0, &cval));
		if (WT_STRING_MATCH("error", cval.str, cval.len))
			FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
	}

	WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
	if (cval.val != 0) {
		if (F_ISSET(conn, WT_CONN_READONLY))
			WT_RET_MSG(session, EINVAL,
			    "Read-only configuration incompatible with "
			    "zero-filling log files");
		FLD_SET(conn->log_flags, WT_CONN_LOG_ZERO_FILL);
	}

	WT_RET(__logmgr_sync_cfg(session, cfg));
	if (conn->log_cond != NULL)
		__wt_cond_signal(session, conn->log_cond);
	return (0);
}
Example #13
/*
 * __wt_logmgr_open --
 *	Start the log service threads.
 */
int
__wt_logmgr_open(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	uint32_t session_flags;

	conn = S2C(session);

	/* If no log thread services are configured, we're done. */
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
		return (0);

	F_SET(conn, WT_CONN_SERVER_LOG);

	/*
	 * Start the log close thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	session_flags = WT_SESSION_NO_DATA_HANDLES;
	WT_RET(__wt_open_internal_session(conn,
	    "log-close-server", false, session_flags, &conn->log_file_session));
	WT_RET(__wt_cond_alloc(
	    conn->log_file_session, "log close server", &conn->log_file_cond));

	/*
	 * Start the log file close thread.
	 */
	WT_RET(__wt_thread_create(conn->log_file_session,
	    &conn->log_file_tid, __log_file_server, conn->log_file_session));
	conn->log_file_tid_set = true;

	/*
	 * Start the log write LSN thread.  It is not configurable.
	 * If logging is enabled, this thread runs.
	 */
	WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server",
	    false, session_flags, &conn->log_wrlsn_session));
	WT_RET(__wt_cond_auto_alloc(conn->log_wrlsn_session,
	    "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond));
	WT_RET(__wt_thread_create(conn->log_wrlsn_session,
	    &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session));
	conn->log_wrlsn_tid_set = true;

	/*
	 * If a log server thread exists, the user may have reconfigured
	 * archiving or pre-allocation.  Signal the thread.  Otherwise the
	 * user wants archiving and/or pre-allocation and we need to start up
	 * the thread.
	 */
	if (conn->log_session != NULL) {
		WT_ASSERT(session, conn->log_cond != NULL);
		WT_ASSERT(session, conn->log_tid_set == true);
		__wt_cond_signal(session, conn->log_cond);
	} else {
		/* The log server gets its own session. */
		WT_RET(__wt_open_internal_session(conn,
		    "log-server", false, session_flags, &conn->log_session));
		WT_RET(__wt_cond_auto_alloc(conn->log_session,
		    "log server", 50000, WT_MILLION, &conn->log_cond));

		/*
		 * Start the thread.
		 */
		WT_RET(__wt_thread_create(conn->log_session,
		    &conn->log_tid, __log_server, conn->log_session));
		conn->log_tid_set = true;
	}

	return (0);
}
Example #14
/*
 * __lsm_tree_close --
 *	Close an LSM tree structure.
 */
static int
__lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;
	WT_SESSION *wt_session;
	WT_SESSION_IMPL *s;
	uint32_t i;

	if (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
		F_CLR(lsm_tree, WT_LSM_TREE_WORKING);

		/*
		 * Signal all threads to wake them up, then wait for them to
		 * exit.
		 *
		 * !!!
		 * If we have the schema lock, have the LSM worker sessions
		 * inherit the flag before we do anything.  The thread may
		 * already be waiting for the schema lock, but the loop in the
		 * WT_WITH_SCHEMA_LOCK macro takes care of that.
		 */
		if (F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
			for (i = 0; i < lsm_tree->merge_threads; i++) {
				if ((s = lsm_tree->worker_sessions[i]) == NULL)
					continue;
				if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED))
					s->skip_schema_lock = 1;
				WT_TRET(__wt_cond_signal(
				    session, lsm_tree->work_cond));
				WT_TRET(__wt_thread_join(
				    session, lsm_tree->worker_tids[i]));
			}
		if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED))
			lsm_tree->ckpt_session->skip_schema_lock = 1;
		WT_TRET(__wt_cond_signal(session, lsm_tree->work_cond));
		WT_TRET(__wt_thread_join(session, lsm_tree->ckpt_tid));
	}

	/*
	 * Close the worker thread sessions.  Do this in the main thread to
	 * avoid deadlocks.
	 */
	for (i = 0; i < lsm_tree->merge_threads; i++) {
		if ((s = lsm_tree->worker_sessions[i]) == NULL)
			continue;
		lsm_tree->worker_sessions[i] = NULL;
		wt_session = &s->iface;
		WT_TRET(wt_session->close(wt_session, NULL));
	}

	if (lsm_tree->ckpt_session != NULL) {
		wt_session = &lsm_tree->ckpt_session->iface;
		WT_TRET(wt_session->close(wt_session, NULL));
	}
	if (ret != 0) {
		__wt_err(session, ret, "shutdown error while cleaning up LSM");
		(void)__wt_panic(session);
	}

	return (ret);
}
Example #15
/*
 * __clsm_open_cursors --
 *	Open cursors for the current set of files.
 */
static int
__clsm_open_cursors(
    WT_CURSOR_LSM *clsm, int update, u_int start_chunk, uint32_t start_id)
{
	WT_CURSOR *c, **cp, *primary;
	WT_DECL_RET;
	WT_LSM_CHUNK *chunk;
	WT_LSM_TREE *lsm_tree;
	WT_SESSION_IMPL *session;
	WT_TXN *txn;
	const char *checkpoint, *ckpt_cfg[3];
	uint64_t saved_gen;
	u_int i, nchunks, ngood, nupdates;
	int locked;

	c = &clsm->iface;
	session = (WT_SESSION_IMPL *)c->session;
	txn = &session->txn;
	lsm_tree = clsm->lsm_tree;
	chunk = NULL;

	ckpt_cfg[0] = WT_CONFIG_BASE(session, session_open_cursor);
	ckpt_cfg[1] = "checkpoint=" WT_CHECKPOINT ",raw";
	ckpt_cfg[2] = NULL;

	/* Copy the key, so we don't lose the cursor position. */
	if (F_ISSET(c, WT_CURSTD_KEY_INT) && !WT_DATA_IN_ITEM(&c->key))
		WT_RET(__wt_buf_set(
		    session, &c->key, c->key.data, c->key.size));

	F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);

	if (update) {
		if (txn->isolation == TXN_ISO_SNAPSHOT)
			F_SET(clsm, WT_CLSM_OPEN_SNAPSHOT);
	} else
		F_SET(clsm, WT_CLSM_OPEN_READ);

	WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 0));
	locked = 1;
	/*
	 * If there is no in-memory chunk in the tree for an update operation,
	 * create one.
	 *
	 * !!!
	 * It is exceedingly unlikely that we get here at all, but if we were
	 * to switch chunks in this thread and our transaction rolled back, it
	 * would leave the metadata inconsistent.  Signal the LSM worker
	 * thread to create the chunk instead to avoid the issue.
	 */
	if (update && (lsm_tree->nchunks == 0 ||
	    (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) == NULL ||
	    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK))) {
		/* Release our lock because switch will get a write lock. */
		F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);
		locked = 0;
		WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
		WT_ERR(__wt_cond_signal(session, lsm_tree->work_cond));

		/*
		 * Give the worker thread a chance to run before locking the
		 * tree again -- we will loop in __clsm_enter until there is an
		 * in-memory chunk in the tree.
		 */
		__wt_sleep(0, 1000);
		WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 0));
		locked = 1;
	}
	F_SET(session, WT_SESSION_NO_CACHE_CHECK);

	/* Merge cursors have already figured out how many chunks they need. */
retry:	if (F_ISSET(clsm, WT_CLSM_MERGE)) {
		nchunks = clsm->nchunks;
		ngood = 0;

		/*
		 * We may have raced with another merge completing.  Check that
		 * we're starting at the right offset in the chunk array.
		 */
		if (start_chunk >= lsm_tree->nchunks ||
		    lsm_tree->chunk[start_chunk]->id != start_id) {
			for (start_chunk = 0;
			    start_chunk < lsm_tree->nchunks;
			    start_chunk++) {
				chunk = lsm_tree->chunk[start_chunk];
				if (chunk->id == start_id)
					break;
			}
			/* We have to find the start chunk: merge locked it. */
			WT_ASSERT(session, start_chunk < lsm_tree->nchunks);
		}

		WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
	} else {
		nchunks = lsm_tree->nchunks;

		/*
		 * If we are only opening the cursor for updates, only open the
		 * primary chunk, plus any other chunks that might be required
		 * to detect snapshot isolation conflicts.
		 */
		if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
			WT_ERR(__wt_realloc_def(session,
			    &clsm->txnid_alloc, nchunks,
			    &clsm->txnid_max));
		if (F_ISSET(clsm, WT_CLSM_OPEN_READ))
			ngood = nupdates = 0;
		else if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
			/*
			 * Keep going until all updates in the next
			 * chunk are globally visible.  Copy the maximum
			 * transaction IDs into the cursor as we go.
			 */
			for (ngood = nchunks - 1, nupdates = 1;
			    ngood > 0;
			    ngood--, nupdates++) {
				chunk = lsm_tree->chunk[ngood - 1];
				clsm->txnid_max[ngood - 1] =
				    chunk->txnid_max;
				if (__wt_txn_visible_all(
				    session, chunk->txnid_max))
					break;
			}
		} else {
			nupdates = 1;
			ngood = nchunks - 1;
		}

		/* Check how many cursors are already open. */
		for (cp = clsm->cursors + ngood;
		    ngood < clsm->nchunks && ngood < nchunks;
		    cp++, ngood++) {
			chunk = lsm_tree->chunk[ngood];

			/* If the cursor isn't open yet, we're done. */
			if (*cp == NULL)
				break;

			/* Easy case: the URIs don't match. */
			if (strcmp((*cp)->uri, chunk->uri) != 0)
				break;

			/* Make sure the checkpoint config matches. */
			checkpoint = ((WT_CURSOR_BTREE *)*cp)->
			    btree->dhandle->checkpoint;
			if (checkpoint == NULL &&
			    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
			    !chunk->empty)
				break;

			/* Make sure the Bloom config matches. */
			if (clsm->blooms[ngood] == NULL &&
			    F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
				break;
		}

		/* Spurious generation bump? */
		if (ngood == clsm->nchunks && clsm->nchunks == nchunks) {
			clsm->dsk_gen = lsm_tree->dsk_gen;
			goto err;
		}

		/*
		 * Close any cursors we no longer need.  If the cursor is a
		 * pure update cursor, close everything -- we usually only need
		 * a single chunk open in that case and we haven't walked all
		 * of the other slots in the loop above.
		 *
		 * Drop the LSM tree lock while we do this: if the cache is
		 * full, we may block while closing a cursor.  Save the
		 * generation number and retry if it has changed under us.
		 */
		if (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0)
			ngood = 0;
		if (clsm->cursors != NULL && ngood < clsm->nchunks) {
			saved_gen = lsm_tree->dsk_gen;
			locked = 0;
			WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree));
			WT_ERR(__clsm_close_cursors(
			    clsm, ngood, clsm->nchunks));
			WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 0));
			locked = 1;
			if (lsm_tree->dsk_gen != saved_gen)
				goto retry;
		}

		/* Detach from our old primary. */
		clsm->primary_chunk = NULL;
		clsm->current = NULL;
	}

	WT_ERR(__wt_realloc_def(session,
	    &clsm->bloom_alloc, nchunks, &clsm->blooms));
	WT_ERR(__wt_realloc_def(session,
	    &clsm->cursor_alloc, nchunks, &clsm->cursors));

	clsm->nchunks = nchunks;

	/* Open the cursors for chunks that have changed. */
	for (i = ngood, cp = clsm->cursors + i; i != nchunks; i++, cp++) {
		chunk = lsm_tree->chunk[i + start_chunk];
		/* Copy the maximum transaction ID. */
		if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
			clsm->txnid_max[i] = chunk->txnid_max;

		/*
		 * Read from the checkpoint if the file has been written.
		 * Once all cursors switch, the in-memory tree can be evicted.
		 */
		WT_ASSERT(session, *cp == NULL);
		ret = __wt_open_cursor(session, chunk->uri, c,
		    (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) ?
			ckpt_cfg : NULL, cp);

		/*
		 * XXX kludge: we may have an empty chunk where no checkpoint
		 * was written.  If so, try to open the ordinary handle on that
		 * chunk instead.
		 */
		if (ret == WT_NOTFOUND &&
		    F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
			ret = __wt_open_cursor(
			    session, chunk->uri, c, NULL, cp);
			if (ret == 0)
				chunk->empty = 1;
		}
		WT_ERR(ret);

		if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) &&
		    !F_ISSET(clsm, WT_CLSM_MERGE))
			WT_ERR(__wt_bloom_open(session, chunk->bloom_uri,
			    lsm_tree->bloom_bit_count,
			    lsm_tree->bloom_hash_count,
			    c, &clsm->blooms[i]));

		/* Child cursors always use overwrite and raw mode. */
		F_SET(*cp, WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
	}

	/* The last chunk is our new primary. */
	if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
		clsm->primary_chunk = chunk;
		primary = clsm->cursors[clsm->nchunks - 1];
		WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)(primary))->btree,
		    __wt_btree_evictable(session, 0));
	}

	clsm->dsk_gen = lsm_tree->dsk_gen;
err:	F_CLR(session, WT_SESSION_NO_CACHE_CHECK);
#ifdef HAVE_DIAGNOSTIC
	/* Check that all cursors are open as expected. */
	if (ret == 0 && F_ISSET(clsm, WT_CLSM_OPEN_READ)) {
		for (i = 0, cp = clsm->cursors; i != clsm->nchunks; cp++, i++) {
			chunk = lsm_tree->chunk[i + start_chunk];

			/* Make sure the cursor is open. */
			WT_ASSERT(session, *cp != NULL);

			/* Easy case: the URIs should match. */
			WT_ASSERT(session, strcmp((*cp)->uri, chunk->uri) == 0);

			/* Make sure the checkpoint config matches. */
			checkpoint = ((WT_CURSOR_BTREE *)*cp)->
			    btree->dhandle->checkpoint;
			WT_ASSERT(session,
			    (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
			    !chunk->empty) ?
			    checkpoint != NULL : checkpoint == NULL);

			/* Make sure the Bloom config matches. */
			WT_ASSERT(session,
			    (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) &&
			    !F_ISSET(clsm, WT_CLSM_MERGE)) ?
			    clsm->blooms[i] != NULL : clsm->blooms[i] == NULL);
		}
	}
#endif
	if (locked)
		WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree));
	return (ret);
}
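
The retry around __clsm_close_cursors above is a generation check: save
dsk_gen, drop the tree lock for the slow work, then re-take the lock
and start over if the generation moved underneath us.  In isolation,
under hypothetical names:

#include <pthread.h>
#include <stdint.h>

struct guarded {
	pthread_mutex_t lock;
	uint64_t gen;		/* bumped on every structural change */
};

/* Stand-in for work that may block and must not hold the lock. */
static void
slow_work(void)
{
}

static void
update_with_retry(struct guarded *g)
{
	uint64_t saved_gen;

	pthread_mutex_lock(&g->lock);
retry:	saved_gen = g->gen;
	pthread_mutex_unlock(&g->lock);

	slow_work();		/* e.g., closing cursors may block */

	pthread_mutex_lock(&g->lock);
	if (g->gen != saved_gen)
		goto retry;	/* structure changed: start over */
	/* ... proceed under the lock with a consistent view ... */
	pthread_mutex_unlock(&g->lock);
}
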
Example #16
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint32_t filenum;
	int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * We update the close file handle before updating the
			 * close LSN when changing files.  It is possible we
			 * could see mismatched settings.  If we do, yield
			 * until it is set.  This should rarely happen.
			 */
			while (log->log_close_lsn.file < filenum)
				__wt_yield();

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set close_end_lsn to the LSN immediately
				 * after ours, that is, the beginning of the
				 * next log file.  We need to know the file
				 * number of our own close in case earlier
				 * calls are still in progress and ours is the
				 * next one to move the sync_lsn into the next
				 * file for later syncs.
				 */
				close_end_lsn.file++;
				close_end_lsn.offset = 0;
				WT_ERR(__wt_fsync(session, close_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_sync_cond));
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't, signal the wrlsn thread
			 * to get it written.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				WT_ERR(__wt_fsync(session, log->log_fh));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = 1;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					log->sync_lsn = min_lsn;
					WT_ERR(__wt_cond_signal(
					    session, log->log_sync_cond));
				}
				locked = 0;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				WT_ERR(__wt_cond_signal(
				    session, conn->log_wrlsn_cond));
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				__wt_yield();
				continue;
			}
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(
		    session, conn->log_file_cond, WT_MILLION));
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
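
The WT_FULL_BARRIER in the function above enforces one ordering: copy
the close LSN out of the shared structure before clearing the file
handle that publishes it.  A sketch of the same two-step hand-off using
C11 atomics (WiredTiger uses its own barrier macros, not <stdatomic.h>):

#include <stdatomic.h>
#include <stdint.h>

struct close_req {
	uint64_t lsn;		/* payload published with the pointer */
};

static _Atomic(struct close_req *) pending;

/* Setter: make the payload visible before publishing the pointer. */
static void
publish(struct close_req *req, uint64_t lsn)
{
	req->lsn = lsn;
	atomic_store_explicit(&pending, req, memory_order_release);
}

/* Consumer: copy the payload before releasing the pointer for reuse. */
static int
consume(uint64_t *lsnp)
{
	struct close_req *req;

	req = atomic_load_explicit(&pending, memory_order_acquire);
	if (req == NULL)
		return (0);
	*lsnp = req->lsn;
	/* The release store keeps the copy above from being reordered. */
	atomic_store_explicit(&pending, NULL, memory_order_release);
	return (1);
}
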
Example #17
/*
 * __wt_log_wrlsn --
 *	Process written log slots and attempt to coalesce them if the LSNs
 *	are contiguous.  The purpose of this function is to advance the
 *	write_lsn in LSN order after the buffer is written to the log file.
 */
int
__wt_log_wrlsn(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
	WT_LOGSLOT *coalescing, *slot;
	WT_LSN save_lsn;
	size_t written_i;
	uint32_t i, save_i;

	conn = S2C(session);
	log = conn->log;
	__wt_spin_lock(session, &log->log_writelsn_lock);
restart:
	coalescing = NULL;
	WT_INIT_LSN(&save_lsn);
	written_i = 0;
	i = 0;

	/*
	 * Walk the array once saving any slots that are in the
	 * WT_LOG_SLOT_WRITTEN state.
	 */
	while (i < WT_SLOT_POOL) {
		save_i = i;
		slot = &log->slot_pool[i++];
		/*
		 * XXX - During debugging I saw slot 0 become orphaned.
		 * I believe it is fixed, but check for now.
		 * This assertion should catch that.
		 */
		if (slot->slot_state == 0)
			WT_ASSERT(session,
			    slot->slot_release_lsn.file >= log->write_lsn.file);
		if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
			continue;
		written[written_i].slot_index = save_i;
		written[written_i++].lsn = slot->slot_release_lsn;
	}
	/*
	 * If we found any written slots process them.  We sort them
	 * based on the release LSN, and then look for them in order.
	 */
	if (written_i > 0) {
		WT_INSERTION_SORT(written, written_i,
		    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);
		/*
		 * We know the written array is sorted by LSN.  Go
		 * through them either advancing write_lsn or coalesce
		 * contiguous ranges of written slots.
		 */
		for (i = 0; i < written_i; i++) {
			slot = &log->slot_pool[written[i].slot_index];
			/*
			 * The log server thread pushes out slots periodically.
			 * Sometimes they are empty slots.  If we find an
			 * empty slot, where empty means the start and end LSN
			 * are the same, free it and continue.
			 */
			if (__wt_log_cmp(&slot->slot_start_lsn,
			    &slot->slot_release_lsn) == 0 &&
			    __wt_log_cmp(&slot->slot_start_lsn,
			    &slot->slot_end_lsn) == 0) {
				__wt_log_slot_free(session, slot);
				continue;
			}
			if (coalescing != NULL) {
				/*
				 * If the write_lsn changed, we may be able to
				 * process slots.  Try again.
				 */
				if (__wt_log_cmp(
				    &log->write_lsn, &save_lsn) != 0)
					goto restart;
				if (__wt_log_cmp(&coalescing->slot_end_lsn,
				    &written[i].lsn) != 0) {
					coalescing = slot;
					continue;
				}
				/*
				 * If we get here we have a slot to coalesce
				 * and free.
				 */
				coalescing->slot_last_offset =
				    slot->slot_last_offset;
				coalescing->slot_end_lsn = slot->slot_end_lsn;
				WT_STAT_FAST_CONN_INCR(
				    session, log_slot_coalesced);
				/*
				 * Copy the flag for later closing.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					F_SET(coalescing, WT_SLOT_CLOSEFH);
			} else {
				/*
				 * If this written slot is not the next LSN,
				 * try to start coalescing with later slots.
				 * A synchronous write may update write_lsn
				 * so save the last one we saw to check when
				 * coalescing slots.
				 */
				save_lsn = log->write_lsn;
				if (__wt_log_cmp(
				    &log->write_lsn, &written[i].lsn) != 0) {
					coalescing = slot;
					continue;
				}
				/*
				 * If we get here we have a slot to process.
				 * Advance the LSN and process the slot.
				 */
				WT_ASSERT(session, __wt_log_cmp(&written[i].lsn,
				    &slot->slot_release_lsn) == 0);
				if (slot->slot_start_lsn.offset !=
				    slot->slot_last_offset)
					slot->slot_start_lsn.offset =
					    slot->slot_last_offset;
				log->write_start_lsn = slot->slot_start_lsn;
				log->write_lsn = slot->slot_end_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_write_cond));
				WT_STAT_FAST_CONN_INCR(session, log_write_lsn);
				/*
				 * Signal the close thread if needed.
				 */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					WT_ERR(__wt_cond_signal(
					    session, conn->log_file_cond));
			}
			__wt_log_slot_free(session, slot);
		}
	}
err:	__wt_spin_unlock(session, &log->log_writelsn_lock);
	return (ret);
}
Example #18
/*
 * __log_file_server --
 *	The log file server thread.  This worker thread manages
 *	log file operations such as closing and syncing.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN close_end_lsn, min_lsn;
	WT_SESSION_IMPL *session;
	uint32_t filenum;
	bool locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = false;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * If there is a log file to close, make sure any outstanding
		 * write operations have completed, then fsync and close it.
		 */
		if ((close_fh = log->log_close_fh) != NULL) {
			WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
			    &filenum));
			/*
			 * We update the close file handle before updating the
			 * close LSN when changing files.  It is possible we
			 * could see mismatched settings.  If we do, yield
			 * until it is set.  This should rarely happen.
			 */
			while (log->log_close_lsn.l.file < filenum)
				__wt_yield();

			if (__wt_log_cmp(
			    &log->write_lsn, &log->log_close_lsn) >= 0) {
				/*
				 * We've copied the file handle, clear out the
				 * one in the log structure to allow it to be
				 * set again.  Copy the LSN before clearing
				 * the file handle.
				 * Use a barrier to make sure the compiler does
				 * not reorder the following two statements.
				 */
				close_end_lsn = log->log_close_lsn;
				WT_FULL_BARRIER();
				log->log_close_fh = NULL;
				/*
				 * Set close_end_lsn to the LSN immediately
				 * after ours, that is, the beginning of the
				 * next log file.  We need to know the file
				 * number of our own close in case earlier
				 * calls are still in progress and ours is the
				 * next one to move the sync_lsn into the next
				 * file for later syncs.
				 */
				WT_ERR(__wt_fsync(session, close_fh, true));

				/*
				 * We want to have the file size reflect actual
				 * data with minimal pre-allocated zeroed space.
				 * We can't truncate the file during hot backup,
				 * or the underlying file system may not support
				 * truncate: both are OK, it's just more work
				 * during cursor traversal.
				 */
				if (!conn->hot_backup) {
					__wt_readlock(
					    session, conn->hot_backup_lock);
					if (!conn->hot_backup)
						WT_ERR_ERROR_OK(
						    __wt_ftruncate(session,
						    close_fh,
						    close_end_lsn.l.offset),
						    ENOTSUP);
					__wt_readunlock(
					    session, conn->hot_backup_lock);
				}
				WT_SET_LSN(&close_end_lsn,
				    close_end_lsn.l.file + 1, 0);
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				WT_ERR(__wt_close(session, &close_fh));
				WT_ASSERT(session, __wt_log_cmp(
				    &close_end_lsn, &log->sync_lsn) >= 0);
				log->sync_lsn = close_end_lsn;
				__wt_cond_signal(session, log->log_sync_cond);
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			}
		}
		/*
		 * If a later thread asked for a background sync, do it now.
		 */
		if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
			/*
			 * Save the latest write LSN which is the minimum
			 * we will have written to disk.
			 */
			min_lsn = log->write_lsn;
			/*
			 * We have to wait until the LSN we asked for is
			 * written.  If it isn't, signal the wrlsn thread
			 * to get it written.
			 *
			 * We also have to wait for the written LSN and the
			 * sync LSN to be in the same file so that we know we
			 * have synchronized all earlier log files.
			 */
			if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
				/*
				 * If the sync file is behind either the one
				 * wanted for a background sync or the file
				 * the write LSN has moved to, continue and
				 * let this worker thread process the older
				 * file immediately.
				 */
				if ((log->sync_lsn.l.file <
				    log->bg_sync_lsn.l.file) ||
				    (log->sync_lsn.l.file < min_lsn.l.file))
					continue;
				WT_ERR(__wt_fsync(session, log->log_fh, true));
				__wt_spin_lock(session, &log->log_sync_lock);
				locked = true;
				/*
				 * The sync LSN could have advanced while we
				 * were writing to disk.
				 */
				if (__wt_log_cmp(
				    &log->sync_lsn, &min_lsn) <= 0) {
					WT_ASSERT(session,
					    min_lsn.l.file ==
					    log->sync_lsn.l.file);
					log->sync_lsn = min_lsn;
					__wt_cond_signal(
					    session, log->log_sync_cond);
				}
				locked = false;
				__wt_spin_unlock(session, &log->log_sync_lock);
			} else {
				__wt_cond_auto_signal(
				    session, conn->log_wrlsn_cond);
				/*
				 * We do not want to wait potentially a second
				 * to process this.  Yield to give the wrlsn
				 * thread a chance to run and try again in
				 * this case.
				 */
				__wt_yield();
				continue;
			}
		}
		/* Wait until the next event. */
		__wt_cond_wait(session, conn->log_file_cond, WT_MILLION / 10);
	}

	if (0) {
err:		__wt_err(session, ret, "log close server error");
	}
	if (locked)
		__wt_spin_unlock(session, &log->log_sync_lock);
	return (WT_THREAD_RET_VALUE);
}
Example #19
/*
 * __wt_logmgr_destroy --
 *	Destroy the log archiving server thread and logging subsystem.
 */
int
__wt_logmgr_destroy(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION *wt_session;

	conn = S2C(session);

	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
		/*
		 * We always set up the log_path so printlog can work without
		 * recovery. Therefore, always free it, even if logging isn't
		 * on.
		 */
		__wt_free(session, conn->log_path);
		return (0);
	}
	if (conn->log_tid_set) {
		WT_TRET(__wt_cond_signal(session, conn->log_cond));
		WT_TRET(__wt_thread_join(session, conn->log_tid));
		conn->log_tid_set = 0;
	}
	if (conn->log_file_tid_set) {
		WT_TRET(__wt_cond_signal(session, conn->log_file_cond));
		WT_TRET(__wt_thread_join(session, conn->log_file_tid));
		conn->log_file_tid_set = 0;
	}
	if (conn->log_file_session != NULL) {
		wt_session = &conn->log_file_session->iface;
		WT_TRET(wt_session->close(wt_session, NULL));
		conn->log_file_session = NULL;
	}
	if (conn->log_wrlsn_tid_set) {
		WT_TRET(__wt_cond_signal(session, conn->log_wrlsn_cond));
		WT_TRET(__wt_thread_join(session, conn->log_wrlsn_tid));
		conn->log_wrlsn_tid_set = 0;
	}
	if (conn->log_wrlsn_session != NULL) {
		wt_session = &conn->log_wrlsn_session->iface;
		WT_TRET(wt_session->close(wt_session, NULL));
		conn->log_wrlsn_session = NULL;
	}

	WT_TRET(__wt_log_slot_destroy(session));
	WT_TRET(__wt_log_close(session));

	/* Close the server thread's session. */
	if (conn->log_session != NULL) {
		wt_session = &conn->log_session->iface;
		WT_TRET(wt_session->close(wt_session, NULL));
		conn->log_session = NULL;
	}

	/* Destroy the condition variables now that all threads are stopped */
	WT_TRET(__wt_cond_destroy(session, &conn->log_cond));
	WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond));
	WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond));

	WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond));
	WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond));
	WT_TRET(__wt_rwlock_destroy(session, &conn->log->log_archive_lock));
	__wt_spin_destroy(session, &conn->log->log_lock);
	__wt_spin_destroy(session, &conn->log->log_slot_lock);
	__wt_spin_destroy(session, &conn->log->log_sync_lock);
	__wt_spin_destroy(session, &conn->log->log_writelsn_lock);
	__wt_free(session, conn->log_path);
	__wt_free(session, conn->log);
	return (ret);
}
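
Every destroy path in this section follows the same order: clear the
run flag, signal the condition variable so the thread notices, join the
thread, close its session, and only then destroy the condition
variable.  One worker's worth of that sequence as a POSIX-thread
sketch, illustrative names:

#include <pthread.h>
#include <stdbool.h>

struct server {
	pthread_t tid;
	bool tid_set;
	bool running;		/* checked by the worker's loop */
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

static int
server_destroy(struct server *s)
{
	int ret;

	ret = 0;
	pthread_mutex_lock(&s->lock);
	s->running = false;
	pthread_cond_signal(&s->cond);	/* wake the worker so it notices */
	pthread_mutex_unlock(&s->lock);

	if (s->tid_set) {
		ret = pthread_join(s->tid, NULL);
		s->tid_set = false;
	}
	/* Safe only after the join: no thread can be waiting now. */
	pthread_cond_destroy(&s->cond);
	pthread_mutex_destroy(&s->lock);
	return (ret);
}
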
Example #20
/*
 * __wt_meta_track_off --
 *	Turn off metadata operation tracking, unrolling on error.
 */
int
__wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
	WT_DECL_RET;
	WT_META_TRACK *trk, *trk_orig;
	WT_SESSION_IMPL *ckpt_session;
	int saved_ret;
	bool did_drop;

	saved_ret = 0;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	/* If it was a nested transaction, there is nothing to do. */
	if (--session->meta_track_nest != 0)
		return (0);

	/* Turn off tracking for unroll. */
	session->meta_track_next = session->meta_track_sub = NULL;

	/*
	 * If there were no operations logged, skip unnecessary metadata
	 * checkpoints.  For example, this happens if attempting to create a
	 * data source that already exists (or drop one that doesn't).
	 */
	if (trk == trk_orig)
		goto err;

	/* Unrolling doesn't require syncing the metadata. */
	if (unroll)
		goto err;

	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
#ifdef WT_ENABLE_SCHEMA_TXN
		WT_ERR(__wt_txn_commit(session, NULL));
		__wt_errx(session, "TRACK: Commit internal schema txn");
#endif
	}

	/*
	 * If we don't have the metadata cursor (e.g., we're in the process of
	 * creating the metadata), we can't sync it.
	 */
	if (!need_sync || session->meta_cursor == NULL ||
	    F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
		goto err;

	/* If we're logging, make sure the metadata update was flushed. */
	if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
		WT_WITH_DHANDLE(session,
		    WT_SESSION_META_DHANDLE(session),
		    ret = __wt_txn_checkpoint_log(
		    session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
	else {
		WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
		ckpt_session = S2C(session)->meta_ckpt_session;
		/*
		 * If this operation is part of a running transaction, that
		 * should be included in the checkpoint.
		 */
		ckpt_session->txn.id = session->txn.id;
		WT_ASSERT(session,
		    !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
		WT_WITH_DHANDLE(ckpt_session, WT_SESSION_META_DHANDLE(session),
		    WT_WITH_METADATA_LOCK(ckpt_session,
			ret = __wt_checkpoint(ckpt_session, NULL)));
		ckpt_session->txn.id = WT_TXN_NONE;
		if (ret == 0)
			WT_WITH_DHANDLE(session,
			    WT_SESSION_META_DHANDLE(session),
			    ret = __wt_checkpoint_sync(session, NULL));
	}

err:	/*
	 * Undo any tracked operations on failure.
	 * Apply any tracked operations post-commit.
	 */
	did_drop = false;
	if (unroll || ret != 0) {
		saved_ret = ret;
		ret = 0;
		while (--trk >= trk_orig) {
			did_drop = did_drop || trk->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_unroll(session, trk));
		}
	} else
		for (; trk_orig < trk; trk_orig++) {
			did_drop = did_drop ||
			    trk_orig->op == WT_ST_DROP_COMMIT;
			WT_TRET(__meta_track_apply(session, trk_orig));
		}

	if (F_ISSET(session, WT_SESSION_SCHEMA_TXN)) {
		F_CLR(session, WT_SESSION_SCHEMA_TXN);
		/*
		 * We should have committed above unless we're unrolling,
		 * there was an error, or the operation was a noop.
		 */
		WT_ASSERT(session, unroll || saved_ret != 0 ||
		    session->txn.mod_count == 0);
#ifdef WT_ENABLE_SCHEMA_TXN
		__wt_err(session, saved_ret,
		    "TRACK: Abort internal schema txn");
		WT_TRET(__wt_txn_rollback(session, NULL));
#endif
	}

	/*
	 * Wake up the sweep thread: particularly for the in-memory
	 * storage engine, we want to reclaim space immediately.
	 */
	if (did_drop && S2C(session)->sweep_cond != NULL)
		__wt_cond_signal(session, S2C(session)->sweep_cond);

	if (ret != 0)
		WT_PANIC_RET(session, ret,
		    "failed to apply or unroll all tracked operations");
	return (saved_ret == 0 ? 0 : saved_ret);
}
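
The err block above carries the core mechanism: on failure or an
explicit unroll, walk the tracked operations backwards undoing each; on
success, walk them forwards applying each; keep the first error in both
directions.  Reduced to function pointers as a hedged sketch:

#include <stddef.h>

struct tracked_op {
	int (*apply)(void *);	/* post-commit action */
	int (*unroll)(void *);	/* undo action */
	void *arg;
};

/* Apply forwards on success, undo backwards on failure. */
static int
track_off(const struct tracked_op *ops, size_t n, int failed)
{
	size_t i;
	int ret, tret;

	ret = 0;
	if (failed) {
		for (i = n; i-- > 0;)
			if ((tret = ops[i].unroll(ops[i].arg)) != 0 && ret == 0)
				ret = tret;
	} else
		for (i = 0; i < n; i++)
			if ((tret = ops[i].apply(ops[i].arg)) != 0 && ret == 0)
				ret = tret;
	return (ret);
}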