Ejemplo n.º 1
0
/*
 * __backup_stop --
 *	End a backup: detach the connection's backup list, free the cursor's
 *	NULL-terminated list of names, remove the backup-specific file and
 *	clear the connection's hot-backup flag.
 *
 *	Returns the result of removing the backup file.
 */
static int
__backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	int slot;

	conn = S2C(session);

	/*
	 * Drop the connection's pointer to the list while holding the lock,
	 * then release the names held by the backup cursor.
	 */
	__wt_writelock(session, &conn->hot_backup_lock);
	conn->hot_backup_list = NULL;
	__wt_writeunlock(session, &conn->hot_backup_lock);

	if (cb->list != NULL) {
		slot = 0;
		while (cb->list[slot] != NULL) {
			__wt_free(session, cb->list[slot]);
			++slot;
		}
		__wt_free(session, cb->list);
	}

	/* Discard the backup-specific file, remembering any failure. */
	ret = __wt_backup_file_remove(session);

	/* Let checkpoint deletion, and the next hot backup, go ahead. */
	__wt_writelock(session, &conn->hot_backup_lock);
	conn->hot_backup = false;
	__wt_writeunlock(session, &conn->hot_backup_lock);

	return (ret);
}
Ejemplo n.º 2
0
/*
 * __wt_log_truncate_files --
 *	Truncate log files via archive once. Requires that the server is not
 *	currently running.
 *
 *	session: the calling session.
 *	cursor: optional backup cursor; when non-NULL its backup ID bounds the
 *	    archive, otherwise the bound passed down is 0.
 *	cfg: unused.
 *
 *	Returns EINVAL if the server is running with archiving configured,
 *	otherwise the first error seen from the verbose message, the lock
 *	calls or the archive pass, or 0 on success.
 */
int
__wt_log_truncate_files(
    WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[])
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint32_t backup_file;

	WT_UNUSED(cfg);
	conn = S2C(session);
	log = conn->log;
	if (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE))
		WT_RET_MSG(session, EINVAL,
		    "Attempt to archive manually while a server is running");

	backup_file = 0;
	if (cursor != NULL)
		backup_file = WT_CURSOR_BACKUP_ID(cursor);
	WT_ASSERT(session, backup_file <= log->alloc_lsn.file);
	WT_RET(__wt_verbose(session, WT_VERB_LOG,
	    "log_truncate_files: Archive once up to %" PRIu32,
	    backup_file));

	/*
	 * Archive under the archive lock.  The lock is released exactly once
	 * on every path: a failed unlock is never retried, and an archive
	 * error is not replaced by a later unlock error.
	 */
	WT_RET(__wt_writelock(session, log->log_archive_lock));
	ret = __log_archive_once(session, backup_file);
	WT_TRET(__wt_writeunlock(session, log->log_archive_lock));
	return (ret);
}
Ejemplo n.º 3
0
/*
 * __wt_conn_btree_get --
 *	Return with session->dhandle referencing a btree file handle: lock the
 *	handle the session already references, or look one up by name and
 *	checkpoint, then open the underlying btree if that is required.
 */
int
__wt_conn_btree_get(WT_SESSION_IMPL *session,
    const char *name, const char *ckpt, const char *cfg[], uint32_t flags)
{
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;
	int need_open;

	if (!LF_ISSET(WT_DHANDLE_HAVE_REF)) {
		/* No reference yet: find the handle under the handle lock. */
		WT_WITH_DHANDLE_LOCK(session,
		    ret = __conn_dhandle_get(session, name, ckpt, flags));
		WT_RET(ret);
	} else {
		/* The session already references a handle, just lock it. */
		WT_RET(
		    __conn_dhandle_open_lock(session, session->dhandle, flags));
	}
	dhandle = session->dhandle;

	/*
	 * Unless the caller only wanted the lock, open the btree when the
	 * handle isn't open yet or when special flags were requested.
	 */
	need_open = !LF_ISSET(WT_DHANDLE_LOCK_ONLY) &&
	    (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
	    LF_ISSET(WT_BTREE_SPECIAL_FLAGS));
	if (need_open) {
		ret = __conn_btree_open(session, cfg, flags);
		if (ret != 0) {
			/* The open failed: give up exclusivity and the lock. */
			F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
			WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
		}
	}

	WT_ASSERT(session, ret != 0 ||
	    LF_ISSET(WT_DHANDLE_EXCLUSIVE) ==
	    F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));

	return (ret);
}
Ejemplo n.º 4
0
/*
 * __wt_log_truncate_files --
 *	Run a single archive pass over the log files while holding the archive
 *	lock.  Unless forced, this is refused while the log server is running
 *	with archiving configured.  A non-NULL backup cursor supplies the
 *	upper bound passed to the archive pass.
 */
int
__wt_log_truncate_files(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool force)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint32_t backup_file;

	conn = S2C(session);

	/* Without logging there is nothing to truncate. */
	if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
		return (0);

	if (!force && F_ISSET(conn, WT_CONN_SERVER_LOG) &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE))
		WT_RET_MSG(session, EINVAL,
		    "Attempt to archive manually while a server is running");

	log = conn->log;

	if (cursor == NULL)
		backup_file = 0;
	else {
		/* A backup cursor and a forced truncate are never combined. */
		WT_ASSERT(session, force == false);
		backup_file = WT_CURSOR_BACKUP_ID(cursor);
	}
	WT_ASSERT(session, backup_file <= log->alloc_lsn.l.file);
	__wt_verbose(session, WT_VERB_LOG,
	    "log_truncate_files: Archive once up to %" PRIu32, backup_file);

	/* Archive under the write lock and hand back the archive's result. */
	__wt_writelock(session, &log->log_archive_lock);
	ret = __log_archive_once(session, backup_file);
	__wt_writeunlock(session, &log->log_archive_lock);

	return (ret);
}
Ejemplo n.º 5
0
/*
 * __log_server --
 *	The log server thread.
 *
 *	Loops while WT_CONN_LOG_SERVER_RUN is set on the connection: optionally
 *	pre-allocates log files, optionally runs an archive pass if the archive
 *	lock can be taken without blocking, then waits on the log condition
 *	variable.  Any error is reported and ends the thread.
 *
 *	arg: the server's WT_SESSION_IMPL.
 */
static WT_THREAD_RET
__log_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	u_int locked;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	locked = 0;
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Perform log pre-allocation.
		 */
		if (conn->log_prealloc > 0)
			WT_ERR(__log_prealloc_once(session));

		/*
		 * Perform the archive.
		 */
		if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
			if (__wt_try_writelock(
			    session, log->log_archive_lock) == 0) {
				locked = 1;
				WT_ERR(__log_archive_once(session, 0));
				/*
				 * Clear the flag before unlocking: if the
				 * unlock itself fails, the error path must
				 * not attempt a second unlock.
				 */
				locked = 0;
				WT_ERR(__wt_writeunlock(
				    session, log->log_archive_lock));
			} else
				WT_ERR(__wt_verbose(session, WT_VERB_LOG,
				    "log_archive: Blocked due to open log "
				    "cursor holding archive lock"));
		}
		/* Wait until the next event. */
		WT_ERR(__wt_cond_wait(session, conn->log_cond, WT_MILLION));
	}

	/* The error label is only reachable by a jump from the loop. */
	if (0) {
err:		__wt_err(session, ret, "log server error");
	}
	/* Still holding the archive lock means the archive pass failed. */
	if (locked)
		(void)__wt_writeunlock(session, log->log_archive_lock);
	return (WT_THREAD_RET_VALUE);
}
Ejemplo n.º 6
0
/*
 * __wt_lsm_tree_writeunlock --
 *	Drop the exclusive lock on an LSM tree, first clearing the session's
 *	no-cache-check and no-schema-lock flags.  A failure to unlock is
 *	reported through the panic path.
 */
int
__wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
	WT_DECL_RET;

	F_CLR(session, WT_SESSION_NO_CACHE_CHECK | WT_SESSION_NO_SCHEMA_LOCK);

	ret = __wt_writeunlock(session, lsm_tree->rwlock);
	if (ret != 0) {
		WT_PANIC_RET(session, ret, "Unlocking an LSM tree");
	}
	return (0);
}
Ejemplo n.º 7
0
/*
 * __wt_thread_group_resize --
 *	Locking wrapper around the thread group resize: log the requested
 *	change, then resize the group while holding the group's write lock.
 */
int
__wt_thread_group_resize(
    WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
    uint32_t new_min, uint32_t new_max, uint32_t flags)
{
	WT_DECL_RET;

	__wt_verbose(session, WT_VERB_THREAD_GROUP,
	    "Resize thread group: %p, from min: %" PRIu32 " -> %" PRIu32
	    " from max: %" PRIu32 " -> %" PRIu32,
	    (void *)group, group->min, new_min, group->max, new_max);

	/* The lock is released whether or not the resize succeeds. */
	__wt_writelock(session, group->lock);
	ret = __thread_group_resize(session, group, new_min, new_max, flags);
	__wt_writeunlock(session, group->lock);

	return (ret);
}
Ejemplo n.º 8
0
/*
 * __wt_ovfl_discard --
 *	Discard an on-page overflow value, and reset the page's cell.
 *
 *	session: the calling session (its btree supplies the block manager and
 *	    the overflow lock).
 *	cell: the on-page overflow cell to discard.
 *
 *	Returns the illegal-value error if the cell is neither an overflow key
 *	nor an overflow value, a lock error, or the first error from the
 *	unlock or the block free; 0 on success.
 */
int
__wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell)
{
	WT_BM *bm;
	WT_BTREE *btree;
	WT_CELL_UNPACK *unpack, _unpack;
	WT_DECL_RET;
	u_int new_type, old_type;

	btree = S2BT(session);
	bm = btree->bm;
	unpack = &_unpack;

	__wt_cell_unpack(cell, unpack);

	/*
	 * Validate the cell type before acquiring the overflow lock: the
	 * illegal-value case returns from the function, and doing that with
	 * the lock held would leave it write-locked.
	 */
	switch (unpack->raw) {
	case WT_CELL_KEY_OVFL:
		old_type = WT_CELL_KEY_OVFL;
		new_type = WT_CELL_KEY_OVFL_RM;
		break;
	case WT_CELL_VALUE_OVFL:
		old_type = WT_CELL_VALUE_OVFL;
		new_type = WT_CELL_VALUE_OVFL_RM;
		break;
	WT_ILLEGAL_VALUE(session);
	}

	/*
	 * Finally remove overflow key/value objects, called when reconciliation
	 * finishes after successfully writing a page.
	 *
	 * Keys must have already been instantiated and value objects must have
	 * already been cached (if they might potentially still be read by any
	 * running transaction).
	 *
	 * Acquire the overflow lock to avoid racing with a thread reading the
	 * backing overflow blocks.
	 */
	WT_RET(__wt_writelock(session, btree->ovfl_lock));
	__wt_cell_type_reset(session, unpack->cell, old_type, new_type);
	WT_TRET(__wt_writeunlock(session, btree->ovfl_lock));

	/* Free the backing disk blocks. */
	WT_TRET(bm->free(bm, session, unpack->data, unpack->size));

	return (ret);
}
Ejemplo n.º 9
0
/*
 * __backup_stop --
 *	End a backup: remove the backup-specific file, then clear the
 *	connection's hot-backup flag while holding the hot-backup lock.
 *	The first error encountered is returned.
 */
static int
__backup_stop(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;

	conn = S2C(session);

	/* Discard the backup-specific file, remembering any failure. */
	WT_TRET(__wt_backup_file_remove(session));

	/*
	 * Clearing the flag lets checkpoint deletion, and the next hot
	 * backup, go ahead.
	 */
	WT_TRET(__wt_writelock(session, conn->hot_backup_lock));
	conn->hot_backup = false;
	WT_TRET(__wt_writeunlock(session, conn->hot_backup_lock));

	return (ret);
}
Ejemplo n.º 10
0
/*
 * __wt_thread_group_start_one --
 *	Add one thread to the group if it is below its maximum.  When "wait"
 *	is false the group lock is only tried, and a busy lock is treated as
 *	"nothing to do".
 */
int
__wt_thread_group_start_one(
    WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait)
{
	WT_DECL_RET;

	/* Cheap check without the lock: already at the limit. */
	if (group->current_threads >= group->max)
		return (0);

	if (!wait) {
		if (__wt_try_writelock(session, group->lock) != 0)
			return (0);
	} else
		__wt_writelock(session, group->lock);

	/* The count may have changed before we got the lock, look again. */
	if (group->current_threads < group->max)
		ret = __thread_group_grow(
		    session, group, group->current_threads + 1);
	__wt_writeunlock(session, group->lock);

	return (ret);
}
Ejemplo n.º 11
0
/*
 * __wt_ovfl_track_wrapup_err --
 *	Resolve the page's overflow tracking on reconciliation error.
 *
 *	session: the calling session.
 *	page: the page whose overflow tracking is resolved; a page with no
 *	    modify structure or no overflow tracking is a no-op.
 *
 *	Returns 0 on success, otherwise the first error from the discard,
 *	reuse or transaction-cache wrapup steps (including the lock calls
 *	around the last of them).
 */
int
__wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	WT_DECL_RET;
	WT_OVFL_TRACK *track;

	if (page->modify == NULL || page->modify->ovfl_track == NULL)
		return (0);

	track = page->modify->ovfl_track;
	if (track->discard != NULL)
		WT_RET(__ovfl_discard_wrapup_err(session, page));

	if (track->ovfl_reuse[0] != NULL)
		WT_RET(__ovfl_reuse_wrapup_err(session, page));

	if (track->ovfl_txnc[0] != NULL) {
		/* The transaction-cache wrapup runs under the overflow lock. */
		WT_RET(__wt_writelock(session, S2BT(session)->ovfl_lock));
		ret = __ovfl_txnc_wrapup(session, page);
		WT_TRET(__wt_writeunlock(session, S2BT(session)->ovfl_lock));
	}

	/* Report a wrapup or unlock failure instead of dropping it. */
	return (ret);
}
Ejemplo n.º 12
0
/*
 * __wt_thread_group_create --
 *	Set up a new thread group in a caller-supplied, zero-initialized
 *	structure: allocate its lock and condition variable, record the run
 *	function and name, and size the group to the requested bounds.  On
 *	failure whatever was allocated here is released again.
 */
int
__wt_thread_group_create(
    WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name,
    uint32_t min, uint32_t max, uint32_t flags,
    int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context))
{
	WT_DECL_RET;

	/* The incoming structure must not have been used yet. */
	WT_ASSERT(session, group->alloc == 0);

	__wt_verbose(session, WT_VERB_THREAD_GROUP,
	    "Creating thread group: %p", (void *)group);

	WT_RET(__wt_rwlock_alloc(session, &group->lock, "Thread group"));

	ret = __wt_cond_alloc(
	    session, "Thread group cond", false, &group->wait_cond);
	if (ret != 0) {
		/* Only the lock exists at this point. */
		__wt_rwlock_destroy(session, &group->lock);
		return (ret);
	}

	__wt_writelock(session, group->lock);
	group->run_func = run_func;
	group->name = name;
	ret = __thread_group_resize(session, group, min, max, flags);
	__wt_writeunlock(session, group->lock);

	/* A failed resize releases both the condition variable and the lock. */
	if (ret != 0) {
		WT_TRET(__wt_cond_destroy(session, &group->wait_cond));
		__wt_rwlock_destroy(session, &group->lock);
	}
	return (ret);
}
Ejemplo n.º 13
0
/*
 * __sweep --
 *	Walk the connection's data handle list, closing and discarding handles
 *	that no session references and that have been dead for longer than
 *	the sweep wait period.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *next;
	WT_DECL_RET;
	time_t now;

	conn = S2C(session);

	/* The current time decides which handles are old enough. */
	WT_RET(__wt_seconds(session, &now));

	for (dhandle = SLIST_FIRST(&conn->dhlh);
	    dhandle != NULL; dhandle = next) {
		/* Save the successor before this handle can be discarded. */
		next = SLIST_NEXT(dhandle, l);

		/* Skip handles still referenced or only recently dead. */
		if (dhandle->session_ref != 0 ||
		    now - dhandle->timeofdeath <= WT_DHANDLE_SWEEP_WAIT)
			continue;

		/*
		 * An open candidate is closed in steps: write its dirty leaf
		 * pages, then take the handle's lock exclusively and close it.
		 * Opens may block behind us for the duration of the I/O, but
		 * the handle has been quiet for a while.
		 *
		 * Closing can fail when an update cannot be written yet
		 * (updates in a file nobody references any more might not be
		 * globally visible if sessions have disjoint sets of files
		 * open).  A busy handle is skipped and retried on a later
		 * sweep, once the transaction state has moved on.
		 */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_cache_op(
			    session, NULL, WT_SYNC_WRITE_LEAVES));
			WT_RET(ret);

			/* The flush took time: is it still a candidate? */
			if (dhandle->session_ref != 0 ||
			    now - dhandle->timeofdeath <= WT_DHANDLE_SWEEP_WAIT)
				continue;

			/*
			 * WT_DHANDLE_EXCLUSIVE is deliberately left unset so
			 * that opens wait for us instead of failing with
			 * EBUSY in the application.
			 */
			ret = __wt_try_writelock(session, dhandle->rwlock);
			if (ret == EBUSY) {
				ret = 0;
				continue;
			}
			WT_RET(ret);

			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session, 0));
			if (ret == EBUSY)
				ret = 0;

			WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
			WT_RET(ret);
		}

		/*
		 * Try to discard the handle.  The callee re-checks the
		 * handle-open flag once it holds the right locks, so a busy
		 * close above needs no special treatment here: that path
		 * never cleared the flag.
		 */
		WT_WITH_DHANDLE(session, dhandle,
		    ret = __wt_conn_dhandle_discard_single(session, 0));
		if (ret != 0 && ret != EBUSY)
			return (ret);
	}
	return (0);
}
Ejemplo n.º 14
0
/*
 * __wt_txn_update_oldest --
 *	Sweep the running transactions to update the oldest ID required.
 *
 *	session: the calling session.
 *	flags: WT_TXN_OLDEST_STRICT forces the update even when the IDs have
 *	    moved by fewer than 100; WT_TXN_OLDEST_WAIT blocks on the scan
 *	    lock instead of giving up when it is busy.
 *
 *	Returns 0 when the update was made, was unnecessary, or was skipped
 *	because the scan lock was busy; otherwise the error from trying the
 *	lock.
 */
int
__wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_SESSION_IMPL *oldest_session;
	WT_TXN_GLOBAL *txn_global;
	uint64_t current_id, last_running, oldest_id;
	uint64_t prev_last_running, prev_oldest_id;
	bool strict, wait;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	strict = LF_ISSET(WT_TXN_OLDEST_STRICT);
	wait = LF_ISSET(WT_TXN_OLDEST_WAIT);

	/* Sample the global state before taking any lock. */
	current_id = last_running = txn_global->current;
	prev_last_running = txn_global->last_running;
	prev_oldest_id = txn_global->oldest_id;

	/*
	 * For pure read-only workloads, or if the update isn't forced and the
	 * oldest ID isn't too far behind, avoid scanning.
	 */
	if (prev_oldest_id == current_id ||
	    (!strict && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
		return (0);

	/* First do a read-only scan. */
	if (wait)
		__wt_readlock(session, txn_global->scan_rwlock);
	else if ((ret =
	    __wt_try_readlock(session, txn_global->scan_rwlock)) != 0)
		return (ret == EBUSY ? 0 : ret);
	__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);
	__wt_readunlock(session, txn_global->scan_rwlock);

	/*
	 * If the state hasn't changed (or hasn't moved far enough for
	 * non-forced updates), give up.
	 */
	if ((oldest_id == prev_oldest_id ||
	    (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
	    ((last_running == prev_last_running) ||
	    (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))))
		return (0);

	/* It looks like an update is necessary, wait for exclusive access. */
	if (wait)
		__wt_writelock(session, txn_global->scan_rwlock);
	else if ((ret =
	    __wt_try_writelock(session, txn_global->scan_rwlock)) != 0)
		return (ret == EBUSY ? 0 : ret);

	/*
	 * If the oldest ID has been updated while we waited, don't bother
	 * scanning.
	 */
	if (WT_TXNID_LE(oldest_id, txn_global->oldest_id) &&
	    WT_TXNID_LE(last_running, txn_global->last_running))
		goto done;

	/*
	 * Re-scan now that we have exclusive access.  This is necessary because
	 * threads get transaction snapshots with read locks, and we have to be
	 * sure that there isn't a thread that has got a snapshot locally but
	 * not yet published its snap_min.
	 */
	__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);

#ifdef HAVE_DIAGNOSTIC
	{
	/*
	 * Make sure the ID doesn't move past any named snapshots.
	 *
	 * Don't include the read/assignment in the assert statement.  Coverity
	 * complains if there are assignments only done in diagnostic builds,
	 * and when the read is from a volatile.
	 */
	uint64_t id = txn_global->nsnap_oldest_id;
	WT_ASSERT(session,
	    id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
	}
#endif
	/* Update the oldest ID; both global values only ever move forward. */
	if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
		txn_global->oldest_id = oldest_id;
	if (WT_TXNID_LT(txn_global->last_running, last_running)) {
		txn_global->last_running = last_running;

#ifdef HAVE_VERBOSE
		/* Output a verbose message about long-running transactions,
		 * but only when some progress is being made. */
		if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
		    current_id - oldest_id > 10000 && oldest_session != NULL) {
			__wt_verbose(session, WT_VERB_TRANSACTION,
			    "old snapshot %" PRIu64
			    " pinned in session %" PRIu32 " [%s]"
			    " with snap_min %" PRIu64 "\n",
			    oldest_id, oldest_session->id,
			    oldest_session->lastop,
			    oldest_session->txn.snap_min);
		}
#endif
	}

	/* Every path that holds the write lock leaves through here. */
done:	__wt_writeunlock(session, txn_global->scan_rwlock);
	return (ret);
}
Ejemplo n.º 15
0
/*
 * __wt_txn_checkpoint_log --
 *	Write a log record for a checkpoint operation.
 *
 *	session: the calling session; its transaction holds the checkpoint
 *	    LSN, snapshot copy and full-checkpoint flag used here.
 *	full: false for a file sync, true for a full checkpoint step.
 *	flags: for a full checkpoint, the step to perform (one of
 *	    WT_TXN_LOG_CKPT_PREPARE, _START, _STOP or _CLEANUP); for a file
 *	    sync, passed through to the file-sync logging function.
 *	lsnp: optional; receives the checkpoint LSN for a file sync during a
 *	    full checkpoint, and is passed to the log write in the stop step.
 *
 *	Returns 0 on success or the first error encountered; any allocated
 *	log record is freed on every path through the error label.
 */
int
__wt_txn_checkpoint_log(
    WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_ITEM(logrec);
	WT_DECL_RET;
	WT_ITEM *ckpt_snapshot, empty;
	WT_LSN *ckpt_lsn;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	uint8_t *end, *p;
	size_t recsize;
	uint32_t i, rectype;
	const char *fmt;

	conn = S2C(session);
	txn_global = &conn->txn_global;
	txn = &session->txn;
	ckpt_lsn = &txn->ckpt_lsn;

	/*
	 * If this is a file sync, log it unless there is a full checkpoint in
	 * progress.
	 */
	if (!full) {
		if (txn->full_ckpt) {
			if (lsnp != NULL)
				*lsnp = *ckpt_lsn;
			return (0);
		}
		return (__txn_log_file_sync(session, flags, lsnp));
	}

	switch (flags) {
	case WT_TXN_LOG_CKPT_PREPARE:
		txn->full_ckpt = true;

		if (conn->compat_major >= WT_LOG_V2) {
			/*
			 * Write the system log record containing a checkpoint
			 * start operation.
			 */
			rectype = WT_LOGREC_SYSTEM;
			fmt = WT_UNCHECKED_STRING(I);
			WT_ERR(__wt_struct_size(
			    session, &recsize, fmt, rectype));
			WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));

			WT_ERR(__wt_struct_pack(session,
			    (uint8_t *)logrec->data + logrec->size, recsize,
			    fmt, rectype));
			logrec->size += (uint32_t)recsize;
			WT_ERR(__wt_logop_checkpoint_start_pack(
			    session, logrec));
			WT_ERR(__wt_log_write(session, logrec, ckpt_lsn, 0));
		} else {
			/* Older log format: write a message record instead. */
			WT_ERR(__wt_log_printf(session,
			    "CHECKPOINT: Starting record"));
			WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
		}

		/*
		 * We take and immediately release the visibility lock.
		 * Acquiring the write lock guarantees that any transaction
		 * that has written to the log has also made its transaction
		 * visible at this time.
		 */
		__wt_writelock(session, &txn_global->visibility_rwlock);
		__wt_writeunlock(session, &txn_global->visibility_rwlock);

		/*
		 * We need to make sure that the log records in the checkpoint
		 * LSN are on disk.  In particular to make sure that the
		 * current log file exists.
		 */
		WT_ERR(__wt_log_force_sync(session, ckpt_lsn));
		break;
	case WT_TXN_LOG_CKPT_START:
		/* Take a copy of the transaction snapshot. */
		txn->ckpt_nsnapshot = txn->snapshot_count;
		/* Size the buffer for the largest packed form of each ID. */
		recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
		WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
		p = txn->ckpt_snapshot->mem;
		end = p + recsize;
		for (i = 0; i < txn->snapshot_count; i++)
			WT_ERR(__wt_vpack_uint(
			    &p, WT_PTRDIFF(end, p), txn->snapshot[i]));
		break;
	case WT_TXN_LOG_CKPT_STOP:
		/*
		 * During a clean connection close, we get here without the
		 * prepare or start steps.  In that case, log the current LSN
		 * as the checkpoint LSN.
		 */
		if (!txn->full_ckpt) {
			txn->ckpt_nsnapshot = 0;
			WT_CLEAR(empty);
			ckpt_snapshot = &empty;
			WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
		} else
			ckpt_snapshot = txn->ckpt_snapshot;

		/* Write the checkpoint log record. */
		rectype = WT_LOGREC_CHECKPOINT;
		fmt = WT_UNCHECKED_STRING(IIIIu);
		WT_ERR(__wt_struct_size(session, &recsize,
		    fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
		    txn->ckpt_nsnapshot, ckpt_snapshot));
		WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));

		WT_ERR(__wt_struct_pack(session,
		    (uint8_t *)logrec->data + logrec->size, recsize,
		    fmt, rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
		    txn->ckpt_nsnapshot, ckpt_snapshot));
		logrec->size += (uint32_t)recsize;
		/* The write is synced only if the connection asks for it. */
		WT_ERR(__wt_log_write(session, logrec, lsnp,
		    F_ISSET(conn, WT_CONN_CKPT_SYNC) ?
		    WT_LOG_FSYNC : 0));

		/*
		 * If this full checkpoint completed successfully and there is
		 * no hot backup in progress and this is not an unclean
		 * recovery, tell the logging subsystem the checkpoint LSN so
		 * that it can archive.  Do not update the logging checkpoint
		 * LSN if this is during a clean connection close, only during
		 * a full checkpoint.  A clean close may not update any
		 * metadata LSN and we do not want to archive in that case.
		 */
		if (!conn->hot_backup &&
		    (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) ||
		    FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
		    txn->full_ckpt)
			__wt_log_ckpt(session, ckpt_lsn);

		/* FALLTHROUGH */
	case WT_TXN_LOG_CKPT_CLEANUP:
		/* Cleanup any allocated resources */
		WT_INIT_LSN(ckpt_lsn);
		txn->ckpt_nsnapshot = 0;
		__wt_scr_free(session, &txn->ckpt_snapshot);
		txn->full_ckpt = false;
		break;
	WT_ILLEGAL_VALUE_ERR(session);
	}

	/* Success and failure both release the log record here. */
err:	__wt_logrec_free(session, &logrec);
	return (ret);
}
Ejemplo n.º 16
0
/*
 * __sweep --
 *	Walk the connection's data handle list, closing handles that have
 *	been idle past the sweep wait period and discarding those that no
 *	session references any more.
 */
static int
__sweep(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle, *next;
	WT_DECL_RET;
	time_t now;
	int locked;

	conn = S2C(session);

	/* The current time decides which handles are old enough. */
	WT_RET(__wt_seconds(session, &now));

	WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
	for (dhandle = SLIST_FIRST(&conn->dhlh);
	    dhandle != NULL; dhandle = next) {
		/* Save the successor before this handle can be discarded. */
		next = SLIST_NEXT(dhandle, l);

		/* Never the metadata, nothing in use, nothing recent. */
		if (WT_IS_METADATA(dhandle) ||
		    dhandle->session_inuse != 0 ||
		    now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
			continue;

		/* First sighting of an idle handle: start its clock. */
		if (dhandle->timeofdeath == 0) {
			dhandle->timeofdeath = now;
			WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
			continue;
		}

		/*
		 * This handle is a candidate for closing: take its lock
		 * exclusively and, if it is open, close it.  Opens may block
		 * behind us for the duration of the I/O, but the handle has
		 * been quiet for a while.
		 *
		 * Closing can fail when an update cannot be written yet
		 * (updates in a file nobody references any more might not be
		 * globally visible if sessions have disjoint sets of files
		 * open).  A busy handle is skipped and retried on a later
		 * sweep, once the transaction state has moved on.
		 *
		 * WT_DHANDLE_EXCLUSIVE is deliberately left unset so that
		 * opens wait for us instead of failing with EBUSY in the
		 * application.
		 */
		ret = __wt_try_writelock(session, dhandle->rwlock);
		if (ret == EBUSY)
			continue;
		WT_RET(ret);
		locked = 1;

		/* Close an open handle, counting each successful close. */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
			WT_WITH_DHANDLE(session, dhandle,
			    ret = __wt_conn_btree_sync_and_close(session, 0));
			if (ret == 0)
				WT_STAT_FAST_CONN_INCR(
				    session, dh_conn_handles);
		}

		/*
		 * Provided the close didn't fail, discard the handle once no
		 * session holds a reference to it.  The callee re-checks
		 * that the handle is not in use, which is why busy returns
		 * need no special treatment before this point.
		 */
		if (ret == 0) {
			if (dhandle->session_inuse == 0 &&
			    dhandle->session_ref == 0) {
				WT_WITH_DHANDLE(session, dhandle, ret =
				    __wt_conn_dhandle_discard_single(
				    session, 0));

				/* A discarded handle is no longer locked. */
				if (ret == 0)
					locked = 0;
			} else
				WT_STAT_FAST_CONN_INCR(session, dh_conn_ref);
		}

		if (locked)
			WT_TRET(__wt_writeunlock(session, dhandle->rwlock));

		WT_RET_BUSY_OK(ret);
	}
	return (0);
}
Ejemplo n.º 17
0
/*
 * __wt_txn_commit --
 *	Commit the current transaction.
 *
 *	session: the session whose running transaction is committed.
 *	cfg: configuration stack; the "commit_timestamp" and "sync" keys are
 *	    read here, and the stack is passed on to the log-commit and
 *	    rollback calls.
 *
 *	Returns 0 once the transaction is committed.  If any step before the
 *	commit point fails, the transaction is rolled back and the error is
 *	returned.
 */
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_TXN *txn;
	WT_TXN_GLOBAL *txn_global;
	WT_TXN_OP *op;
	u_int i;
	bool locked, readonly;
#ifdef HAVE_TIMESTAMPS
	wt_timestamp_t prev_commit_timestamp, ts;
	bool update_timestamp;
#endif

	txn = &session->txn;
	conn = S2C(session);
	txn_global = &conn->txn_global;
	/* Tracks whether the visibility lock is read-held by this call. */
	locked = false;

	WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) ||
	    txn->mod_count == 0);

	/* Remember this now: mod_count is reset to 0 further down. */
	readonly = txn->mod_count == 0;
	/*
	 * Look for a commit timestamp.
	 */
	WT_ERR(
	    __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval));
	if (cval.len != 0) {
#ifdef HAVE_TIMESTAMPS
		WT_ERR(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
		WT_ERR(__wt_timestamp_validate(session,
		    "commit", &ts, &cval, true, true, true));
		__wt_timestamp_set(&txn->commit_timestamp, &ts);
		__wt_txn_set_commit_timestamp(session);
#else
		WT_ERR_MSG(session, EINVAL, "commit_timestamp requires a "
		    "version of WiredTiger built with timestamp support");
#endif
	}

#ifdef HAVE_TIMESTAMPS
	/*
	 * Debugging checks on timestamps, if user requested them.
	 */
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
	    !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "commit_timestamp required and "
		    "none set on this transaction");
	if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
	    F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
	    txn->mod_count != 0)
		WT_ERR_MSG(session, EINVAL, "no commit_timestamp required and "
		    "timestamp set on this transaction");
#endif
	/*
	 * The default sync setting is inherited from the connection, but can
	 * be overridden by an explicit "sync" setting for this transaction.
	 */
	WT_ERR(__wt_config_gets_def(session, cfg, "sync", 0, &cval));

	/*
	 * If the user chose the default setting, check whether sync is enabled
	 * for this transaction (either inherited or via begin_transaction).
	 * If sync is disabled, clear the field to avoid the log write being
	 * flushed.
	 *
	 * Otherwise check for specific settings.  We don't need to check for
	 * "on" because that is the default inherited from the connection.  If
	 * the user set anything in begin_transaction, we only override with an
	 * explicit setting.
	 */
	if (cval.len == 0) {
		if (!FLD_ISSET(txn->txn_logsync, WT_LOG_SYNC_ENABLED) &&
		    !F_ISSET(txn, WT_TXN_SYNC_SET))
			txn->txn_logsync = 0;
	} else {
		/*
		 * If the caller already set sync on begin_transaction then
		 * they should not be using sync on commit_transaction.
		 * Flag that as an error.
		 */
		if (F_ISSET(txn, WT_TXN_SYNC_SET))
			WT_ERR_MSG(session, EINVAL,
			    "Sync already set during begin_transaction");
		if (WT_STRING_MATCH("background", cval.str, cval.len))
			txn->txn_logsync = WT_LOG_BACKGROUND;
		else if (WT_STRING_MATCH("off", cval.str, cval.len))
			txn->txn_logsync = 0;
		/*
		 * We don't need to check for "on" here because that is the
		 * default to inherit from the connection setting.
		 */
	}

	/* Commit notification. */
	if (txn->notify != NULL)
		WT_ERR(txn->notify->notify(txn->notify,
		    (WT_SESSION *)session, txn->id, 1));

	/*
	 * We are about to release the snapshot: copy values into any
	 * positioned cursors so they don't point to updates that could be
	 * freed once we don't have a snapshot.
	 */
	if (session->ncursors > 0) {
		WT_DIAGNOSTIC_YIELD;
		WT_ERR(__wt_session_copy_values(session));
	}

	/* If we are logging, write a commit log record. */
	if (txn->logrec != NULL &&
	    FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
	    !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
		/*
		 * We are about to block on I/O writing the log.
		 * Release our snapshot in case it is keeping data pinned.
		 * This is particularly important for checkpoints.
		 */
		__wt_txn_release_snapshot(session);
		/*
		 * We hold the visibility lock for reading from the time
		 * we write our log record until the time we release our
		 * transaction so that the LSN any checkpoint gets will
		 * always reflect visible data.
		 */
		__wt_readlock(session, &txn_global->visibility_rwlock);
		locked = true;
		WT_ERR(__wt_txn_log_commit(session, cfg));
	}

	/* Note: we're going to commit: nothing can fail after this point. */

	/* Process and free updates. */
	for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
		switch (op->type) {
		case WT_TXN_OP_BASIC:
		case WT_TXN_OP_BASIC_TS:
		case WT_TXN_OP_INMEM:
			/*
			 * Switch reserved operations to abort to
			 * simplify obsolete update list truncation.
			 */
			if (op->u.upd->type == WT_UPDATE_RESERVED) {
				op->u.upd->txnid = WT_TXN_ABORTED;
				break;
			}

			/*
			 * Writes to the lookaside file can be evicted as soon
			 * as they commit.
			 */
			if (conn->cache->las_fileid != 0 &&
			    op->fileid == conn->cache->las_fileid) {
				op->u.upd->txnid = WT_TXN_NONE;
				break;
			}

#ifdef HAVE_TIMESTAMPS
			/*
			 * Stamp the update with the commit timestamp, except
			 * for WT_TXN_OP_BASIC_TS operations.
			 */
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
			    op->type != WT_TXN_OP_BASIC_TS) {
				WT_ASSERT(session,
				    op->fileid != WT_METAFILE_ID);
				__wt_timestamp_set(&op->u.upd->timestamp,
				    &txn->commit_timestamp);
			}
#endif
			break;

		case WT_TXN_OP_REF:
#ifdef HAVE_TIMESTAMPS
			if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT))
				__wt_timestamp_set(
				    &op->u.ref->page_del->timestamp,
				    &txn->commit_timestamp);
#endif
			break;

		case WT_TXN_OP_TRUNCATE_COL:
		case WT_TXN_OP_TRUNCATE_ROW:
			/* Other operations don't need timestamps. */
			break;
		}

		__wt_txn_op_free(session, op);
	}
	txn->mod_count = 0;

#ifdef HAVE_TIMESTAMPS
	/*
	 * Track the largest commit timestamp we have seen.
	 *
	 * We don't actually clear the local commit timestamp, just the flag.
	 * That said, we can't update the global commit timestamp until this
	 * transaction is visible, which happens when we release it.
	 */
	update_timestamp = F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
#endif

	/* Release the transaction before dropping the visibility lock. */
	__wt_txn_release(session);
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);

#ifdef HAVE_TIMESTAMPS
	/* First check if we've already committed something in the future. */
	if (update_timestamp) {
		WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp));
		update_timestamp = __wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0;
	}

	/*
	 * If it looks like we need to move the global commit timestamp,
	 * write lock and re-check.
	 */
	if (update_timestamp) {
#if WT_TIMESTAMP_SIZE == 8
		/*
		 * 8-byte timestamps: advance the global value with a
		 * compare-and-swap loop instead of the write lock, re-reading
		 * the global value after each failed swap.
		 */
		while (__wt_timestamp_cmp(
		    &txn->commit_timestamp, &prev_commit_timestamp) > 0) {
			if (__wt_atomic_cas64(
			    &txn_global->commit_timestamp.val,
			    prev_commit_timestamp.val,
			    txn->commit_timestamp.val)) {
				txn_global->has_commit_timestamp = true;
				break;
			}
		    __wt_timestamp_set(
			&prev_commit_timestamp, &txn_global->commit_timestamp);
		}
#else
		__wt_writelock(session, &txn_global->rwlock);
		if (__wt_timestamp_cmp(&txn->commit_timestamp,
		    &txn_global->commit_timestamp) > 0) {
			__wt_timestamp_set(&txn_global->commit_timestamp,
			    &txn->commit_timestamp);
			txn_global->has_commit_timestamp = true;
		}
		__wt_writeunlock(session, &txn_global->rwlock);
#endif
	}
#endif

	/*
	 * We're between transactions, if we need to block for eviction, it's
	 * a good time to do so.  Note that we must ignore any error return
	 * because the user's data is committed.
	 */
	if (!readonly)
		(void)__wt_cache_eviction_check(session, false, false, NULL);
	return (0);

err:	/*
	 * If anything went wrong, roll back.
	 *
	 * !!!
	 * Nothing can fail after this point.
	 */
	if (locked)
		__wt_readunlock(session, &txn_global->visibility_rwlock);
	WT_TRET(__wt_txn_rollback(session, cfg));
	return (ret);
}
Ejemplo n.º 18
0
/*
 * __backup_start --
 *	Begin a hot backup: flag the connection, write the list of files to
 *	copy into a temporary backup file, then rename that file into place
 *	and publish the list on the connection.
 */
static int
__backup_start(
    WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[])
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FSTREAM *incr_src;
	const char *final_name;
	bool found, have_targets, log_only;

	conn = S2C(session);
	final_name = NULL;
	incr_src = NULL;

	/* The cursor starts out with an empty file list. */
	cb->list = NULL;
	cb->list_next = 0;
	cb->next = 0;

	WT_RET(__wt_inmem_unsupported_op(session, "backup cursor"));

	/*
	 * Only one hot backup can be active at a time.  The schema lock is
	 * held here, so we serialize with any other attempt to start a hot
	 * backup.
	 */
	if (conn->hot_backup)
		WT_RET_MSG(
		    session, EINVAL, "there is already a backup cursor open");

	/*
	 * The files are copied by something outside of WiredTiger, so blocks
	 * in them must not be freed and re-used before the backup finishes.
	 * Checkpoints look at the backup flag and won't discard checkpoints
	 * while a backup cursor is open.  The lock is dropped as soon as the
	 * flag is set: the goal is to stop checkpoints being deleted, not to
	 * block checkpoints.  A checkpoint holds this lock until it is done,
	 * which prevents starting a hot backup in the middle of a checkpoint
	 * that is already running.
	 *
	 * The checkpoint and schema locks are held, so schema operations
	 * can't observe the backup file list before it is complete and valid.
	 */
	__wt_writelock(session, &conn->hot_backup_lock);
	conn->hot_backup = true;
	conn->hot_backup_list = NULL;
	__wt_writeunlock(session, &conn->hot_backup_lock);

	/* From here on this cursor holds the backup and owns its cleanup. */
	F_SET(cb, WT_CURBACKUP_LOCKER);

	/*
	 * Open a temporary backup file before building the target list in
	 * backup_uri.  It is renamed to its final name (which depends on
	 * whether this is an incremental backup) only on success, so a
	 * failure or crash while filling it never leaves a partial file
	 * that could confuse a restart of the source database.
	 */
	WT_ERR(__wt_fopen(session, WT_BACKUP_TMP,
	    WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));

	/*
	 * Walk the caller's target list if there is one, otherwise back up
	 * every database object.
	 *
	 * A full backup also includes the log files; they are added before
	 * the data files so the metadata isn't rolled forward across a
	 * checkpoint that completes while the backup is running.
	 */
	have_targets = false;
	WT_ERR(__backup_uri(session, cfg, &have_targets, &log_only));

	if (!have_targets) {
		WT_ERR(__backup_log_append(session, cb, true));
		WT_ERR(__backup_all(session));
	}

	/* Finish the list with the backup file and the standard files. */
	if (!log_only) {
		final_name = WT_METADATA_BACKUP;
		WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));

		/* The configuration files are optional, add those present. */
		WT_ERR(__wt_fs_exist(session, WT_BASECONFIG, &found));
		if (found) {
			WT_ERR(__backup_list_append(
			    session, cb, WT_BASECONFIG));
		}
		WT_ERR(__wt_fs_exist(session, WT_USERCONFIG, &found));
		if (found) {
			WT_ERR(__backup_list_append(
			    session, cb, WT_USERCONFIG));
		}
		WT_ERR(__backup_list_append(session, cb, WT_WIREDTIGER));
	} else {
		/*
		 * An incremental backup source file is created as well, so
		 * a crash that leaves an incremental backup in the source
		 * directory can be told apart from an improper destination.
		 */
		final_name = WT_INCREMENTAL_BACKUP;
		WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC,
		    WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &incr_src));
		WT_ERR(__backup_list_append(
		    session, cb, WT_INCREMENTAL_BACKUP));
	}

err:	/* Success or failure, the files we opened are closed here. */
	WT_TRET(__wt_fclose(session, &cb->bfs));
	if (incr_src != NULL)
		WT_TRET(__wt_fclose(session, &incr_src));

	/* On failure the temporary file is never renamed into place. */
	if (ret != 0)
		return (ret);

	/* Give the backup file its final name and publish the file list. */
	WT_ASSERT(session, final_name != NULL);
	WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, final_name, false));
	__wt_writelock(session, &conn->hot_backup_lock);
	conn->hot_backup_list = cb->list;
	__wt_writeunlock(session, &conn->hot_backup_lock);

	return (ret);
}
Ejemplo n.º 19
0
/*
 * __log_server --
 *	Body of the log server thread: repeatedly force out buffered log
 *	writes and, at most about once a second or when signalled,
 *	pre-allocate and archive log files.
 */
static WT_THREAD_RET
__log_server(void *arg)
{
	struct timespec before, after;
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	uint64_t elapsed_ms;
	bool did_work, locked, signalled;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	signalled = false;
	locked = false;
	did_work = true;

	/*
	 * Milliseconds spent in the most recent wait.  Archive and
	 * pre-allocation run once this reaches a second; begin at that
	 * value so they run on the very first pass.
	 */
	elapsed_ms = WT_THOUSAND;

	/*
	 * This thread forces out buffered log writes, pre-allocates log
	 * files and archives log files.  Forcing out buffered writes is done
	 * here rather than in the wrlsn thread because that thread needs to
	 * move the write_lsn forward as fast as it can, and rather than in
	 * the log file server thread because its fsync calls can be slow and
	 * log records shouldn't wait in the buffer while an earlier file is
	 * being synced.
	 */
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Slots only get flushed by later activity, so push out any
		 * buffered writes in case the system is idle.  Doing this in
		 * the wrlsn thread can hang: a synchronous buffer may have to
		 * wait for the write_lsn that thread is advancing.
		 */
		WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work));

		/*
		 * Archiving and pre-allocation are needed far less often
		 * than forcing out log buffers: limit them to once a second
		 * unless the condition was signalled.
		 */
		if (signalled || elapsed_ms >= WT_THOUSAND) {
			/* Log file pre-allocation. */
			if (conn->log_prealloc > 0) {
				/*
				 * Skip pre-allocation while a hot backup
				 * cursor is open: we've promised not to
				 * rename or remove files in the database
				 * directory during a backup.
				 */
				WT_ERR(__wt_readlock(
				    session, conn->hot_backup_lock));
				locked = true;
				if (!conn->hot_backup)
					WT_ERR(__log_prealloc_once(session));
				WT_ERR(__wt_readunlock(
				    session, conn->hot_backup_lock));
				locked = false;
			}

			/* Log file archival. */
			if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
				if (__wt_try_writelock(
				    session, log->log_archive_lock) != 0) {
					WT_ERR(
					    __wt_verbose(session, WT_VERB_LOG,
					    "log_archive: Blocked due to open "
					    "log cursor holding archive lock"));
				} else {
					/*
					 * Always release the archive lock,
					 * then act on the archive result.
					 */
					ret = __log_archive_once(session, 0);
					WT_TRET(__wt_writeunlock(
					    session, log->log_archive_lock));
					WT_ERR(ret);
				}
			}
		}

		/* Sleep until the next event, timing how long we waited. */
		WT_ERR(__wt_epoch(session, &before));
		WT_ERR(__wt_cond_auto_wait_signal(session, conn->log_cond,
		    did_work, &signalled));
		WT_ERR(__wt_epoch(session, &after));
		elapsed_ms = WT_TIMEDIFF_MS(after, before);
	}

	return (WT_THREAD_RET_VALUE);

err:	__wt_err(session, ret, "log server error");
	/* Don't exit still holding the hot backup lock. */
	if (locked)
		WT_TRET(__wt_readunlock(
		    session, conn->hot_backup_lock));
	return (WT_THREAD_RET_VALUE);
}
Ejemplo n.º 20
0
/*
 * __backup_start --
 *	Start a backup.
 *
 *	Marks the connection as having a hot backup in progress, creates the
 *	backup file and appends the files to be copied to the cursor's list.
 *
 *	Returns 0 on success.  If a backup cursor is already open, fails with
 *	EINVAL before changing any connection state.  On any later failure
 *	the cursor's handles are cleaned up and the backup is stopped before
 *	the error is returned.
 */
static int
__backup_start(
    WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[])
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	bool exist, log_only, target_list;

	conn = S2C(session);

	cb->next = 0;
	cb->list = NULL;
	cb->list_next = 0;

	/*
	 * Single thread hot backups: we're holding the schema lock, so we
	 * know we'll serialize with other attempts to start a hot backup.
	 */
	if (conn->hot_backup)
		WT_RET_MSG(
		    session, EINVAL, "there is already a backup cursor open");

	/*
	 * The hot backup copy is done outside of WiredTiger, which means file
	 * blocks can't be freed and re-allocated until the backup completes.
	 * The checkpoint code checks the backup flag, and if a backup cursor
	 * is open checkpoints aren't discarded. We release the lock as soon
	 * as we've set the flag, we don't want to block checkpoints, we just
	 * want to make sure no checkpoints are deleted.  The checkpoint code
	 * holds the lock until it's finished the checkpoint, otherwise we
	 * could start a hot backup that would race with an already-started
	 * checkpoint.
	 */
	WT_RET(__wt_writelock(session, conn->hot_backup_lock));
	conn->hot_backup = true;
	WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock));

	/* Create the hot backup file. */
	WT_ERR(__backup_file_create(session, cb, false));

	/*
	 * If a list of targets was specified, work our way through them.
	 * Else, generate a list of all database objects.
	 *
	 * Include log files if doing a full backup, and copy them before
	 * copying data files to avoid rolling the metadata forward across
	 * a checkpoint that completes during the backup.
	 */
	target_list = false;
	WT_ERR(__backup_uri(session, cfg, &target_list, &log_only));

	if (!target_list) {
		WT_ERR(__backup_log_append(session, cb, true));
		WT_ERR(__backup_all(session));
	}

	/* Add the hot backup and standard WiredTiger files to the list. */
	if (log_only) {
		/*
		 * Close any hot backup file, we're about to open the
		 * incremental backup file.
		 *
		 * A failed close must stop us here: recording it with
		 * WT_TRET and carrying on would let the WT_ERR call that
		 * follows overwrite the saved error, and the failure would
		 * be lost whenever the create succeeds.
		 */
		WT_ERR(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE));
		WT_ERR(__backup_file_create(session, cb, log_only));
		WT_ERR(__backup_list_append(
		    session, cb, WT_INCREMENTAL_BACKUP));
	} else {
		WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));
		WT_ERR(__wt_exist(session, WT_BASECONFIG, &exist));
		if (exist)
			WT_ERR(__backup_list_append(
			    session, cb, WT_BASECONFIG));
		WT_ERR(__wt_exist(session, WT_USERCONFIG, &exist));
		if (exist)
			WT_ERR(__backup_list_append(
			    session, cb, WT_USERCONFIG));
		WT_ERR(__backup_list_append(session, cb, WT_WIREDTIGER));
	}

err:	/* Close the hot backup file. */
	WT_TRET(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE));

	/* On failure, undo everything: release handles, end the backup. */
	if (ret != 0) {
		WT_TRET(__backup_cleanup_handles(session, cb));
		WT_TRET(__backup_stop(session));
	}

	return (ret);
}
Ejemplo n.º 21
0
/*
 * __conn_btree_open --
 *	Open (or close and re-open) the btree behind the session's current
 *	data handle.  The schema lock and exclusive access to the handle are
 *	asserted on entry.
 */
static int
__conn_btree_open(
	WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
{
	WT_BTREE *btree;
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;

	btree = S2BT(session);
	dhandle = session->dhandle;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) &&
	    F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
	    !LF_ISSET(WT_DHANDLE_LOCK_ONLY));

	/*
	 * An open handle must be closed before it can be re-opened with a
	 * different configuration.  There's no need to re-check the mode: we
	 * are never called when the handle is already open as required.
	 *
	 * The close can fail with EBUSY when the object contains an update
	 * that isn't globally visible yet.  That only arises when moving
	 * from a normal handle to a "special" one, so the EBUSY is returned
	 * to a verify or other special operation, which is acceptable.  The
	 * opposite direction is safe: when a special operation's handle is
	 * closed, the tree has no updates that could block the close.
	 */
	if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
		WT_RET(__wt_conn_btree_sync_and_close(session, 0));

	/* Replace whatever configuration the handle had with a new one. */
	__conn_btree_config_clear(session);
	WT_RET(__conn_btree_config_set(session));

	/* Carry any special-operation flags over to the btree. */
	F_SET(btree, LF_ISSET(WT_BTREE_SPECIAL_FLAGS));

	/* Keep opening until the handle is seen open at the end of a pass. */
	for (;;) {
		WT_ERR(__wt_btree_open(session, cfg));
		F_SET(dhandle, WT_DHANDLE_OPEN);

		/*
		 * Checkpoint handles are read-only; leave them out of the
		 * btree count used for eviction calculations.
		 */
		if (dhandle->checkpoint == NULL)
			S2C(session)->open_btree_count += 1;

		/*
		 * If the caller didn't ask for exclusive access, give up the
		 * write lock and re-acquire the handle through the normal
		 * open-lock path.
		 */
		if (!LF_ISSET(WT_DHANDLE_EXCLUSIVE)) {
			F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
			WT_ERR(__wt_writeunlock(session, dhandle->rwlock));
			WT_ERR(
			    __conn_dhandle_open_lock(session, dhandle, flags));
		}

		if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
			break;
	}

	return (ret);

err:	F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
	/* A handle left open by a failed open is closed again. */
	if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
		WT_TRET(__wt_conn_btree_sync_and_close(session, 0));

	return (ret);
}
Ejemplo n.º 22
0
/*
 * __conn_dhandle_open_lock --
 *	Spin on the current data handle until either (a) it is open, read
 *	locked; or (b) it is closed, write locked.  If exclusive access is
 *	requested and cannot be granted immediately because the handle is
 *	in use, fail with EBUSY.
 *
 *	Here is a brief summary of how different operations synchronize using
 *	either the schema lock, handle locks or handle flags:
 *
 *	open -- holds the schema lock, one thread gets the handle exclusive,
 *		reverts to a shared handle lock and drops the schema lock
 *		once the handle is open;
 *	bulk load -- sets bulk and exclusive;
 *	salvage, truncate, update, verify -- hold the schema lock, set a
 *		"special" flag;
 *	sweep -- gets a write lock on the handle, doesn't set exclusive
 *
 *	The schema lock prevents a lot of potential conflicts: we should never
 *	see handles being salvaged or verified because those operations hold
 *	the schema lock.  However, it is possible to see a handle that is being
 *	bulk loaded, or that the sweep server is closing.
 *
 *	The principle here is that application operations can cause other
 *	application operations to fail (so attempting to open a cursor on a
 *	file while it is being bulk-loaded will fail), but internal or
 *	database-wide operations should not prevent application-initiated
 *	operations.  For example, attempting to verify a file should not fail
 *	because the sweep server happens to be in the process of closing that
 *	file.
 *
 *	Arguments:
 *	session -- the calling session;
 *	dhandle -- the data handle to lock;
 *	flags -- only WT_DHANDLE_EXCLUSIVE is examined here.
 *
 *	Returns 0 with the handle locked: read locked if the handle was found
 *	open and exclusive access was not requested, otherwise write locked
 *	with WT_DHANDLE_EXCLUSIVE set on the handle.  Returns EBUSY if the
 *	btree has a special flag set, or if exclusive access was requested
 *	and, after a failed write lock attempt, the handle was seen open
 *	under the read lock.  Any other error from the lock calls is
 *	returned unchanged.
 */
static int
__conn_dhandle_open_lock(
    WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, uint32_t flags)
{
	WT_BTREE *btree;
	WT_DECL_RET;
	int is_open, lock_busy, want_exclusive;

	btree = dhandle->handle;
	/* Set once an attempt to get the write lock has failed with EBUSY. */
	lock_busy = 0;
	want_exclusive = LF_ISSET(WT_DHANDLE_EXCLUSIVE) ? 1 : 0;

	/*
	 * Check that the handle is open.  We've already incremented
	 * the reference count, so once the handle is open it won't be
	 * closed by another thread.
	 *
	 * If we can see the WT_DHANDLE_OPEN flag set while holding a
	 * lock on the handle, then it's really open and we can start
	 * using it.  Alternatively, if we can get an exclusive lock
	 * and WT_DHANDLE_OPEN is still not set, we need to do the open.
	 */
	for (;;) {
		/*
		 * If the handle is already open for a special operation,
		 * give up.
		 */
		if (F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS))
			return (EBUSY);

		/*
		 * If the handle is open, get a read lock and recheck.
		 *
		 * Wait for a read lock if we want exclusive access and failed
		 * to get it: the sweep server may be closing this handle, and
		 * we need to wait for it to complete.  If we want exclusive
		 * access and find the handle open once we get the read lock,
		 * give up: some other thread has it locked for real.
		 */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
		    (!want_exclusive || lock_busy)) {
			WT_RET(__wt_readlock(session, dhandle->rwlock));
			/* Re-test the flag now that the read lock is held. */
			is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN) ? 1 : 0;
			/* Case (a): open and shared, return read locked. */
			if (is_open && !want_exclusive)
				return (0);
			WT_RET(__wt_readunlock(session, dhandle->rwlock));
		} else
			is_open = 0;

		/*
		 * It isn't open or we want it exclusive: try to get an
		 * exclusive lock.  There is some subtlety here: if we race
		 * with another thread that successfully opens the file, we
		 * don't want to block waiting to get exclusive access.
		 */
		if ((ret = __wt_try_writelock(session, dhandle->rwlock)) == 0) {
			/*
			 * If it was opened while we waited, drop the write
			 * lock and get a read lock instead.
			 */
			if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
			    !want_exclusive) {
				lock_busy = 0;
				WT_RET(
				    __wt_writeunlock(session, dhandle->rwlock));
				continue;
			}

			/* We have an exclusive lock, we're done. */
			F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
			return (0);
		} else if (ret != EBUSY || (is_open && want_exclusive))
			/*
			 * Either a real error, or EBUSY for an exclusive
			 * request on a handle we saw open under the read
			 * lock: in both cases return it to the caller.
			 */
			return (ret);
		else
			/* The write lock was busy: note it and retry. */
			lock_busy = 1;

		/* Give other threads a chance to make progress. */
		__wt_yield();
	}
}
Ejemplo n.º 23
0
/*
 * __log_server --
 *	Body of the log server thread: force out buffered log writes on
 *	every pass and, once per WT_FORCE_PER_SECOND passes or when
 *	signalled, pre-allocate and archive log files.
 */
static WT_THREAD_RET
__log_server(void *arg)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_SESSION_IMPL *session;
	int passes_left, signalled;

	session = arg;
	conn = S2C(session);
	log = conn->log;
	signalled = 0;

	/*
	 * How many times a second the log slot buffer is forced out; also
	 * the number of passes between archive/pre-allocation runs.
	 */
#define	WT_FORCE_PER_SECOND	20
	passes_left = WT_FORCE_PER_SECOND;

	/*
	 * This thread forces out buffered log writes, pre-allocates log
	 * files and archives log files.  Forcing out buffered writes is done
	 * here rather than in the wrlsn thread because that thread needs to
	 * move the write_lsn forward as fast as it can, and rather than in
	 * the log file server thread because its fsync calls can be slow and
	 * log records shouldn't wait in the buffer while an earlier file is
	 * being synced.
	 */
	while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
		/*
		 * Slots only get flushed by later activity, so push out any
		 * buffered writes in case the system is idle.  Doing this in
		 * the wrlsn thread can hang: a synchronous buffer may have to
		 * wait for the write_lsn that thread is advancing.
		 */
		WT_ERR_BUSY_OK(__wt_log_force_write(session, 0));

		/*
		 * Archiving and pre-allocation are needed far less often
		 * than forcing out log buffers: limit them to once a second
		 * unless the condition was signalled.
		 */
		--passes_left;
		if (passes_left <= 0 || signalled != 0) {
			passes_left = WT_FORCE_PER_SECOND;

			/* Log file pre-allocation. */
			if (conn->log_prealloc > 0)
				WT_ERR(__log_prealloc_once(session));

			/* Log file archival. */
			if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) {
				if (__wt_try_writelock(
				    session, log->log_archive_lock) != 0) {
					WT_ERR(
					    __wt_verbose(session, WT_VERB_LOG,
					    "log_archive: Blocked due to open "
					    "log cursor holding archive lock"));
				} else {
					/*
					 * Always release the archive lock,
					 * then act on the archive result.
					 */
					ret = __log_archive_once(session, 0);
					WT_TRET(__wt_writeunlock(
					    session, log->log_archive_lock));
					WT_ERR(ret);
				}
			}
		}

		/* Sleep until signalled or the force interval passes. */
		WT_ERR(__wt_cond_wait_signal(session, conn->log_cond,
		    WT_MILLION / WT_FORCE_PER_SECOND, &signalled));
	}

	return (WT_THREAD_RET_VALUE);

err:	__wt_err(session, ret, "log server error");
	return (WT_THREAD_RET_VALUE);
}
Ejemplo n.º 24
0
/*
 * __compact_rewrite --
 *	Report whether a page needs to be re-written.  *skipp defaults to
 *	true; when the page's addresses are tested, it is set by the block
 *	manager's compact_page_skip method.
 */
static int
__compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
	WT_BM *bm;
	WT_DECL_RET;
	WT_MULTI *multi;
	WT_PAGE *page;
	WT_PAGE_MODIFY *mod;
	size_t addr_size;
	uint32_t i;
	const uint8_t *addr;

	/* Skip the page unless something below says otherwise. */
	*skipp = true;

	bm = S2BT(session)->bm;
	page = ref->page;
	mod = page->modify;

	/*
	 * Two kinds of page are never tested.  The root may have no
	 * replacement address, and gets written anyway if anything else is.
	 * Dirty pages are going to be written regardless.
	 */
	if (__wt_ref_is_root(ref) || __wt_page_is_modified(page))
		return (0);

	/*
	 * A clean page is tested using its original address, a replaced
	 * page using its replacement address(es).  Empty pages are ignored,
	 * they get merged into the parent.
	 */
	if (mod == NULL || mod->rec_result == 0) {
		__wt_ref_info(ref, &addr, &addr_size, NULL);
		return (addr == NULL ? 0 :
		    bm->compact_page_skip(bm, session, addr, addr_size, skipp));
	}

	/*
	 * Reconciliation can change the page's modification information
	 * underneath us: take the page lock to serialize with it.
	 */
	if (mod->rec_result == WT_PM_REC_REPLACE ||
	    mod->rec_result == WT_PM_REC_MULTIBLOCK) {
		__wt_writelock(session, &page->page_lock);
	}

	/* Single replacement block: test its address. */
	if (mod->rec_result == WT_PM_REC_REPLACE) {
		ret = bm->compact_page_skip(bm, session,
		    mod->mod_replace.addr, mod->mod_replace.size, skipp);
	}

	/*
	 * Multiple replacement blocks: test each block that has no disk
	 * image, stopping at the first error or the first block that should
	 * not be skipped.
	 */
	if (mod->rec_result == WT_PM_REC_MULTIBLOCK) {
		for (i = 0, multi = mod->mod_multi;
		    i < mod->mod_multi_entries; ++i, ++multi) {
			if (multi->disk_image != NULL)
				continue;
			ret = bm->compact_page_skip(bm, session,
			    multi->addr.addr, multi->addr.size, skipp);
			if (ret != 0 || !*skipp)
				break;
		}
	}

	if (mod->rec_result == WT_PM_REC_REPLACE ||
	    mod->rec_result == WT_PM_REC_MULTIBLOCK) {
		__wt_writeunlock(session, &page->page_lock);
	}

	return (ret);
}