Beispiel #1
0
/*
 * __wt_log_get_active_files --
 *	Retrieve the list of active log files (those that are not candidates
 *	for archiving).
 */
int
__wt_log_get_active_files(
	WT_SESSION_IMPL *session, char ***filesp, u_int *countp)
{
	WT_DECL_RET;
	WT_LOG *log;
	char **files;
	uint32_t id;
	u_int count, i;

	id = 0;
	log = S2C(session)->log;

	WT_RET(__wt_log_get_files(session, &files, &count));

	/* Filter out any files that are below the checkpoint LSN. */
	for (i = 0; i < count; ) {
		WT_ERR(__wt_log_extract_lognum(session, files[i], &id));
		if (id < log->ckpt_lsn.file) {
			__wt_free(session, files[i]);
			files[i] = files[count - 1];
			files[--count] = NULL;
		} else
			i++;
	}

	*filesp = files;
	*countp = count;

	if (0) {
err:		__wt_log_files_free(session, files, count);
	}
	return (ret);
}
Beispiel #2
0
/*
 * __log_truncate --
 *	Truncate the log to the given LSN.  If this_log is set, it will only
 *	truncate the log file indicated in the given LSN.  If not set,
 *	it will truncate between the given LSN and the trunc_lsn.  That is,
 *	since we pre-allocate log files, it will free that space and allow the
 *	log to be traversed.  We use the trunc_lsn because logging has already
 *	opened the new/next log file before recovery ran.  This function assumes
 *	we are in recovery or other dedicated time and not during live running.
 */
static int
__log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, uint32_t this_log)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *log_fh, *tmp_fh;
	WT_LOG *log;
	uint32_t lognum;
	u_int i, logcount;
	char **logfiles;

	conn = S2C(session);
	log = conn->log;
	log_fh = NULL;
	logcount = 0;
	logfiles = NULL;

	/*
	 * Truncate the log file to the given LSN.
	 */
	WT_ERR(__log_openfile(session, 0, &log_fh, lsn->file));
	WT_ERR(__wt_ftruncate(session, log_fh, lsn->offset));
	tmp_fh = log_fh;
	log_fh = NULL;
	WT_ERR(__wt_close(session, tmp_fh));

	/*
	 * If we just want to truncate the current log, return and skip
	 * looking for intervening logs.
	 */
	if (this_log)
		goto err;
	WT_ERR(__wt_log_get_files(session, &logfiles, &logcount));
	for (i = 0; i < logcount; i++) {
		WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
		if (lognum > lsn->file && lognum < log->trunc_lsn.file) {
			WT_ERR(__log_openfile(session, 0, &log_fh, lognum));
			/*
			 * If there are intervening files pre-allocated,
			 * truncate them to the end of the log file header.
			 */
			WT_ERR(__wt_ftruncate(session,
			    log_fh, LOG_FIRST_RECORD));
			tmp_fh = log_fh;
			log_fh = NULL;
			WT_ERR(__wt_close(session, tmp_fh));
		}
	}
err:	if (log_fh != NULL)
		WT_TRET(__wt_close(session, log_fh));
	if (logfiles != NULL)
		__wt_log_files_free(session, logfiles, logcount);
	return (ret);
}
Beispiel #3
0
/*
 * __wt_log_open --
 *	Open the appropriate log file for the connection.  The purpose is
 *	to find the last log file that exists, open it and set our initial
 *	LSNs to the end of that file.  If none exist, call __wt_log_newfile
 *	to create it.
 */
int
__wt_log_open(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint32_t firstlog, lastlog, lognum;
	u_int i, logcount;
	char **logfiles;

	conn = S2C(session);
	log = conn->log;
	lastlog = 0;
	firstlog = UINT32_MAX;

	WT_RET(__wt_log_get_files(session, &logfiles, &logcount));
	for (i = 0; i < logcount; i++) {
		WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
		lastlog = WT_MAX(lastlog, lognum);
		firstlog = WT_MIN(firstlog, lognum);
	}
	log->fileid = lastlog;
	WT_ERR(__wt_verbose(session, WT_VERB_LOG,
	    "log_open: first log %d last log %d", firstlog, lastlog));
	log->first_lsn.file = firstlog;
	log->first_lsn.offset = 0;

	/*
	 * Start logging at the beginning of the next log file, no matter
	 * where the previous log file ends.
	 */
	WT_ERR(__wt_log_newfile(session, 1));

	/*
	 * If there were log files, run recovery.
	 * XXX belongs at a higher level than this.
	 */
	if (logcount > 0) {
		log->trunc_lsn = log->alloc_lsn;
		WT_ERR(__wt_txn_recover(session));
	}

err:	__wt_log_files_free(session, logfiles, logcount);
	return (ret);
}
Beispiel #4
0
/*
 * __backup_log_append --
 *	Append log files needed for backup.
 */
static int
__backup_log_append(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, int active)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	u_int i, logcount;
	char **logfiles;

	conn = S2C(session);
	logfiles = NULL;
	logcount = 0;
	ret = 0;

	if (conn->log) {
		WT_ERR(__wt_log_get_all_files(
		    session, &logfiles, &logcount, &cb->maxid, active));
		for (i = 0; i < logcount; i++)
			WT_ERR(__backup_list_append(session, cb, logfiles[i]));
	}
err:	if (logfiles != NULL)
		__wt_log_files_free(session, logfiles, logcount);
	return (ret);
}
Beispiel #5
0
/*
 * __log_archive_once --
 *	Perform one iteration of log archiving.  Must be called with the
 *	log archive lock held.
 */
static int
__log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint32_t lognum, min_lognum;
	u_int i, locked, logcount;
	char **logfiles;

	conn = S2C(session);
	log = conn->log;
	logcount = 0;
	logfiles = NULL;

	/*
	 * If we're coming from a backup cursor we want the smaller of
	 * the last full log file copied in backup or the checkpoint LSN.
	 * Otherwise we want the minimum of the last log file written to
	 * disk and the checkpoint LSN.
	 */
	if (backup_file != 0)
		min_lognum = WT_MIN(log->ckpt_lsn.file, backup_file);
	else
		min_lognum = WT_MIN(log->ckpt_lsn.file, log->sync_lsn.file);
	WT_RET(__wt_verbose(session, WT_VERB_LOG,
	    "log_archive: archive to log number %" PRIu32, min_lognum));

	/*
	 * Main archive code.  Get the list of all log files and
	 * remove any earlier than the minimum log number.
	 */
	WT_RET(__wt_dirlist(session, conn->log_path,
	    WT_LOG_FILENAME, WT_DIRLIST_INCLUDE, &logfiles, &logcount));

	/*
	 * We can only archive files if a hot backup is not in progress or
	 * if we are the backup.
	 */
	WT_RET(__wt_readlock(session, conn->hot_backup_lock));
	locked = 1;
	if (conn->hot_backup == 0 || backup_file != 0) {
		for (i = 0; i < logcount; i++) {
			WT_ERR(__wt_log_extract_lognum(
			    session, logfiles[i], &lognum));
			if (lognum < min_lognum)
				WT_ERR(__wt_log_remove(
				    session, WT_LOG_FILENAME, lognum));
		}
	}
	WT_ERR(__wt_readunlock(session, conn->hot_backup_lock));
	locked = 0;
	__wt_log_files_free(session, logfiles, logcount);
	logfiles = NULL;
	logcount = 0;

	/*
	 * Indicate what is our new earliest LSN.  It is the start
	 * of the log file containing the last checkpoint.
	 */
	log->first_lsn.file = min_lognum;
	log->first_lsn.offset = 0;

	if (0)
err:		__wt_err(session, ret, "log archive server error");
	if (locked)
		WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
	if (logfiles != NULL)
		__wt_log_files_free(session, logfiles, logcount);
	return (ret);
}
Beispiel #6
0
/*
 * __wt_log_scan --
 *	Scan the logs, calling a function on each record found.
 */
int
__wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
    int (*func)(WT_SESSION_IMPL *session,
    WT_ITEM *record, WT_LSN *lsnp, void *cookie), void *cookie)
{
	WT_CONNECTION_IMPL *conn;
	WT_ITEM buf;
	WT_DECL_RET;
	WT_FH *log_fh;
	WT_LOG *log;
	WT_LOG_RECORD *logrec;
	WT_LSN end_lsn, rd_lsn, start_lsn;
	off_t log_size;
	uint32_t allocsize, cksum, firstlog, lastlog, lognum, rdup_len, reclen;
	u_int i, logcount;
	int eol;
	char **logfiles;

	conn = S2C(session);
	log = conn->log;
	log_fh = NULL;
	logcount = 0;
	logfiles = NULL;
	eol = 0;
	WT_CLEAR(buf);

	/*
	 * If the caller did not give us a callback function there is nothing
	 * to do.
	 */
	if (func == NULL)
		return (0);

	if (LF_ISSET(WT_LOGSCAN_RECOVER))
		WT_RET(__wt_verbose(session, WT_VERB_LOG,
		    "__wt_log_scan truncating to %u/%" PRIuMAX,
		    log->trunc_lsn.file, (uintmax_t)log->trunc_lsn.offset));

	if (log != NULL) {
		allocsize = log->allocsize;

		if (lsnp == NULL) {
			if (LF_ISSET(WT_LOGSCAN_FIRST))
				start_lsn = log->first_lsn;
			else if (LF_ISSET(WT_LOGSCAN_FROM_CKP))
				start_lsn = log->ckpt_lsn;
			else
				return (WT_ERROR);	/* Illegal usage */
		} else {
			if (LF_ISSET(WT_LOGSCAN_FIRST|WT_LOGSCAN_FROM_CKP))
				WT_RET_MSG(session, WT_ERROR,
			    "choose either a start LSN or a start flag");

			/* Offsets must be on allocation boundaries. */
			if (lsnp->offset % allocsize != 0 ||
			    lsnp->file > log->fileid)
				return (WT_NOTFOUND);

			/*
			 * Log cursors may not know the starting LSN.  If an
			 * LSN pointer is passed in, but it is the INIT_LSN,
			 * start from the first_lsn.
			 */
			start_lsn = *lsnp;
			if (IS_INIT_LSN(&start_lsn))
				start_lsn = log->first_lsn;
		}
		end_lsn = log->alloc_lsn;
	} else {
		/*
		 * If logging is not configured, we can still print out the log
		 * if log files exist.  We just need to set the LSNs from what
		 * is in the files versus what is in the live connection.
		 */
		/*
		 * Set allocsize to the minimum alignment it could be.  Larger
		 * records and larger allocation boundaries should always be
		 * a multiple of this.
		 */
		allocsize = LOG_ALIGN;
		lastlog = 0;
		firstlog = UINT32_MAX;
		WT_RET(__wt_log_get_files(session, &logfiles, &logcount));
		if (logcount == 0)
			/*
			 * Return it is not supported if none don't exist.
			 */
			return (ENOTSUP);
		for (i = 0; i < logcount; i++) {
			WT_ERR(__wt_log_extract_lognum(session, logfiles[i],
			    &lognum));
			lastlog = WT_MAX(lastlog, lognum);
			firstlog = WT_MIN(firstlog, lognum);
		}
		start_lsn.file = firstlog;
		end_lsn.file = lastlog;
		start_lsn.offset = end_lsn.offset = 0;
		__wt_log_files_free(session, logfiles, logcount);
		logfiles = NULL;
	}
	WT_ERR(__log_openfile(session, 0, &log_fh, start_lsn.file));
	WT_ERR(__log_filesize(session, log_fh, &log_size));
	rd_lsn = start_lsn;
	WT_ERR(__wt_buf_initsize(session, &buf, LOG_ALIGN));
	for (;;) {
		if (rd_lsn.offset + allocsize > log_size) {
advance:
			/*
			 * If we read the last record, go to the next file.
			 */
			WT_ERR(__wt_close(session, log_fh));
			log_fh = NULL;
			eol = 1;
			/*
			 * Truncate this log file before we move to the next.
			 */
			if (LF_ISSET(WT_LOGSCAN_RECOVER))
				WT_ERR(__log_truncate(session, &rd_lsn, 1));
			rd_lsn.file++;
			rd_lsn.offset = 0;
			/*
			 * Avoid an error message when we reach end of log
			 * by checking here.
			 */
			if (rd_lsn.file > end_lsn.file)
				break;
			WT_ERR(__log_openfile(
			    session, 0, &log_fh, rd_lsn.file));
			WT_ERR(__log_filesize(session, log_fh, &log_size));
			continue;
		}
		/*
		 * Read the minimum allocation size a record could be.
		 */
		WT_ASSERT(session, buf.memsize >= allocsize);
		WT_ERR(__wt_read(session,
		    log_fh, rd_lsn.offset, (size_t)allocsize, buf.mem));
		/*
		 * First 8 bytes is the real record length.  See if we
		 * need to read more than the allocation size.  We expect
		 * that we rarely will have to read more.  Most log records
		 * will be fairly small.
		 */
		reclen = *(uint32_t *)buf.mem;
		/*
		 * Log files are pre-allocated.  We never expect a zero length
		 * unless we've reached the end of the log.  The log can be
		 * written out of order, so when recovery finds the end of
		 * the log, truncate the file and remove any later log files
		 * that may exist.
		 */
		if (reclen == 0) {
			/* This LSN is the end. */
			break;
		}
		rdup_len = __wt_rduppo2(reclen, allocsize);
		if (reclen > allocsize) {
			/*
			 * The log file end could be the middle of this
			 * log record.
			 */
			if (rd_lsn.offset + rdup_len > log_size)
				goto advance;
			/*
			 * We need to round up and read in the full padded
			 * record, especially for direct I/O.
			 */
			WT_ERR(__wt_buf_grow(session, &buf, rdup_len));
			WT_ERR(__wt_read(session,
			    log_fh, rd_lsn.offset, (size_t)rdup_len, buf.mem));
			WT_STAT_FAST_CONN_INCR(session, log_scan_rereads);
		}
		/*
		 * We read in the record, verify checksum.
		 */
		buf.size = reclen;
		logrec = (WT_LOG_RECORD *)buf.mem;
		cksum = logrec->checksum;
		logrec->checksum = 0;
		logrec->checksum = __wt_cksum(logrec, logrec->len);
		if (logrec->checksum != cksum) {
			/*
			 * A checksum mismatch means we have reached the end of
			 * the useful part of the log.  This should be found on
			 * the first pass through recovery.  In the second pass
			 * where we truncate the log, this is where it should
			 * end.
			 */
			if (log != NULL)
				log->trunc_lsn = rd_lsn;
			break;
		}

		/*
		 * We have a valid log record.  If it is not the log file
		 * header, invoke the callback.
		 */
		WT_STAT_FAST_CONN_INCR(session, log_scan_records);
		if (rd_lsn.offset != 0) {
			WT_ERR((*func)(session, &buf, &rd_lsn, cookie));
			if (LF_ISSET(WT_LOGSCAN_ONE))
				break;
		}
		rd_lsn.offset += (off_t)rdup_len;
	}

	/* Truncate if we're in recovery. */
	if (LF_ISSET(WT_LOGSCAN_RECOVER) &&
	    LOG_CMP(&rd_lsn, &log->trunc_lsn) < 0)
		WT_ERR(__log_truncate(session, &rd_lsn, 0));

err:	WT_STAT_FAST_CONN_INCR(session, log_scans);
	if (logfiles != NULL)
		__wt_log_files_free(session, logfiles, logcount);
	__wt_buf_free(session, &buf);
	/*
	 * If the caller wants one record and it is at the end of log,
	 * return WT_NOTFOUND.
	 */
	if (LF_ISSET(WT_LOGSCAN_ONE) && eol && ret == 0)
		ret = WT_NOTFOUND;
	if (ret == ENOENT)
		ret = 0;
	if (log_fh != NULL)
		WT_TRET(__wt_close(session, log_fh));
	return (ret);
}
Beispiel #7
0
/*
 * __backup_start --
 *	Start a backup.
 */
static int
__backup_start(
    WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[])
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	u_int i, logcount;
	int target_list;
	char **logfiles;

	conn = S2C(session);

	cb->next = 0;
	cb->list = NULL;
	logfiles = NULL;
	logcount = 0;

	/*
	 * Single thread hot backups: we're holding the schema lock, so we
	 * know we'll serialize with other attempts to start a hot backup.
	 */
	if (conn->hot_backup)
		WT_RET_MSG(
		    session, EINVAL, "there is already a backup cursor open");

	/*
	 * The hot backup copy is done outside of WiredTiger, which means file
	 * blocks can't be freed and re-allocated until the backup completes.
	 * The checkpoint code checks the backup flag, and if a backup cursor
	 * is open checkpoints aren't discarded.   We release the lock as soon
	 * as we've set the flag, we don't want to block checkpoints, we just
	 * want to make sure no checkpoints are deleted.  The checkpoint code
	 * holds the lock until it's finished the checkpoint, otherwise we
	 * could start a hot backup that would race with an already-started
	 * checkpoint.
	 */
	__wt_spin_lock(session, &conn->hot_backup_lock);
	conn->hot_backup = 1;
	__wt_spin_unlock(session, &conn->hot_backup_lock);

	/* Create the hot backup file. */
	WT_ERR(__backup_file_create(session, cb));

	/* Add log files if logging is enabled. */

	/*
	 * If a list of targets was specified, work our way through them.
	 * Else, generate a list of all database objects.
	 *
	 * Include log files if doing a full backup, and copy them before
	 * copying data files to avoid rolling the metadata forward across
	 * a checkpoint that completes during the backup.
	 */
	target_list = 0;
	WT_ERR(__backup_uri(session, cb, cfg, &target_list));
	if (!target_list) {
		if (conn->log) {
			WT_ERR(__wt_log_get_active_files(
			    session, &logfiles, &logcount));
			for (i = 0; i < logcount; i++)
				WT_ERR(__backup_list_append(
				    session, cb, logfiles[i]));
		}

		WT_ERR(__backup_all(session, cb));
	}

	/* Add the hot backup and single-threading file to the list. */
	WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));
	WT_ERR(__backup_list_append(session, cb, WT_SINGLETHREAD));

err:	/* Close the hot backup file. */
	if (cb->bfp != NULL) {
		WT_TRET(fclose(cb->bfp) == 0 ? 0 : __wt_errno());
		cb->bfp = NULL;
	}
	if (logfiles != NULL)
		__wt_log_files_free(session, logfiles, logcount);

	if (ret != 0) {
		WT_TRET(__backup_cleanup_handles(session, cb));
		WT_TRET(__backup_stop(session));
	}

	return (ret);
}