/*
 * __wt_log_get_active_files --
 *	Hand back the names of the log files that are still active, that is,
 *	the ones that are not candidates for archiving.
 *
 *	The full list is fetched with __wt_log_get_files and then pruned in
 *	place: any file whose log number is below the checkpoint LSN's file
 *	number is freed and dropped.  Pruning swaps the last entry into the
 *	vacated slot, so the order of the returned names is not preserved.
 *
 *	On success *filesp and *countp describe the pruned list.  On failure
 *	the list is released and neither output is written.
 */
int
__wt_log_get_active_files(
    WT_SESSION_IMPL *session, char ***filesp, u_int *countp)
{
	WT_DECL_RET;
	WT_LOG *log;
	char **list;
	uint32_t lognum;
	u_int nfiles, slot;

	lognum = 0;
	log = S2C(session)->log;

	WT_RET(__wt_log_get_files(session, &list, &nfiles));

	/*
	 * Walk the list.  A slot only advances when its file is kept; when a
	 * file is dropped the slot is refilled from the tail and re-examined.
	 */
	slot = 0;
	while (slot < nfiles) {
		WT_ERR(__wt_log_extract_lognum(session, list[slot], &lognum));
		if (lognum >= log->ckpt_lsn.file) {
			++slot;
			continue;
		}

		/* Below the checkpoint LSN: discard and compact. */
		__wt_free(session, list[slot]);
		--nfiles;
		list[slot] = list[nfiles];
		list[nfiles] = NULL;
	}

	*filesp = list;
	*countp = nfiles;
	return (ret);

err:	__wt_log_files_free(session, list, nfiles);
	return (ret);
}
/*
 * __log_truncate --
 *	Truncate the log to the given LSN.
 *
 *	The file named by lsn->file is always cut back to lsn->offset.  When
 *	this_log is non-zero that is all that happens.  Otherwise every log
 *	file numbered strictly between lsn->file and the log's trunc_lsn file
 *	is also cut back to LOG_FIRST_RECORD (the end of the log file header),
 *	releasing pre-allocated space so the log can be traversed.  trunc_lsn
 *	is the upper bound because logging has already opened the new/next
 *	log file before recovery ran.
 *
 *	This function assumes it runs during recovery or some other dedicated
 *	time, not during live running.
 */
static int
__log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, uint32_t this_log)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh, *closing;
	WT_LOG *log;
	uint32_t filenum;
	u_int nfiles, slot;
	char **names;

	conn = S2C(session);
	log = conn->log;
	fh = NULL;
	names = NULL;
	nfiles = 0;

	/*
	 * Cut the target log file back to the given LSN.  The handle is
	 * moved into a temporary before closing so the cleanup code never
	 * sees a handle that has already been passed to __wt_close.
	 */
	WT_ERR(__log_openfile(session, 0, &fh, lsn->file));
	WT_ERR(__wt_ftruncate(session, fh, lsn->offset));
	closing = fh;
	fh = NULL;
	WT_ERR(__wt_close(session, closing));

	/*
	 * Only look for intervening log files when the caller asked for
	 * more than the single file named in the LSN.
	 */
	if (this_log == 0) {
		WT_ERR(__wt_log_get_files(session, &names, &nfiles));
		for (slot = 0; slot < nfiles; ++slot) {
			WT_ERR(__wt_log_extract_lognum(
			    session, names[slot], &filenum));

			/* Skip files outside the open interval. */
			if (filenum <= lsn->file ||
			    filenum >= log->trunc_lsn.file)
				continue;

			/*
			 * An intervening pre-allocated file: truncate it to
			 * the end of the log file header.
			 */
			WT_ERR(__log_openfile(session, 0, &fh, filenum));
			WT_ERR(__wt_ftruncate(session, fh, LOG_FIRST_RECORD));
			closing = fh;
			fh = NULL;
			WT_ERR(__wt_close(session, closing));
		}
	}

err:	if (fh != NULL)
		WT_TRET(__wt_close(session, fh));
	if (names != NULL)
		__wt_log_files_free(session, names, nfiles);
	return (ret);
}
/*
 * __wt_log_open --
 *	Open the appropriate log file for the connection.  The purpose is
 *	to find the last log file that exists, open it and set our initial
 *	LSNs to the end of that file.  If none exist, call __wt_log_newfile
 *	to create it.
 *
 *	The existing log files are listed and the smallest and largest log
 *	numbers recorded: the largest becomes log->fileid and the smallest
 *	becomes log->first_lsn.file (offset 0).  A new log file is then
 *	started, and if any log files were found, log->trunc_lsn is set to
 *	log->alloc_lsn and recovery is run.
 *
 *	Returns 0 on success or the first error encountered; the file list
 *	is released on both paths.
 */
int
__wt_log_open(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint32_t firstlog, lastlog, lognum;
	u_int i, logcount;
	char **logfiles;

	conn = S2C(session);
	log = conn->log;
	lastlog = 0;
	firstlog = UINT32_MAX;

	WT_RET(__wt_log_get_files(session, &logfiles, &logcount));
	for (i = 0; i < logcount; i++) {
		WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
		lastlog = WT_MAX(lastlog, lognum);
		firstlog = WT_MIN(firstlog, lognum);
	}
	log->fileid = lastlog;

	/*
	 * The log numbers are uint32_t, so print them with PRIu32.  firstlog
	 * is UINT32_MAX when no log files exist, a value that cannot be
	 * represented in the int a "%d" conversion expects.
	 */
	WT_ERR(__wt_verbose(session, WT_VERB_LOG,
	    "log_open: first log %" PRIu32 " last log %" PRIu32,
	    firstlog, lastlog));

	/*
	 * NOTE(review): when no log files were found this stores UINT32_MAX
	 * in first_lsn.file -- confirm that is the intended "no first log"
	 * value for readers of first_lsn.
	 */
	log->first_lsn.file = firstlog;
	log->first_lsn.offset = 0;

	/*
	 * Start logging at the beginning of the next log file, no matter
	 * where the previous log file ends.
	 */
	WT_ERR(__wt_log_newfile(session, 1));

	/*
	 * If there were log files, run recovery.
	 * XXX belongs at a higher level than this.
	 */
	if (logcount > 0) {
		log->trunc_lsn = log->alloc_lsn;
		WT_ERR(__wt_txn_recover(session));
	}

err:	__wt_log_files_free(session, logfiles, logcount);
	return (ret);
}
/*
 * __backup_log_append --
 *	Add the log files a backup needs to the backup cursor's list.
 *
 *	Does nothing when the connection has no log.  Otherwise the log file
 *	names are fetched with __wt_log_get_all_files (which also receives
 *	&cb->maxid and the caller's "active" argument) and each name is
 *	appended to the cursor's list.  The fetched name array is released
 *	before returning, on success and on error alike.
 */
static int
__backup_log_append(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, int active)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	u_int nfiles, slot;
	char **names;

	conn = S2C(session);
	names = NULL;
	nfiles = 0;
	ret = 0;

	/* No logging configured: there is nothing to append or free. */
	if (!conn->log)
		return (ret);

	WT_ERR(__wt_log_get_all_files(
	    session, &names, &nfiles, &cb->maxid, active));

	slot = 0;
	while (slot < nfiles) {
		WT_ERR(__backup_list_append(session, cb, names[slot]));
		++slot;
	}

err:	if (names != NULL)
		__wt_log_files_free(session, names, nfiles);
	return (ret);
}
/*
 * __log_archive_once --
 *	Perform one iteration of log archiving.  Must be called with the
 *	log archive lock held.
 *
 *	backup_file is non-zero when the call comes from a backup cursor, in
 *	which case it bounds the files that may be removed.  Every log file
 *	numbered below the computed minimum is removed, unless a hot backup
 *	is in progress and this call is not the backup itself.  On success
 *	log->first_lsn is moved to the start of the minimum log file.
 *
 *	Returns 0 on success.  Failures after the directory listing are
 *	reported through __wt_err and returned; the hot backup lock and the
 *	file list are always released.
 */
static int
__log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	uint32_t lognum, min_lognum;
	u_int i, locked, logcount;
	char **logfiles;

	conn = S2C(session);
	log = conn->log;
	locked = 0;
	logcount = 0;
	logfiles = NULL;

	/*
	 * If we're coming from a backup cursor we want the smaller of
	 * the last full log file copied in backup or the checkpoint LSN.
	 * Otherwise we want the minimum of the last log file written to
	 * disk and the checkpoint LSN.
	 */
	if (backup_file != 0)
		min_lognum = WT_MIN(log->ckpt_lsn.file, backup_file);
	else
		min_lognum = WT_MIN(log->ckpt_lsn.file, log->sync_lsn.file);
	WT_RET(__wt_verbose(session, WT_VERB_LOG,
	    "log_archive: archive to log number %" PRIu32, min_lognum));

	/*
	 * Main archive code.  Get the list of all log files and
	 * remove any earlier than the minimum log number.
	 */
	WT_RET(__wt_dirlist(session, conn->log_path,
	    WT_LOG_FILENAME, WT_DIRLIST_INCLUDE, &logfiles, &logcount));

	/*
	 * We can only archive files if a hot backup is not in progress or
	 * if we are the backup.
	 *
	 * The file list is already allocated here, so a failure to take the
	 * lock must go through the cleanup path rather than return directly,
	 * otherwise the list would leak.  "locked" is still 0 at that point,
	 * so the cleanup path does not try to release a lock we never got.
	 */
	WT_ERR(__wt_readlock(session, conn->hot_backup_lock));
	locked = 1;
	if (conn->hot_backup == 0 || backup_file != 0) {
		for (i = 0; i < logcount; i++) {
			WT_ERR(__wt_log_extract_lognum(
			    session, logfiles[i], &lognum));
			if (lognum < min_lognum)
				WT_ERR(__wt_log_remove(
				    session, WT_LOG_FILENAME, lognum));
		}
	}
	WT_ERR(__wt_readunlock(session, conn->hot_backup_lock));
	locked = 0;
	__wt_log_files_free(session, logfiles, logcount);
	logfiles = NULL;
	logcount = 0;

	/*
	 * Indicate what is our new earliest LSN.  It is the start
	 * of the log file containing the last checkpoint.
	 */
	log->first_lsn.file = min_lognum;
	log->first_lsn.offset = 0;

	/* The error message is only emitted when we arrive via the label. */
	if (0)
err:		__wt_err(session, ret, "log archive server error");
	if (locked)
		WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
	if (logfiles != NULL)
		__wt_log_files_free(session, logfiles, logcount);
	return (ret);
}
/* * __wt_log_scan -- * Scan the logs, calling a function on each record found. */ int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, void *cookie), void *cookie) { WT_CONNECTION_IMPL *conn; WT_ITEM buf; WT_DECL_RET; WT_FH *log_fh; WT_LOG *log; WT_LOG_RECORD *logrec; WT_LSN end_lsn, rd_lsn, start_lsn; off_t log_size; uint32_t allocsize, cksum, firstlog, lastlog, lognum, rdup_len, reclen; u_int i, logcount; int eol; char **logfiles; conn = S2C(session); log = conn->log; log_fh = NULL; logcount = 0; logfiles = NULL; eol = 0; WT_CLEAR(buf); /* * If the caller did not give us a callback function there is nothing * to do. */ if (func == NULL) return (0); if (LF_ISSET(WT_LOGSCAN_RECOVER)) WT_RET(__wt_verbose(session, WT_VERB_LOG, "__wt_log_scan truncating to %u/%" PRIuMAX, log->trunc_lsn.file, (uintmax_t)log->trunc_lsn.offset)); if (log != NULL) { allocsize = log->allocsize; if (lsnp == NULL) { if (LF_ISSET(WT_LOGSCAN_FIRST)) start_lsn = log->first_lsn; else if (LF_ISSET(WT_LOGSCAN_FROM_CKP)) start_lsn = log->ckpt_lsn; else return (WT_ERROR); /* Illegal usage */ } else { if (LF_ISSET(WT_LOGSCAN_FIRST|WT_LOGSCAN_FROM_CKP)) WT_RET_MSG(session, WT_ERROR, "choose either a start LSN or a start flag"); /* Offsets must be on allocation boundaries. */ if (lsnp->offset % allocsize != 0 || lsnp->file > log->fileid) return (WT_NOTFOUND); /* * Log cursors may not know the starting LSN. If an * LSN pointer is passed in, but it is the INIT_LSN, * start from the first_lsn. */ start_lsn = *lsnp; if (IS_INIT_LSN(&start_lsn)) start_lsn = log->first_lsn; } end_lsn = log->alloc_lsn; } else { /* * If logging is not configured, we can still print out the log * if log files exist. We just need to set the LSNs from what * is in the files versus what is in the live connection. */ /* * Set allocsize to the minimum alignment it could be. 
Larger * records and larger allocation boundaries should always be * a multiple of this. */ allocsize = LOG_ALIGN; lastlog = 0; firstlog = UINT32_MAX; WT_RET(__wt_log_get_files(session, &logfiles, &logcount)); if (logcount == 0) /* * Return it is not supported if none don't exist. */ return (ENOTSUP); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); lastlog = WT_MAX(lastlog, lognum); firstlog = WT_MIN(firstlog, lognum); } start_lsn.file = firstlog; end_lsn.file = lastlog; start_lsn.offset = end_lsn.offset = 0; __wt_log_files_free(session, logfiles, logcount); logfiles = NULL; } WT_ERR(__log_openfile(session, 0, &log_fh, start_lsn.file)); WT_ERR(__log_filesize(session, log_fh, &log_size)); rd_lsn = start_lsn; WT_ERR(__wt_buf_initsize(session, &buf, LOG_ALIGN)); for (;;) { if (rd_lsn.offset + allocsize > log_size) { advance: /* * If we read the last record, go to the next file. */ WT_ERR(__wt_close(session, log_fh)); log_fh = NULL; eol = 1; /* * Truncate this log file before we move to the next. */ if (LF_ISSET(WT_LOGSCAN_RECOVER)) WT_ERR(__log_truncate(session, &rd_lsn, 1)); rd_lsn.file++; rd_lsn.offset = 0; /* * Avoid an error message when we reach end of log * by checking here. */ if (rd_lsn.file > end_lsn.file) break; WT_ERR(__log_openfile( session, 0, &log_fh, rd_lsn.file)); WT_ERR(__log_filesize(session, log_fh, &log_size)); continue; } /* * Read the minimum allocation size a record could be. */ WT_ASSERT(session, buf.memsize >= allocsize); WT_ERR(__wt_read(session, log_fh, rd_lsn.offset, (size_t)allocsize, buf.mem)); /* * First 8 bytes is the real record length. See if we * need to read more than the allocation size. We expect * that we rarely will have to read more. Most log records * will be fairly small. */ reclen = *(uint32_t *)buf.mem; /* * Log files are pre-allocated. We never expect a zero length * unless we've reached the end of the log. 
The log can be * written out of order, so when recovery finds the end of * the log, truncate the file and remove any later log files * that may exist. */ if (reclen == 0) { /* This LSN is the end. */ break; } rdup_len = __wt_rduppo2(reclen, allocsize); if (reclen > allocsize) { /* * The log file end could be the middle of this * log record. */ if (rd_lsn.offset + rdup_len > log_size) goto advance; /* * We need to round up and read in the full padded * record, especially for direct I/O. */ WT_ERR(__wt_buf_grow(session, &buf, rdup_len)); WT_ERR(__wt_read(session, log_fh, rd_lsn.offset, (size_t)rdup_len, buf.mem)); WT_STAT_FAST_CONN_INCR(session, log_scan_rereads); } /* * We read in the record, verify checksum. */ buf.size = reclen; logrec = (WT_LOG_RECORD *)buf.mem; cksum = logrec->checksum; logrec->checksum = 0; logrec->checksum = __wt_cksum(logrec, logrec->len); if (logrec->checksum != cksum) { /* * A checksum mismatch means we have reached the end of * the useful part of the log. This should be found on * the first pass through recovery. In the second pass * where we truncate the log, this is where it should * end. */ if (log != NULL) log->trunc_lsn = rd_lsn; break; } /* * We have a valid log record. If it is not the log file * header, invoke the callback. */ WT_STAT_FAST_CONN_INCR(session, log_scan_records); if (rd_lsn.offset != 0) { WT_ERR((*func)(session, &buf, &rd_lsn, cookie)); if (LF_ISSET(WT_LOGSCAN_ONE)) break; } rd_lsn.offset += (off_t)rdup_len; } /* Truncate if we're in recovery. */ if (LF_ISSET(WT_LOGSCAN_RECOVER) && LOG_CMP(&rd_lsn, &log->trunc_lsn) < 0) WT_ERR(__log_truncate(session, &rd_lsn, 0)); err: WT_STAT_FAST_CONN_INCR(session, log_scans); if (logfiles != NULL) __wt_log_files_free(session, logfiles, logcount); __wt_buf_free(session, &buf); /* * If the caller wants one record and it is at the end of log, * return WT_NOTFOUND. 
*/ if (LF_ISSET(WT_LOGSCAN_ONE) && eol && ret == 0) ret = WT_NOTFOUND; if (ret == ENOENT) ret = 0; if (log_fh != NULL) WT_TRET(__wt_close(session, log_fh)); return (ret); }
/*
 * __backup_start --
 *	Begin a hot backup on behalf of a backup cursor.
 *
 *	Fails with EINVAL if the connection already has a hot backup open.
 *	Otherwise the connection's hot backup flag is set, the hot backup
 *	file is created, and the cursor's file list is built from the
 *	configuration.  On any failure after the flag is set, the cursor's
 *	handles are cleaned up and the backup is stopped again.
 */
static int
__backup_start(
    WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[])
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	u_int nlogs, slot;
	int has_targets;
	char **lognames;

	conn = S2C(session);

	lognames = NULL;
	nlogs = 0;

	cb->next = 0;
	cb->list = NULL;

	/*
	 * Hot backups are single-threaded: the schema lock is held here, so
	 * attempts to start a hot backup are serialized with one another.
	 */
	if (conn->hot_backup)
		WT_RET_MSG(
		    session, EINVAL, "there is already a backup cursor open");

	/*
	 * The copy itself happens outside of WiredTiger, so file blocks must
	 * not be freed and re-allocated until the backup is finished.  The
	 * checkpoint code looks at the backup flag and doesn't discard
	 * checkpoints while a backup cursor is open.  The lock is dropped as
	 * soon as the flag is set: the goal isn't to block checkpoints, only
	 * to ensure none are deleted.  The checkpoint code keeps the lock
	 * until its checkpoint is complete, which prevents a hot backup from
	 * starting in the middle of, and racing with, a running checkpoint.
	 */
	__wt_spin_lock(session, &conn->hot_backup_lock);
	conn->hot_backup = 1;
	__wt_spin_unlock(session, &conn->hot_backup_lock);

	/* Create the hot backup file. */
	WT_ERR(__backup_file_create(session, cb));

	/*
	 * Process the list of targets if one was given, otherwise list every
	 * database object.
	 *
	 * A full backup also includes the log files; they are added ahead of
	 * the data files so the metadata isn't rolled forward across a
	 * checkpoint that completes while the backup is running.
	 */
	has_targets = 0;
	WT_ERR(__backup_uri(session, cb, cfg, &has_targets));
	if (!has_targets) {
		if (conn->log) {
			WT_ERR(__wt_log_get_active_files(
			    session, &lognames, &nlogs));
			slot = 0;
			while (slot < nlogs) {
				WT_ERR(__backup_list_append(
				    session, cb, lognames[slot]));
				++slot;
			}
		}
		WT_ERR(__backup_all(session, cb));
	}

	/* Finish the list with the hot backup and single-threading files. */
	WT_ERR(__backup_list_append(session, cb, WT_METADATA_BACKUP));
	WT_ERR(__backup_list_append(session, cb, WT_SINGLETHREAD));

err:	/* The hot backup file is closed on success and failure alike. */
	if (cb->bfp != NULL) {
		WT_TRET(fclose(cb->bfp) != 0 ? __wt_errno() : 0);
		cb->bfp = NULL;
	}
	if (lognames != NULL)
		__wt_log_files_free(session, lognames, nlogs);

	/* Undo the backup state if anything above failed. */
	if (ret != 0) {
		WT_TRET(__backup_cleanup_handles(session, cb));
		WT_TRET(__backup_stop(session));
	}

	return (ret);
}