/*
 * __curlog_search --
 *     WT_CURSOR.search method for the log cursor type.
 *
 *     Reads the (file, offset, counter) key set on the cursor, builds an LSN
 *     from the file and offset, and scans the log at that LSN with
 *     WT_LOGSCAN_ONE. ENOENT from the scan is reported as WT_NOTFOUND.
 */
static int
__curlog_search(WT_CURSOR *cursor)
{
    WT_CURSOR_LOG *logc;
    WT_DECL_RET;
    WT_LSN lsn;
    WT_SESSION_IMPL *session;
    uint32_t lsn_file, lsn_offset, opcount;

    logc = (WT_CURSOR_LOG *)cursor;

    CURSOR_API_CALL(cursor, session, search, NULL);

    /*
     * !!! The counter part of the key is fetched but never used: the
     * search is driven by the LSN alone.
     */
    WT_ERR(__wt_cursor_get_key(cursor, &lsn_file, &lsn_offset, &opcount));
    WT_SET_LSN(&lsn, lsn_file, lsn_offset);

    /* Callers expect WT_NOTFOUND rather than ENOENT. */
    if ((ret = __wt_log_scan(
        session, &lsn, WT_LOGSCAN_ONE, __curlog_logrec, logc)) == ENOENT)
        ret = WT_NOTFOUND;
    WT_ERR(ret);

    /* Populate the cursor's key/value from the record just scanned. */
    WT_ERR(__curlog_kv(session, cursor));

    WT_STAT_FAST_CONN_INCR(session, cursor_search);
    WT_STAT_FAST_DATA_INCR(session, cursor_search);

err:
    API_END_RET(session, ret);
}
/*
 * __curlog_search --
 *     WT_CURSOR.search method for the log cursor type.
 *
 *     The cursor's WT_CURSTD_RAW flag is cleared for the duration of the
 *     call and restored on every exit path. The key's file and offset form
 *     the LSN to look up; the counter is ignored. ENOENT from the log scan
 *     is reported as WT_NOTFOUND.
 */
static int
__curlog_search(WT_CURSOR *cursor)
{
    WT_CURSOR_LOG *logc;
    WT_DECL_RET;
    WT_LSN lsn;
    WT_SESSION_IMPL *session;
    uint32_t lsn_file, lsn_offset, opcount, saved_raw;

    logc = (WT_CURSOR_LOG *)cursor;

    /*
     * Remember whether raw mode was on, then switch it off so the key can
     * be read through the regular cursor accessors.
     */
    saved_raw = F_MASK(cursor, WT_CURSTD_RAW);
    F_CLR(cursor, WT_CURSTD_RAW);

    CURSOR_API_CALL(cursor, session, search, NULL);

    /*
     * !!! The counter part of the key is fetched but never used: the
     * search is driven by the LSN alone.
     */
    WT_ERR(__wt_cursor_get_key(cursor, &lsn_file, &lsn_offset, &opcount));
    WT_SET_LSN(&lsn, lsn_file, lsn_offset);

    /* Callers expect WT_NOTFOUND rather than ENOENT. */
    if ((ret = __wt_log_scan(
        session, &lsn, WT_LOGSCAN_ONE, __curlog_logrec, logc)) == ENOENT)
        ret = WT_NOTFOUND;
    WT_ERR(ret);

    /* Populate the cursor's key/value from the record just scanned. */
    WT_ERR(__curlog_kv(session, cursor));

    WT_STAT_CONN_INCR(session, cursor_search);
    WT_STAT_DATA_INCR(session, cursor_search);

err:
    /* Put the raw flag back exactly as it was found. */
    F_SET(cursor, saved_raw);
    API_END_RET(session, ret);
}
/*
 * __recovery_setup_file --
 *     Set up the recovery slot for a file.
 *
 *     r is the recovery state; uri is the file's URI; config is the
 *     configuration string holding the file's "id" and "checkpoint_lsn".
 *     The slot array is grown so that r->files[id] exists, the URI is
 *     duplicated into the slot and the slot's checkpoint LSN is recorded.
 *     r->max_fileid and r->max_ckpt_lsn are advanced when this file exceeds
 *     them.
 *
 *     Returns 0 on success, EINVAL if the file ID or checkpoint LSN cannot
 *     be used, or the error from a failing helper.
 */
static int
__recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
{
    WT_CONFIG_ITEM cval;
    WT_LSN lsn;
    uint32_t fileid, lsnfile, lsnoffset;

    WT_RET(__wt_config_getones(r->session, config, "id", &cval));

    /*
     * The ID indexes the slot array and "fileid + 1" sizes it. Reject
     * values that are negative or would not leave room for the "+ 1":
     * they would otherwise be truncated by the cast or wrap the
     * allocation size to zero and index past the end of the array.
     */
    if (cval.val < 0 || cval.val >= (int64_t)UINT32_MAX) {
        WT_RET_MSG(r->session, EINVAL,
            "Invalid file ID '%.*s'", (int)cval.len, cval.str);
    }
    fileid = (uint32_t)cval.val;

    /* Track the largest file ID we have seen. */
    if (fileid > r->max_fileid)
        r->max_fileid = fileid;

    /* Grow the slot array so that index fileid is valid. */
    if (r->nfiles <= fileid) {
        WT_RET(__wt_realloc_def(
            r->session, &r->file_alloc, fileid + 1, &r->files));
        r->nfiles = fileid + 1;
    }

    /*
     * NOTE(review): if the same ID is set up twice, the slot's previous
     * uri pointer is overwritten here; confirm callers guarantee unique
     * IDs or that __wt_strdup releases the old string.
     */
    WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri));
    WT_RET(
        __wt_config_getones(r->session, config, "checkpoint_lsn", &cval));

    /* If there is no checkpoint logged for the file, apply everything. */
    if (cval.type != WT_CONFIG_ITEM_STRUCT)
        WT_INIT_LSN(&lsn);
    /* NOLINTNEXTLINE(cert-err34-c) */
    else if (sscanf(cval.str,
        "(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2)
        WT_SET_LSN(&lsn, lsnfile, lsnoffset);
    else
        WT_RET_MSG(r->session, EINVAL,
            "Failed to parse checkpoint LSN '%.*s'",
            (int)cval.len, cval.str);
    r->files[fileid].ckpt_lsn = lsn;

    __wt_verbose(r->session, WT_VERB_RECOVERY,
        "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")",
        uri, fileid, lsn.l.file, lsn.l.offset);

    /*
     * Remember the largest real checkpoint LSN seen across all files.
     * The "max" and "init" sentinel LSNs never count as candidates.
     */
    if ((!WT_IS_MAX_LSN(&lsn) && !WT_IS_INIT_LSN(&lsn)) &&
        (WT_IS_MAX_LSN(&r->max_ckpt_lsn) ||
        __wt_log_cmp(&lsn, &r->max_ckpt_lsn) > 0))
        r->max_ckpt_lsn = lsn;

    return (0);
}
/*
 * __wt_txn_checkpoint_logread --
 *     Read a log record for a checkpoint operation.
 *
 *     Unpacks four fields (format "IIIu") from the bytes between *pp and
 *     end: the checkpoint LSN's file and offset, plus a snapshot count and
 *     a snapshot item that are unpacked but not used. On success the LSN is
 *     stored in ckpt_lsn, *pp is advanced to end and 0 is returned; on an
 *     unpack failure the error is reported and returned.
 */
int
__wt_txn_checkpoint_logread(WT_SESSION_IMPL *session,
    const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn)
{
    WT_DECL_RET;
    WT_ITEM snapshot_ignored;
    uint32_t lsn_file, lsn_offset;
    u_int nsnapshot_ignored;
    const char *unpack_fmt = WT_UNCHECKED_STRING(IIIu);

    ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), unpack_fmt,
        &lsn_file, &lsn_offset, &nsnapshot_ignored, &snapshot_ignored);
    if (ret != 0) {
        WT_RET_MSG(session, ret, "txn_checkpoint_logread: unpack failure");
    }

    WT_SET_LSN(ckpt_lsn, lsn_file, lsn_offset);

    /* The whole record has been consumed. */
    *pp = end;
    return (0);
}
/*
 * __log_file_server --
 *     The log file server thread. This worker thread manages
 *     log file operations such as closing and syncing.
 *
 *     arg is the session the thread runs with. The loop runs for as long as
 *     WT_CONN_LOG_SERVER_RUN is set on the connection. Each pass does up to
 *     two things:
 *       1. If a file handle has been left in log->log_close_fh and the
 *          write LSN has reached log->log_close_lsn, fsync, truncate and
 *          close that file, then move log->sync_lsn to the start of the
 *          next log file.
 *       2. If log->bg_sync_lsn is ahead of log->sync_lsn, fsync the current
 *          log file and advance log->sync_lsn to the write LSN sampled
 *          before the fsync.
 *     Any helper failure jumps to err, is reported with __wt_err and ends
 *     the thread.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_FH *close_fh;
    WT_LOG *log;
    WT_LSN close_end_lsn, min_lsn;
    WT_SESSION_IMPL *session;
    uint32_t filenum;
    bool locked;

    session = arg;
    conn = S2C(session);
    log = conn->log;
    /*
     * Tracks whether log->log_sync_lock is held so the error path can
     * release it.
     */
    locked = false;
    while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
        /*
         * If there is a log file to close, make sure any outstanding
         * write operations have completed, then fsync and close it.
         */
        if ((close_fh = log->log_close_fh) != NULL) {
            /* Recover the log file number from the handle's name. */
            WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
                &filenum));
            /*
             * We update the close file handle before updating the
             * close LSN when changing files. It is possible we
             * could see mismatched settings. If we do, yield
             * until it is set. This should rarely happen.
             */
            while (log->log_close_lsn.l.file < filenum)
                __wt_yield();

            /*
             * Only proceed once the write LSN has reached the close
             * LSN; otherwise retry on a later pass.
             */
            if (__wt_log_cmp(
                &log->write_lsn, &log->log_close_lsn) >= 0) {
                /*
                 * We've copied the file handle, clear out the
                 * one in the log structure to allow it to be
                 * set again. Copy the LSN before clearing
                 * the file handle.
                 * Use a barrier to make sure the compiler does
                 * not reorder the following two statements.
                 */
                close_end_lsn = log->log_close_lsn;
                WT_FULL_BARRIER();
                log->log_close_fh = NULL;
                /*
                 * Sync the closing file first. close_end_lsn
                 * still holds this file's own close LSN here;
                 * its offset is used for the truncate below.
                 */
                WT_ERR(__wt_fsync(session, close_fh));
                /*
                 * We want to make sure the file size reflects
                 * actual data and has minimal pre-allocated
                 * zeroed space.
                 */
                WT_ERR(__wt_ftruncate(session,
                    close_fh, close_end_lsn.l.offset));
                /*
                 * Set the close_end_lsn to the LSN immediately
                 * after ours. That is, the beginning of the
                 * next log file. We need to know the LSN
                 * file number of our own close in case earlier
                 * calls are still in progress and the next one
                 * to move the sync_lsn into the next file for
                 * later syncs.
                 */
                WT_SET_LSN(&close_end_lsn,
                    close_end_lsn.l.file + 1, 0);
                /*
                 * The close and the sync_lsn update happen under
                 * log_sync_lock; "locked" lets the error path
                 * release it if a WT_ERR fires in between.
                 */
                __wt_spin_lock(session, &log->log_sync_lock);
                locked = true;
                WT_ERR(__wt_close(session, &close_fh));
                WT_ASSERT(session, __wt_log_cmp(
                    &close_end_lsn, &log->sync_lsn) >= 0);
                log->sync_lsn = close_end_lsn;
                /* Signal log_sync_cond: sync_lsn has moved. */
                WT_ERR(__wt_cond_signal(
                    session, log->log_sync_cond));
                locked = false;
                __wt_spin_unlock(session, &log->log_sync_lock);
            }
        }
        /*
         * If a later thread asked for a background sync, do it now.
         */
        if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
            /*
             * Save the latest write LSN which is the minimum
             * we will have written to disk.
             */
            min_lsn = log->write_lsn;
            /*
             * We have to wait until the LSN we asked for is
             * written. If it isn't signal the wrlsn thread
             * to get it written.
             *
             * We also have to wait for the written LSN and the
             * sync LSN to be in the same file so that we know we
             * have synchronized all earlier log files.
             */
            if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) {
                /*
                 * If the sync file is behind either the one
                 * wanted for a background sync or the write LSN
                 * has moved to another file continue to let
                 * this worker thread process that older file
                 * immediately.
                 */
                if ((log->sync_lsn.l.file <
                    log->bg_sync_lsn.l.file) ||
                    (log->sync_lsn.l.file < min_lsn.l.file))
                    continue;
                WT_ERR(__wt_fsync(session, log->log_fh));
                __wt_spin_lock(session, &log->log_sync_lock);
                locked = true;
                /*
                 * The sync LSN could have advanced while we
                 * were writing to disk.
                 */
                if (__wt_log_cmp(
                    &log->sync_lsn, &min_lsn) <= 0) {
                    WT_ASSERT(session,
                        min_lsn.l.file ==
                        log->sync_lsn.l.file);
                    log->sync_lsn = min_lsn;
                    WT_ERR(__wt_cond_signal(
                        session, log->log_sync_cond));
                }
                locked = false;
                __wt_spin_unlock(session, &log->log_sync_lock);
            } else {
                /* Signal log_wrlsn_cond so the write LSN advances. */
                WT_ERR(__wt_cond_auto_signal(
                    session, conn->log_wrlsn_cond));
                /*
                 * We do not want to wait potentially a second
                 * to process this. Yield to give the wrlsn
                 * thread a chance to run and try again in
                 * this case.
                 */
                __wt_yield();
                continue;
            }
        }
        /*
         * Wait until the next event.
         * NOTE(review): the timeout is WT_MILLION / 10; presumably
         * microseconds, i.e. 100ms. Confirm against __wt_cond_wait.
         */
        WT_ERR(__wt_cond_wait(
            session, conn->log_file_cond, WT_MILLION / 10));
    }

    /* The error label is only reachable through WT_ERR's jump. */
    if (0) {
err:        __wt_err(session, ret, "log close server error");
    }
    /* Release log_sync_lock if an error fired while it was held. */
    if (locked)
        __wt_spin_unlock(session, &log->log_sync_lock);
    return (WT_THREAD_RET_VALUE);
}
/*
 * __log_archive_once --
 *     Perform one iteration of log archiving. Must be called with the
 *     log archive lock held.
 *
 *     backup_file is the last full log file copied by a backup cursor, or
 *     0 when the caller is not a backup. Log files numbered below the
 *     chosen minimum are removed, unless a hot backup is in progress and
 *     the caller is not that backup. On success log->first_lsn is set to
 *     the start of the minimum log file.
 *
 *     Returns 0 on success or the first error encountered. The directory
 *     listing and the hot backup read lock are released on every path once
 *     they have been acquired.
 */
static int
__log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_LOG *log;
    uint32_t lognum, min_lognum;
    u_int i, logcount;
    bool locked;
    char **logfiles;

    conn = S2C(session);
    log = conn->log;
    logcount = 0;
    logfiles = NULL;
    /* Tracks whether hot_backup_lock is held, for the cleanup path. */
    locked = false;

    /*
     * If we're coming from a backup cursor we want the smaller of
     * the last full log file copied in backup or the checkpoint LSN.
     * Otherwise we want the minimum of the last log file written to
     * disk and the checkpoint LSN.
     */
    if (backup_file != 0)
        min_lognum = WT_MIN(log->ckpt_lsn.l.file, backup_file);
    else
        min_lognum = WT_MIN(
            log->ckpt_lsn.l.file, log->sync_lsn.l.file);
    WT_RET(__wt_verbose(session, WT_VERB_LOG,
        "log_archive: archive to log number %" PRIu32, min_lognum));

    /*
     * Main archive code.  Get the list of all log files and
     * remove any earlier than the minimum log number.
     */
    WT_RET(__wt_dirlist(session, conn->log_path,
        WT_LOG_FILENAME, WT_DIRLIST_INCLUDE, &logfiles, &logcount));

    /*
     * We can only archive files if a hot backup is not in progress or
     * if we are the backup.
     *
     * From here on the file list is owned by this function, so failures
     * must go through the cleanup path rather than return directly.
     */
    WT_ERR(__wt_readlock(session, conn->hot_backup_lock));
    locked = true;
    if (!conn->hot_backup || backup_file != 0) {
        for (i = 0; i < logcount; i++) {
            WT_ERR(__wt_log_extract_lognum(
                session, logfiles[i], &lognum));
            if (lognum < min_lognum) {
                WT_ERR(__wt_log_remove(
                    session, WT_LOG_FILENAME, lognum));
            }
        }
    }

    /*
     * Clear the flag before unlocking so a failed unlock is not retried
     * on the cleanup path.
     */
    locked = false;
    WT_ERR(__wt_readunlock(session, conn->hot_backup_lock));

    __wt_log_files_free(session, logfiles, logcount);
    logfiles = NULL;
    logcount = 0;

    /*
     * Indicate what is our new earliest LSN.  It is the start
     * of the log file containing the last checkpoint.
     */
    WT_SET_LSN(&log->first_lsn, min_lognum, 0);

    /* The error label is only reachable through WT_ERR's jump. */
    if (0) {
err:        __wt_err(session, ret, "log archive server error");
    }
    if (locked)
        WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
    if (logfiles != NULL)
        __wt_log_files_free(session, logfiles, logcount);
    return (ret);
}
/* * __log_file_server -- * The log file server thread. This worker thread manages * log file operations such as closing and syncing. */ static WT_THREAD_RET __log_file_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *close_fh; WT_LOG *log; WT_LSN close_end_lsn, min_lsn; WT_SESSION_IMPL *session; uint64_t yield_count; uint32_t filenum; bool locked; session = arg; conn = S2C(session); log = conn->log; locked = false; yield_count = 0; while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * If there is a log file to close, make sure any outstanding * write operations have completed, then fsync and close it. */ if ((close_fh = log->log_close_fh) != NULL) { WT_ERR(__wt_log_extract_lognum(session, close_fh->name, &filenum)); /* * The closing file handle should have a correct close * LSN. */ WT_ASSERT(session, log->log_close_lsn.l.file == filenum); if (__wt_log_cmp( &log->write_lsn, &log->log_close_lsn) >= 0) { /* * We've copied the file handle, clear out the * one in the log structure to allow it to be * set again. Copy the LSN before clearing * the file handle. * Use a barrier to make sure the compiler does * not reorder the following two statements. */ close_end_lsn = log->log_close_lsn; WT_FULL_BARRIER(); log->log_close_fh = NULL; /* * Set the close_end_lsn to the LSN immediately * after ours. That is, the beginning of the * next log file. We need to know the LSN * file number of our own close in case earlier * calls are still in progress and the next one * to move the sync_lsn into the next file for * later syncs. */ WT_ERR(__wt_fsync(session, close_fh, true)); /* * We want to have the file size reflect actual * data with minimal pre-allocated zeroed space. * We can't truncate the file during hot backup, * or the underlying file system may not support * truncate: both are OK, it's just more work * during cursor traversal. 
*/ if (!conn->hot_backup) { __wt_readlock( session, &conn->hot_backup_lock); if (!conn->hot_backup) WT_ERR_ERROR_OK( __wt_ftruncate(session, close_fh, close_end_lsn.l.offset), ENOTSUP); __wt_readunlock( session, &conn->hot_backup_lock); } WT_SET_LSN(&close_end_lsn, close_end_lsn.l.file + 1, 0); __wt_spin_lock(session, &log->log_sync_lock); locked = true; WT_ERR(__wt_close(session, &close_fh)); WT_ASSERT(session, __wt_log_cmp( &close_end_lsn, &log->sync_lsn) >= 0); log->sync_lsn = close_end_lsn; __wt_cond_signal(session, log->log_sync_cond); locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } } /* * If a later thread asked for a background sync, do it now. */ if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) { /* * Save the latest write LSN which is the minimum * we will have written to disk. */ min_lsn = log->write_lsn; /* * We have to wait until the LSN we asked for is * written. If it isn't signal the wrlsn thread * to get it written. * * We also have to wait for the written LSN and the * sync LSN to be in the same file so that we know we * have synchronized all earlier log files. */ if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) { /* * If the sync file is behind either the one * wanted for a background sync or the write LSN * has moved to another file continue to let * this worker thread process that older file * immediately. */ if ((log->sync_lsn.l.file < log->bg_sync_lsn.l.file) || (log->sync_lsn.l.file < min_lsn.l.file)) continue; WT_ERR(__wt_fsync(session, log->log_fh, true)); __wt_spin_lock(session, &log->log_sync_lock); locked = true; /* * The sync LSN could have advanced while we * were writing to disk. 
*/ if (__wt_log_cmp( &log->sync_lsn, &min_lsn) <= 0) { WT_ASSERT(session, min_lsn.l.file == log->sync_lsn.l.file); log->sync_lsn = min_lsn; __wt_cond_signal( session, log->log_sync_cond); } locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } else { __wt_cond_signal(session, conn->log_wrlsn_cond); /* * We do not want to wait potentially a second * to process this. Yield to give the wrlsn * thread a chance to run and try again in * this case. */ yield_count++; __wt_yield(); continue; } } /* Wait until the next event. */ __wt_cond_wait(session, conn->log_file_cond, 100000, NULL); } if (0) { err: WT_PANIC_MSG(session, ret, "log close server error"); } WT_STAT_CONN_INCRV(session, log_server_sync_blocked, yield_count); if (locked) __wt_spin_unlock(session, &log->log_sync_lock); return (WT_THREAD_RET_VALUE); }