/* * __wt_txn_recover -- * Run recovery. */ int __wt_txn_recover(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_CURSOR *metac; WT_DECL_RET; WT_RECOVERY r; struct WT_RECOVERY_FILE *metafile; char *config; bool eviction_started, needs_rec, was_backup; conn = S2C(session); WT_CLEAR(r); WT_INIT_LSN(&r.ckpt_lsn); eviction_started = false; was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP); /* We need a real session for recovery. */ WT_RET(__wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, &session)); r.session = session; F_SET(conn, WT_CONN_RECOVERING); WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config)); WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config)); WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac)); metafile = &r.files[WT_METAFILE_ID]; metafile->c = metac; /* * If no log was found (including if logging is disabled), or if the * last checkpoint was done with logging disabled, recovery should not * run. Scan the metadata to figure out the largest file ID. */ if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_EXISTED) || WT_IS_MAX_LSN(&metafile->ckpt_lsn)) { WT_ERR(__recovery_file_scan(&r)); conn->next_file_id = r.max_fileid; goto done; } /* * First, do a pass through the log to recover the metadata, and * establish the last checkpoint LSN. Skip this when opening a hot * backup: we already have the correct metadata in that case. */ if (!was_backup) { r.metadata_only = true; /* * If this is a read-only connection, check if the checkpoint * LSN in the metadata file is up to date, indicating a clean * shutdown. */ if (F_ISSET(conn, WT_CONN_READONLY)) { WT_ERR(__wt_log_needs_recovery( session, &metafile->ckpt_lsn, &needs_rec)); if (needs_rec) WT_ERR_MSG(session, WT_RUN_RECOVERY, "Read-only database needs recovery"); } if (WT_IS_INIT_LSN(&metafile->ckpt_lsn)) WT_ERR(__wt_log_scan(session, NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r)); else { /* * Start at the last checkpoint LSN referenced in the * metadata. If we see the end of a checkpoint while * scanning, we will change the full scan to start from * there. */ r.ckpt_lsn = metafile->ckpt_lsn; ret = __wt_log_scan(session, &metafile->ckpt_lsn, 0, __txn_log_recover, &r); if (ret == ENOENT) ret = 0; WT_ERR(ret); } } /* Scan the metadata to find the live files and their IDs. */ WT_ERR(__recovery_file_scan(&r)); /* * We no longer need the metadata cursor: close it to avoid pinning any * resources that could block eviction during recovery. */ r.files[0].c = NULL; WT_ERR(metac->close(metac)); /* * Now, recover all the files apart from the metadata. * Pass WT_LOGSCAN_RECOVER so that old logs get truncated. */ r.metadata_only = false; WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY, "Main recovery loop: starting at %" PRIu32 "/%" PRIu32, r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset)); WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec)); /* * Check if the database was shut down cleanly. If not * return an error if the user does not want automatic * recovery. */ if (needs_rec && (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) || F_ISSET(conn, WT_CONN_READONLY))) { if (F_ISSET(conn, WT_CONN_READONLY)) WT_ERR_MSG(session, WT_RUN_RECOVERY, "Read-only database needs recovery"); WT_ERR(WT_RUN_RECOVERY); } if (F_ISSET(conn, WT_CONN_READONLY)) goto done; /* * Recovery can touch more data than fits in cache, so it relies on * regular eviction to manage paging. Start eviction threads for * recovery without LAS cursors. */ WT_ERR(__wt_evict_create(session)); eviction_started = true; /* * Always run recovery even if it was a clean shutdown only if * this is not a read-only connection. * We can consider skipping it in the future. */ if (WT_IS_INIT_LSN(&r.ckpt_lsn)) WT_ERR(__wt_log_scan(session, NULL, WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER, __txn_log_recover, &r)); else { ret = __wt_log_scan(session, &r.ckpt_lsn, WT_LOGSCAN_RECOVER, __txn_log_recover, &r); if (ret == ENOENT) ret = 0; WT_ERR(ret); } conn->next_file_id = r.max_fileid; /* * If recovery ran successfully forcibly log a checkpoint so the next * open is fast and keep the metadata up to date with the checkpoint * LSN and archiving. */ WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); done: FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE); err: WT_TRET(__recovery_free(&r)); __wt_free(session, config); if (ret != 0) __wt_err(session, ret, "Recovery failed"); /* * Destroy the eviction threads that were started in support of * recovery. They will be restarted once the lookaside table is * created. */ if (eviction_started) WT_TRET(__wt_evict_destroy(session)); WT_TRET(session->iface.close(&session->iface, NULL)); F_CLR(conn, WT_CONN_RECOVERING); return (ret); }
/* * __wt_connection_close -- * Close a connection handle. */ int __wt_connection_close(WT_CONNECTION_IMPL *conn) { WT_CONNECTION *wt_conn; WT_DECL_RET; WT_DLH *dlh; WT_SESSION_IMPL *s, *session; u_int i; wt_conn = &conn->iface; session = conn->default_session; /* Shut down transactions (wait for in-flight operations to complete. */ WT_TRET(__wt_txn_global_shutdown(session)); /* Shut down the subsystems, ensuring workers see the state change. */ F_SET(conn, WT_CONN_CLOSING); WT_FULL_BARRIER(); /* * Clear any pending async operations and shut down the async worker * threads and system before closing LSM. */ WT_TRET(__wt_async_flush(session)); WT_TRET(__wt_async_destroy(session)); /* * Shut down server threads other than the eviction server, which is * needed later to close btree handles. Some of these threads access * btree handles, so take care in ordering shutdown to make sure they * exit before files are closed. */ WT_TRET(__wt_lsm_manager_destroy(session)); /* * Once the async and LSM threads exit, we shouldn't be opening any * more files. */ F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS); WT_FULL_BARRIER(); WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); WT_TRET(__wt_sweep_destroy(session)); /* The eviction server is shut down last. */ WT_TRET(__wt_evict_destroy(session)); /* Shut down the lookaside table, after all eviction is complete. */ WT_TRET(__wt_las_destroy(session)); /* Close open data handles. */ WT_TRET(__wt_conn_dhandle_discard(session)); /* Shut down metadata tracking. */ WT_TRET(__wt_meta_track_destroy(session)); /* * Now that all data handles are closed, tell logging that a checkpoint * has completed then shut down the log manager (only after closing * data handles). The call to destroy the log manager is outside the * conditional because we allocate the log path so that printlog can * run without running logging or recovery. */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE)) WT_TRET(__wt_txn_checkpoint_log( session, true, WT_TXN_LOG_CKPT_STOP, NULL)); WT_TRET(__wt_logmgr_destroy(session)); /* Free memory for collators, compressors, data sources. */ WT_TRET(__wt_conn_remove_collator(session)); WT_TRET(__wt_conn_remove_compressor(session)); WT_TRET(__wt_conn_remove_data_source(session)); WT_TRET(__wt_conn_remove_encryptor(session)); WT_TRET(__wt_conn_remove_extractor(session)); /* Disconnect from shared cache - must be before cache destroy. */ WT_TRET(__wt_conn_cache_pool_destroy(session)); /* Discard the cache. */ WT_TRET(__wt_cache_destroy(session)); /* Discard transaction state. */ __wt_txn_global_destroy(session); /* Close the lock file, opening up the database to other connections. */ if (conn->lock_fh != NULL) WT_TRET(__wt_close(session, &conn->lock_fh)); /* Close any file handles left open. */ WT_TRET(__wt_close_connection_close(session)); /* * Close the internal (default) session, and switch back to the dummy * session in case of any error messages from the remaining operations * while destroying the connection handle. */ if (session != &conn->dummy_session) { WT_TRET(session->iface.close(&session->iface, NULL)); session = conn->default_session = &conn->dummy_session; } /* * The session split stash, hazard information and handle arrays aren't * discarded during normal session close, they persist past the life of * the session. Discard them now. */ if (!F_ISSET(conn, WT_CONN_LEAK_MEMORY)) if ((s = conn->sessions) != NULL) for (i = 0; i < conn->session_size; ++s, ++i) { __wt_free(session, s->dhhash); __wt_stash_discard_all(session, s); __wt_free(session, s->hazard); } /* Destroy the file-system configuration. */ if (conn->file_system != NULL && conn->file_system->terminate != NULL) WT_TRET(conn->file_system->terminate( conn->file_system, (WT_SESSION *)session)); /* Close extensions, first calling any unload entry point. */ while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) { TAILQ_REMOVE(&conn->dlhqh, dlh, q); if (dlh->terminate != NULL) WT_TRET(dlh->terminate(wt_conn)); WT_TRET(__wt_dlclose(session, dlh)); } /* Destroy the handle. */ __wt_connection_destroy(conn); return (ret); }
/* * __wt_connection_close -- * Close a connection handle. */ int __wt_connection_close(WT_CONNECTION_IMPL *conn) { WT_CONNECTION *wt_conn; WT_DECL_RET; WT_DLH *dlh; WT_SESSION_IMPL *s, *session; WT_TXN_GLOBAL *txn_global; u_int i; wt_conn = &conn->iface; txn_global = &conn->txn_global; session = conn->default_session; /* * We're shutting down. Make sure everything gets freed. * * It's possible that the eviction server is in the middle of a long * operation, with a transaction ID pinned. In that case, we will loop * here until the transaction ID is released, when the oldest * transaction ID will catch up with the current ID. */ for (;;) { WT_TRET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); if (txn_global->oldest_id == txn_global->current) break; __wt_yield(); } /* Clear any pending async ops. */ WT_TRET(__wt_async_flush(session)); /* * Shut down server threads other than the eviction server, which is * needed later to close btree handles. Some of these threads access * btree handles, so take care in ordering shutdown to make sure they * exit before files are closed. */ F_CLR(conn, WT_CONN_SERVER_RUN); WT_TRET(__wt_async_destroy(session)); WT_TRET(__wt_lsm_manager_destroy(session)); WT_TRET(__wt_sweep_destroy(session)); F_SET(conn, WT_CONN_CLOSING); WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); WT_TRET(__wt_evict_destroy(session)); /* Shut down the lookaside table, after all eviction is complete. */ WT_TRET(__wt_las_destroy(session)); /* Close open data handles. */ WT_TRET(__wt_conn_dhandle_discard(session)); /* Shut down metadata tracking, required before creating tables. */ WT_TRET(__wt_meta_track_destroy(session)); /* * Now that all data handles are closed, tell logging that a checkpoint * has completed then shut down the log manager (only after closing * data handles). The call to destroy the log manager is outside the * conditional because we allocate the log path so that printlog can * run without running logging or recovery. */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE)) WT_TRET(__wt_txn_checkpoint_log( session, true, WT_TXN_LOG_CKPT_STOP, NULL)); F_CLR(conn, WT_CONN_LOG_SERVER_RUN); WT_TRET(__wt_logmgr_destroy(session)); /* Free memory for collators, compressors, data sources. */ WT_TRET(__wt_conn_remove_collator(session)); WT_TRET(__wt_conn_remove_compressor(session)); WT_TRET(__wt_conn_remove_data_source(session)); WT_TRET(__wt_conn_remove_encryptor(session)); WT_TRET(__wt_conn_remove_extractor(session)); /* Disconnect from shared cache - must be before cache destroy. */ WT_TRET(__wt_conn_cache_pool_destroy(session)); /* Discard the cache. */ WT_TRET(__wt_cache_destroy(session)); /* Discard transaction state. */ WT_TRET(__wt_txn_global_destroy(session)); /* Close extensions, first calling any unload entry point. */ while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) { TAILQ_REMOVE(&conn->dlhqh, dlh, q); if (dlh->terminate != NULL) WT_TRET(dlh->terminate(wt_conn)); WT_TRET(__wt_dlclose(session, dlh)); } /* Close the lock file, opening up the database to other connections. */ if (conn->lock_fh != NULL) WT_TRET(__wt_close(session, &conn->lock_fh)); /* Close any file handles left open. */ WT_TRET(__wt_close_connection_close(session)); /* * Close the internal (default) session, and switch back to the dummy * session in case of any error messages from the remaining operations * while destroying the connection handle. */ if (session != &conn->dummy_session) { WT_TRET(session->iface.close(&session->iface, NULL)); session = conn->default_session = &conn->dummy_session; } /* * The session's split stash isn't discarded during normal session close * because it may persist past the life of the session. Discard it now. */ if ((s = conn->sessions) != NULL) for (i = 0; i < conn->session_size; ++s, ++i) __wt_split_stash_discard_all(session, s); /* * The session's hazard pointer memory isn't discarded during normal * session close because access to it isn't serialized. Discard it * now. */ if ((s = conn->sessions) != NULL) for (i = 0; i < conn->session_size; ++s, ++i) { /* * If hash arrays were allocated, free them now. */ __wt_free(session, s->dhhash); __wt_free(session, s->tablehash); __wt_free(session, s->hazard); } /* Destroy the handle. */ WT_TRET(__wt_connection_destroy(conn)); return (ret); }
/* * __wt_txn_recover -- * Run recovery. */ int __wt_txn_recover(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_CURSOR *metac; WT_DECL_RET; WT_RECOVERY r; WT_RECOVERY_FILE *metafile; char *config; bool do_checkpoint, eviction_started, needs_rec, was_backup; conn = S2C(session); WT_CLEAR(r); WT_INIT_LSN(&r.ckpt_lsn); config = NULL; do_checkpoint = true; eviction_started = false; was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP); /* We need a real session for recovery. */ WT_RET(__wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, &session)); r.session = session; WT_MAX_LSN(&r.max_ckpt_lsn); WT_MAX_LSN(&r.max_rec_lsn); conn->txn_global.recovery_timestamp = conn->txn_global.meta_ckpt_timestamp = 0; F_SET(conn, WT_CONN_RECOVERING); WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config)); WT_ERR(__recovery_setup_file(&r, WT_METAFILE_URI, config)); WT_ERR(__wt_metadata_cursor_open(session, NULL, &metac)); metafile = &r.files[WT_METAFILE_ID]; metafile->c = metac; /* * If no log was found (including if logging is disabled), or if the * last checkpoint was done with logging disabled, recovery should not * run. Scan the metadata to figure out the largest file ID. */ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) || WT_IS_MAX_LSN(&metafile->ckpt_lsn)) { /* * Detect if we're going from logging disabled to enabled. * We need to know this to verify LSNs and start at the correct * log file later. If someone ran with logging, then disabled * it and removed all the log files and then turned logging back * on, we have to start logging in the log file number that is * larger than any checkpoint LSN we have from the earlier time. */ WT_ERR(__recovery_file_scan(&r)); /* * The array can be re-allocated in recovery_file_scan. Reset * our pointer after scanning all the files. */ metafile = &r.files[WT_METAFILE_ID]; conn->next_file_id = r.max_fileid; if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && WT_IS_MAX_LSN(&metafile->ckpt_lsn) && !WT_IS_MAX_LSN(&r.max_ckpt_lsn)) WT_ERR(__wt_log_reset(session, r.max_ckpt_lsn.l.file)); else do_checkpoint = false; goto done; } /* * First, do a pass through the log to recover the metadata, and * establish the last checkpoint LSN. Skip this when opening a hot * backup: we already have the correct metadata in that case. * * If we're running with salvage and we hit an error, we ignore it * and continue. In salvage we want to recover whatever part of the * data we can from the last checkpoint up until whatever problem we * detect in the log file. In salvage, we ignore errors from scanning * the log so recovery can continue. Other errors remain errors. */ if (!was_backup) { r.metadata_only = true; /* * If this is a read-only connection, check if the checkpoint * LSN in the metadata file is up to date, indicating a clean * shutdown. */ if (F_ISSET(conn, WT_CONN_READONLY)) { WT_ERR(__wt_log_needs_recovery( session, &metafile->ckpt_lsn, &needs_rec)); if (needs_rec) WT_ERR_MSG(session, WT_RUN_RECOVERY, "Read-only database needs recovery"); } if (WT_IS_INIT_LSN(&metafile->ckpt_lsn)) ret = __wt_log_scan(session, NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r); else { /* * Start at the last checkpoint LSN referenced in the * metadata. If we see the end of a checkpoint while * scanning, we will change the full scan to start from * there. */ r.ckpt_lsn = metafile->ckpt_lsn; ret = __wt_log_scan(session, &metafile->ckpt_lsn, 0, __txn_log_recover, &r); } if (F_ISSET(conn, WT_CONN_SALVAGE)) ret = 0; /* * If log scan couldn't find a file we expected to be around, * this indicates a corruption of some sort. */ if (ret == ENOENT) { F_SET(conn, WT_CONN_DATA_CORRUPTION); ret = WT_ERROR; } WT_ERR(ret); } /* Scan the metadata to find the live files and their IDs. */ WT_ERR(__recovery_file_scan(&r)); /* * Clear this out. We no longer need it and it could have been * re-allocated when scanning the files. */ WT_NOT_READ(metafile, NULL); /* * We no longer need the metadata cursor: close it to avoid pinning any * resources that could block eviction during recovery. */ r.files[0].c = NULL; WT_ERR(metac->close(metac)); /* * Now, recover all the files apart from the metadata. * Pass WT_LOGSCAN_RECOVER so that old logs get truncated. */ r.metadata_only = false; __wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS, "Main recovery loop: starting at %" PRIu32 "/%" PRIu32 " to %" PRIu32 "/%" PRIu32, r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset, r.max_rec_lsn.l.file, r.max_rec_lsn.l.offset); WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec)); /* * Check if the database was shut down cleanly. If not * return an error if the user does not want automatic * recovery. */ if (needs_rec && (FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR) || F_ISSET(conn, WT_CONN_READONLY))) { if (F_ISSET(conn, WT_CONN_READONLY)) WT_ERR_MSG(session, WT_RUN_RECOVERY, "Read-only database needs recovery"); WT_ERR_MSG(session, WT_RUN_RECOVERY, "Database needs recovery"); } if (F_ISSET(conn, WT_CONN_READONLY)) { do_checkpoint = false; goto done; } /* * Recovery can touch more data than fits in cache, so it relies on * regular eviction to manage paging. Start eviction threads for * recovery without LAS cursors. */ WT_ERR(__wt_evict_create(session)); eviction_started = true; /* * Always run recovery even if it was a clean shutdown only if * this is not a read-only connection. * We can consider skipping it in the future. */ if (needs_rec) FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY); if (WT_IS_INIT_LSN(&r.ckpt_lsn)) ret = __wt_log_scan(session, NULL, WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER, __txn_log_recover, &r); else ret = __wt_log_scan(session, &r.ckpt_lsn, WT_LOGSCAN_RECOVER, __txn_log_recover, &r); if (F_ISSET(conn, WT_CONN_SALVAGE)) ret = 0; WT_ERR(ret); conn->next_file_id = r.max_fileid; done: WT_ERR(__recovery_set_checkpoint_timestamp(&r)); if (do_checkpoint) /* * Forcibly log a checkpoint so the next open is fast and keep * the metadata up to date with the checkpoint LSN and * archiving. */ WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); /* * If we're downgrading and have newer log files, force an archive, * no matter what the archive setting is. */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) WT_ERR(__wt_log_truncate_files(session, NULL, true)); FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE); err: WT_TRET(__recovery_free(&r)); __wt_free(session, config); FLD_CLR(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY); if (ret != 0) { FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_FAILED); __wt_err(session, ret, "Recovery failed"); } /* * Destroy the eviction threads that were started in support of * recovery. They will be restarted once the lookaside table is * created. */ if (eviction_started) WT_TRET(__wt_evict_destroy(session)); WT_TRET(session->iface.close(&session->iface, NULL)); F_CLR(conn, WT_CONN_RECOVERING); return (ret); }