/*
 * __log_truncate --
 *     Cut the log back to the given LSN. The file named by the LSN is always
 *     truncated to the LSN's offset. When this_log is zero, every log file
 *     numbered strictly between the LSN's file and the trunc_lsn's file is
 *     additionally truncated back to its first record; since log files are
 *     pre-allocated, that frees the space and allows the log to be
 *     traversed. The trunc_lsn is the upper bound because logging has
 *     already opened the new/next log file before recovery ran. This
 *     function assumes we are in recovery or other dedicated time and not
 *     during live running.
 */
static int
__log_truncate(WT_SESSION_IMPL *session, WT_LSN *lsn, uint32_t this_log)
{
    WT_DECL_RET;
    WT_FH *fh, *closing;
    WT_LOG *log;
    uint32_t filenum;
    u_int nfiles, slot;
    char **names;

    log = S2C(session)->log;
    fh = NULL;
    names = NULL;
    nfiles = 0;

    /* Cut the file named by the LSN back to the LSN's offset. */
    WT_ERR(__log_openfile(session, 0, &fh, lsn->file));
    WT_ERR(__wt_ftruncate(session, fh, lsn->offset));

    /* Forget the handle before closing it so cleanup can't close it again. */
    closing = fh;
    fh = NULL;
    WT_ERR(__wt_close(session, closing));

    /* Only look for intervening logs when asked for more than this file. */
    if (!this_log) {
        WT_ERR(__wt_log_get_files(session, &names, &nfiles));
        for (slot = 0; slot < nfiles; ++slot) {
            WT_ERR(__wt_log_extract_lognum(session, names[slot], &filenum));

            /* Skip anything not strictly between the two LSNs' files. */
            if (filenum <= lsn->file || filenum >= log->trunc_lsn.file)
                continue;

            /*
             * An intervening, pre-allocated file: truncate it to the end
             * of the log file header.
             */
            WT_ERR(__log_openfile(session, 0, &fh, filenum));
            WT_ERR(__wt_ftruncate(session, fh, LOG_FIRST_RECORD));
            closing = fh;
            fh = NULL;
            WT_ERR(__wt_close(session, closing));
        }
    }

err:
    if (fh != NULL)
        WT_TRET(__wt_close(session, fh));
    if (names != NULL)
        __wt_log_files_free(session, names, nfiles);
    return (ret);
}
/*
 * __wt_connection_destroy --
 *     Destroy the connection's underlying WT_CONNECTION_IMPL structure:
 *     close the lock and log file handles if set, unlink the connection
 *     from the process-wide connection list, discard configuration and
 *     free-on-close data, destroy the connection's spinlocks and free its
 *     memory, including the structure itself. A NULL connection is a no-op
 *     returning 0; otherwise ret is returned, and only the two
 *     WT_TRET-wrapped close calls can change it.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
    WT_DECL_RET;
    WT_SESSION_IMPL *session;

    /* Check there's something to destroy. */
    if (conn == NULL)
        return (0);

    session = conn->default_session;

    /*
     * Close remaining open files before discarding the mutexes: the
     * underlying file-close code uses a mutex to guard lists of open files.
     */
    if (conn->lock_fh != NULL)
        WT_TRET(__wt_close(session, conn->lock_fh));

    if (conn->log_fh != NULL)
        WT_TRET(__wt_close(session, conn->log_fh));

    /* Remove from the list of connections, under the process spinlock. */
    __wt_spin_lock(session, &__wt_process.spinlock);
    TAILQ_REMOVE(&__wt_process.connqh, conn, q);
    __wt_spin_unlock(session, &__wt_process.spinlock);

    /* Configuration */
    __wt_conn_config_discard(session); /* configuration */

    __wt_conn_foc_discard(session); /* free-on-close */

    /* Destroy the connection's locks; the file handles are already closed. */
    __wt_spin_destroy(session, &conn->api_lock);
    __wt_spin_destroy(session, &conn->block_lock);
    __wt_spin_destroy(session, &conn->checkpoint_lock);
    __wt_spin_destroy(session, &conn->fh_lock);
    __wt_spin_destroy(session, &conn->hot_backup_lock);
    __wt_spin_destroy(session, &conn->schema_lock);
    __wt_spin_destroy(session, &conn->serial_lock);

    /* Free allocated memory. */
    __wt_free(session, conn->home);
    __wt_free(session, conn->error_prefix);
    __wt_free(session, conn->sessions);

    /* The connection itself is freed last, with no session handle passed. */
    __wt_free(NULL, conn);
    return (ret);
}
/*
 * __wt_close_connection_close --
 *     Drain the connection's list of file handles at connection close,
 *     closing each one. Returns EBUSY (and logs the handle's name) if any
 *     handle was unexpectedly still open.
 */
int
__wt_close_connection_close(WT_SESSION_IMPL *session)
{
    WT_DECL_RET;
    WT_FH *handle;
    WT_CONNECTION_IMPL *conn;

    conn = S2C(session);

    for (;;) {
        /* Always take the head: closing is expected to unlink it. */
        handle = TAILQ_FIRST(&conn->fhqh);
        if (handle == NULL)
            break;

        /*
         * The only handles allowed to remain are those of an in-memory
         * configuration with a zero reference count; anything else is
         * reported as busy.
         */
        if (!(F_ISSET(conn, WT_CONN_IN_MEMORY) && handle->ref == 0)) {
            ret = EBUSY;
            __wt_errx(session,
                "Connection has open file handles: %s", handle->name);
        }

        /* Set the reference count to one and clear the in-memory flag. */
        handle->ref = 1;
        F_CLR(handle, WT_FH_IN_MEMORY);

        WT_TRET(__wt_close(session, &handle));
    }

    return (ret);
}
/*
 * __wt_connection_destroy --
 *     Destroy the connection's underlying WT_CONNECTION_IMPL structure:
 *     close the lock file handle, unlink the connection from the
 *     process-wide connection list, discard configuration and free-on-close
 *     data, destroy the connection's locks (including the per-page lock
 *     array) and free its memory, including the structure itself. A NULL
 *     connection is a no-op returning 0; otherwise ret is returned, and only
 *     the WT_TRET-wrapped calls can change it.
 */
int
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
{
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    u_int i;

    /* Check there's something to destroy. */
    if (conn == NULL)
        return (0);

    session = conn->default_session;

    /*
     * Close remaining open files before discarding the mutexes: the
     * underlying file-close code uses a mutex to guard lists of open files.
     */
    WT_TRET(__wt_close(session, &conn->lock_fh));

    /* Remove from the list of connections, under the process spinlock. */
    __wt_spin_lock(session, &__wt_process.spinlock);
    TAILQ_REMOVE(&__wt_process.connqh, conn, q);
    __wt_spin_unlock(session, &__wt_process.spinlock);

    /* Configuration */
    __wt_conn_config_discard(session); /* configuration */

    __wt_conn_foc_discard(session); /* free-on-close */

    /*
     * Destroy the connection's locks. The hot backup lock is a read/write
     * lock whose destroy call can fail, the rest are spinlocks.
     */
    __wt_spin_destroy(session, &conn->api_lock);
    __wt_spin_destroy(session, &conn->block_lock);
    __wt_spin_destroy(session, &conn->checkpoint_lock);
    __wt_spin_destroy(session, &conn->dhandle_lock);
    __wt_spin_destroy(session, &conn->encryptor_lock);
    __wt_spin_destroy(session, &conn->fh_lock);
    WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock));
    __wt_spin_destroy(session, &conn->las_lock);
    __wt_spin_destroy(session, &conn->metadata_lock);
    __wt_spin_destroy(session, &conn->reconfig_lock);
    __wt_spin_destroy(session, &conn->schema_lock);
    __wt_spin_destroy(session, &conn->table_lock);
    __wt_spin_destroy(session, &conn->turtle_lock);

    /* Destroy each page lock, then free the array that held them. */
    for (i = 0; i < WT_PAGE_LOCKS; ++i)
        __wt_spin_destroy(session, &conn->page_lock[i]);
    __wt_free(session, conn->page_lock);

    /* Free allocated memory. */
    __wt_free(session, conn->cfg);
    __wt_free(session, conn->home);
    __wt_free(session, conn->error_prefix);
    __wt_free(session, conn->sessions);

    /* The connection itself is freed last, with no session handle passed. */
    __wt_free(NULL, conn);
    return (ret);
}
/*
 * __wt_log_close --
 *     Close the log file: first any older log file still waiting to be
 *     closed (log_close_fh, when it is a different handle from the current
 *     log file), then the current log file (log_fh). Both handle pointers
 *     are cleared once their handle has been closed, so no stale pointer to
 *     a closed handle is left in the log structure. Returns 0 on success or
 *     the first error from the verbose-message or close calls.
 */
int
__wt_log_close(WT_SESSION_IMPL *session)
{
    WT_CONNECTION_IMPL *conn;
    WT_LOG *log;

    conn = S2C(session);
    log = conn->log;

    if (log->log_close_fh != NULL && log->log_close_fh != log->log_fh) {
        WT_RET(__wt_verbose(session, WT_VERB_LOG,
            "closing old log %s", log->log_close_fh->name));
        WT_RET(__wt_close(session, log->log_close_fh));

        /* The handle is closed, don't leave a pointer to it behind. */
        log->log_close_fh = NULL;
    }
    if (log->log_fh != NULL) {
        WT_RET(__wt_verbose(session, WT_VERB_LOG,
            "closing log %s", log->log_fh->name));

        /*
         * The pending-close handle may be the current log file itself (the
         * test above skips that case); it is about to be closed through
         * log_fh, so drop the alias as well.
         */
        if (log->log_close_fh == log->log_fh)
            log->log_close_fh = NULL;

        WT_RET(__wt_close(session, log->log_fh));
        log->log_fh = NULL;
    }
    return (0);
}
/*
 * __fstream_close --
 *     Tear down a stream handle: flush it unless it was opened for reading,
 *     close the underlying file handle, then release the stream's buffer and
 *     the stream structure itself. The cleanup always runs; the returned
 *     value is whatever the WT_TRET-wrapped flush and close left in ret.
 */
static int
__fstream_close(WT_SESSION_IMPL *session, WT_FSTREAM *fstr)
{
    WT_DECL_RET;

    /* Streams opened for reading are not flushed. */
    if (!F_ISSET(fstr, WT_STREAM_READ)) {
        WT_TRET(fstr->fstr_flush(session, fstr));
    }

    /* Close the file even if the flush failed. */
    WT_TRET(__wt_close(session, &fstr->fh));

    /* Discard the stream's memory. */
    __wt_buf_free(session, &fstr->buf);
    __wt_free(session, fstr);

    return (ret);
}
/*
 * __optrack_open_file --
 *     Open the per-session operation-tracking file and write its header.
 *     Fails with WT_ERROR if operation tracking is not configured on the
 *     connection. On any failure after the name buffer is allocated, the
 *     session's tracking file handle is closed before returning.
 */
static int
__optrack_open_file(WT_SESSION_IMPL *session)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_ITEM(path);
    WT_DECL_RET;
    WT_OPTRACK_HEADER header = {WT_OPTRACK_VERSION, 0,
        (uint32_t)WT_TSC_DEFAULT_RATIO * WT_THOUSAND};

    conn = S2C(session);

    if (!F_ISSET(conn, WT_CONN_OPTRACK))
        WT_RET_MSG(session, WT_ERROR, "WT_CONN_OPTRACK not set");

    /* Build the file name in a scratch buffer and create/open the file. */
    WT_RET(__wt_scr_alloc(session, 0, &path));
    WT_ERR(__wt_filename_construct(session, conn->optrack_path,
        "optrack", conn->optrack_pid, session->id, path));
    WT_ERR(__wt_open(session, (const char *)path->data,
        WT_FS_OPEN_FILE_TYPE_REGULAR, WT_FS_OPEN_CREATE,
        &session->optrack_fh));

    /* Indicate whether this is an internal session. */
    if (F_ISSET(session, WT_SESSION_INTERNAL))
        header.optrack_session_internal = 1;

    /*
     * Record the clock ticks to nanoseconds ratio, multiplied by one
     * thousand so a fixed width integer can be used.
     */
    header.optrack_tsc_nsec_ratio =
        (uint32_t)(__wt_process.tsc_nsec_ratio * WT_THOUSAND);

    /* The header goes at offset zero; tracking data follows it. */
    WT_ERR(session->optrack_fh->handle->fh_write(
        session->optrack_fh->handle, (WT_SESSION *)session,
        0, sizeof(WT_OPTRACK_HEADER), &header));
    session->optrack_offset = sizeof(WT_OPTRACK_HEADER);

    goto done;

err:
    WT_TRET(__wt_close(session, &session->optrack_fh));

done:
    __wt_scr_free(session, &path);
    return (ret);
}
/*
 * __wt_fopen --
 *     Open a stream handle on top of a regular file. On success the new
 *     stream is returned through fstrp; on failure *fstrp is NULL, the file
 *     handle is closed and any partially built stream is freed. The stream's
 *     method table depends on the flags: append/write streams can flush and
 *     print but not read lines, read streams can only read lines.
 */
int
__wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags,
    uint32_t flags, WT_FSTREAM **fstrp)
{
    WT_DECL_RET;
    WT_FH *handle;
    WT_FSTREAM *stream;

    stream = NULL;
    *fstrp = NULL;

    WT_RET(__wt_open(
        session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &handle));

    WT_ERR(__wt_calloc_one(session, &stream));
    stream->flags = flags;
    stream->fh = handle;
    stream->name = handle->name;
    stream->close = __fstream_close;

    /* Appending streams start at the current end of the file. */
    WT_ERR(__wt_filesize(session, handle, &stream->size));
    if (LF_ISSET(WT_STREAM_APPEND))
        stream->off = stream->size;

    if (!LF_ISSET(WT_STREAM_APPEND | WT_STREAM_WRITE)) {
        /* Not a writer, so it must have been opened for reading. */
        WT_ASSERT(session, LF_ISSET(WT_STREAM_READ));
        stream->fstr_flush = __fstream_flush_notsup;
        stream->fstr_getline = __fstream_getline;
        stream->fstr_printf = __fstream_printf_notsup;
    } else {
        stream->fstr_flush = __fstream_flush;
        stream->fstr_getline = __fstream_getline_notsup;
        stream->fstr_printf = __fstream_printf;
    }

    *fstrp = stream;
    return (0);

err:
    WT_TRET(__wt_close(session, &handle));
    __wt_free(session, stream);
    return (ret);
}
/*
 * __wt_block_manager_truncate --
 *     Truncate a file to zero length and rewrite its descriptor. The file
 *     handle is closed whether or not those steps succeed.
 */
int
__wt_block_manager_truncate(
    WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize)
{
    WT_DECL_RET;
    WT_FH *handle;

    /* Nothing to clean up if the open itself fails. */
    WT_RET(__wt_open(session, filename, 0, 0, WT_FILE_TYPE_DATA, &handle));

    /* Empty the file, then write out the file's meta-data. */
    if ((ret = __wt_ftruncate(session, handle, (off_t)0)) == 0)
        ret = __wt_desc_init(session, handle, allocsize);

    /* Close the handle on both the success and failure paths. */
    WT_TRET(__wt_close(session, handle));

    return (ret);
}
/*
 * __block_destroy --
 *     Destroy a block handle: unlink it from the connection's block list,
 *     free its name, close its file handle, destroy its live lock and
 *     finally overwrite and free the structure. Returns the result of the
 *     file close, if there was one to do.
 */
static int
__block_destroy(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
    WT_DECL_RET;

    /* Take the block off the connection's list first. */
    TAILQ_REMOVE(&S2C(session)->blockqh, block, q);

    if (block->name != NULL) {
        __wt_free(session, block->name);
    }

    if (block->fh != NULL) {
        WT_TRET(__wt_close(session, block->fh));
    }

    __wt_spin_destroy(session, &block->live_lock);

    __wt_overwrite_and_free(session, block);

    return (ret);
}
/*
 * __wt_sync_and_rename_fh --
 *     Flush and close a file, rename it into place and flush the directory.
 *     Ownership of the handle passes to this function: the caller's pointer
 *     is cleared up front and the handle is closed even if the flush fails.
 */
int
__wt_sync_and_rename_fh(
    WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to)
{
    WT_DECL_RET;
    WT_FH *handle;

    /* Take the handle away from the caller. */
    handle = *fhp;
    *fhp = NULL;

    /* Flush to disk, then close whatever the flush's outcome. */
    ret = __wt_fsync(session, handle);
    WT_TRET(__wt_close(session, &handle));
    if (ret != 0)
        return (ret);

    /* Rename the source file to the target. */
    if ((ret = __wt_rename(session, from, to)) != 0)
        return (ret);

    /* Flush the backing directory to guarantee the rename. */
    ret = __wt_directory_sync(session, NULL);
    return (ret);
}
/*
 * __wt_close_connection_close --
 *     Drain the connection's list of file handles at connection close,
 *     closing each one. Returns EBUSY (and logs the handle's name) if any
 *     handle still had a non-zero reference count.
 */
int
__wt_close_connection_close(WT_SESSION_IMPL *session)
{
    WT_DECL_RET;
    WT_FH *handle;
    WT_CONNECTION_IMPL *conn;

    conn = S2C(session);

    for (;;) {
        /* Always take the head: closing is expected to unlink it. */
        handle = TAILQ_FIRST(&conn->fhqh);
        if (handle == NULL)
            break;

        /* A handle that is still referenced is reported as busy. */
        if (handle->ref != 0) {
            ret = EBUSY;
            __wt_errx(session,
                "Connection has open file handles: %s", handle->name);
        }

        /* Set the reference count to one before closing the handle. */
        handle->ref = 1;

        WT_TRET(__wt_close(session, &handle));
    }

    return (ret);
}
/*
 * __wt_turtle_update --
 *     Update the turtle file. The contents (version information followed by
 *     the key and value) are written to a temporary "set" file, which is
 *     then synced and renamed over the real turtle file. On exit any handle
 *     still open is closed and any leftover temporary file is removed.
 */
int
__wt_turtle_update(
    WT_SESSION_IMPL *session, const char *key, const char *value)
{
    WT_FH *setfh;
    WT_DECL_ITEM(contents);
    WT_DECL_RET;
    int ver_major, ver_minor, ver_patch;
    const char *ver_string;

    setfh = NULL;

    /*
     * Create the turtle setup file: it is currently re-written from
     * scratch every time.
     */
    WT_RET(__wt_open(
        session, WT_METADATA_TURTLE_SET, 1, 1, WT_FILE_TYPE_TURTLE, &setfh));

    ver_string = wiredtiger_version(&ver_major, &ver_minor, &ver_patch);

    /* Format the whole file into a scratch buffer and write it in one go. */
    WT_ERR(__wt_scr_alloc(session, 2 * 1024, &contents));
    WT_ERR(__wt_buf_fmt(session, contents,
        "%s\n%s\n%s\n" "major=%d,minor=%d,patch=%d\n%s\n%s\n",
        WT_METADATA_VERSION_STR, ver_string,
        WT_METADATA_VERSION, ver_major, ver_minor, ver_patch,
        key, value));
    WT_ERR(__wt_write(session, setfh, 0, contents->size, contents->data));

    /* Flush the handle and rename the file into place. */
    ret = __wt_sync_and_rename_fh(
        session, &setfh, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE);

err:
    /* Close any file handle left open, remove any temporary file. */
    WT_TRET(__wt_close(session, &setfh));
    WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET));

    __wt_scr_free(session, &contents);
    return (ret);
}
/*
 * __wt_conn_optrack_teardown --
 *     Release the connection-wide resources used for operation logging.
 *     The tracking path is only freed when this is not a reconfiguration;
 *     the map spinlock, map file and dummy-session buffer are only released
 *     when operation tracking is enabled on the connection.
 */
int
__wt_conn_optrack_teardown(WT_SESSION_IMPL *session, bool reconfig)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;

    conn = S2C(session);

    /* Not a reconfiguration: looks like we are shutting down. */
    if (!reconfig) {
        __wt_free(session, conn->optrack_path);
    }

    /* Without operation tracking there is nothing else to release. */
    if (!F_ISSET(conn, WT_CONN_OPTRACK)) {
        return (0);
    }

    __wt_spin_destroy(session, &conn->optrack_map_spinlock);

    WT_TRET(__wt_close(session, &conn->optrack_map_fh));

    __wt_free(session, conn->dummy_session.optrack_buf);

    return (ret);
}
/*
 * __wt_block_manager_create --
 *     Create a file and write its descriptor. If writing the descriptor or
 *     closing the handle fails, the newly created file is removed again.
 */
int
__wt_block_manager_create(
    WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize)
{
    WT_DECL_RET;
    WT_FH *handle;

    /* Create the underlying file and open a handle. */
    WT_RET(__wt_open(session, filename, 1, 1, WT_FILE_TYPE_DATA, &handle));

    /* Write out the file's meta-data, then close the handle regardless. */
    ret = __wt_desc_init(session, handle, allocsize);
    WT_TRET(__wt_close(session, handle));

    if (ret == 0)
        return (0);

    /* Something failed: undo the create. */
    WT_TRET(__wt_remove(session, filename));
    return (ret);
}
/*
 * __log_file_server --
 *     The log file server thread. This worker thread manages log file
 *     operations such as closing and syncing. It loops for as long as
 *     WT_CONN_LOG_SERVER_RUN is set on the connection; each pass closes a
 *     pending old log file if there is one, performs a background sync if
 *     one was requested, then waits on the connection's log file condition.
 *     Any error ends the loop and is reported through __wt_err.
 */
static WT_THREAD_RET
__log_file_server(void *arg)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;
    WT_FH *close_fh;
    WT_LOG *log;
    WT_LSN close_end_lsn, close_lsn, min_lsn;
    WT_SESSION_IMPL *session;
    int locked;

    session = arg;
    conn = S2C(session);
    log = conn->log;

    /*
     * Tracks whether this thread holds log_sync_lock, so the error path
     * below knows whether it must release it.
     */
    locked = 0;
    while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) {
        /*
         * If there is a log file to close, make sure any outstanding
         * write operations have completed, then fsync and close it.
         *
         * The file is only closed once the write LSN has moved to a later
         * file. A failure to extract the log number only makes the
         * condition false; it is not reported here.
         */
        if ((close_fh = log->log_close_fh) != NULL &&
            (ret = __wt_log_extract_lognum(session, close_fh->name,
            &close_lsn.file)) == 0 &&
            close_lsn.file < log->write_lsn.file) {
            /*
             * We've copied the file handle, clear out the one in
             * log structure to allow it to be set again.
             */
            log->log_close_fh = NULL;
            /*
             * Set the close_end_lsn to the LSN immediately after
             * ours. That is, the beginning of the next log file.
             * We need to know the LSN file number of our own close
             * in case earlier calls are still in progress and the
             * next one to move the sync_lsn into the next file for
             * later syncs.
             */
            close_lsn.offset = 0;
            close_end_lsn = close_lsn;
            close_end_lsn.file++;

            /* Sync outside the lock; only the close and update hold it. */
            WT_ERR(__wt_fsync(session, close_fh));
            __wt_spin_lock(session, &log->log_sync_lock);
            locked = 1;
            WT_ERR(__wt_close(session, &close_fh));
            WT_ASSERT(session,
                WT_LOG_CMP(&close_end_lsn, &log->sync_lsn) >= 0);
            log->sync_lsn = close_end_lsn;
            WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
            locked = 0;
            __wt_spin_unlock(session, &log->log_sync_lock);
        }
        /*
         * If a later thread asked for a background sync, do it now.
         */
        if (WT_LOG_CMP(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
            /*
             * Save the latest write LSN which is the minimum
             * we will have written to disk.
             */
            min_lsn = log->write_lsn;
            /*
             * The sync LSN we asked for better be smaller than
             * the current written LSN.
             */
            WT_ASSERT(session,
                WT_LOG_CMP(&log->bg_sync_lsn, &min_lsn) <= 0);
            WT_ERR(__wt_fsync(session, log->log_fh));
            __wt_spin_lock(session, &log->log_sync_lock);
            locked = 1;
            /*
             * The sync LSN could have advanced while we were
             * writing to disk.
             */
            if (WT_LOG_CMP(&log->sync_lsn, &min_lsn) <= 0) {
                log->sync_lsn = min_lsn;
                WT_ERR(__wt_cond_signal(
                    session, log->log_sync_cond));
            }
            locked = 0;
            __wt_spin_unlock(session, &log->log_sync_lock);
        }
        /* Wait until the next event. */
        WT_ERR(__wt_cond_wait(
            session, conn->log_file_cond, WT_MILLION));
    }

    /* Only reached through the err label: report what stopped the server. */
    if (0) {
err:    __wt_err(session, ret, "log close server error");
    }

    /* An error may have been raised while log_sync_lock was held. */
    if (locked)
        __wt_spin_unlock(session, &log->log_sync_lock);
    return (WT_THREAD_RET_VALUE);
}
/*
 * __wt_connection_close --
 *     Close a connection handle: wait for the oldest transaction ID to
 *     catch up, stop the server threads, close data handles, logging,
 *     extensions and files, discard per-session memory that outlives the
 *     sessions, and finally destroy the connection structure. Every step is
 *     attempted even if an earlier one failed; the steps' results are
 *     collected with WT_TRET and ret is returned.
 */
int
__wt_connection_close(WT_CONNECTION_IMPL *conn)
{
    WT_CONNECTION *wt_conn;
    WT_DECL_RET;
    WT_DLH *dlh;
    WT_SESSION_IMPL *s, *session;
    WT_TXN_GLOBAL *txn_global;
    u_int i;

    wt_conn = &conn->iface;
    txn_global = &conn->txn_global;
    session = conn->default_session;

    /*
     * We're shutting down. Make sure everything gets freed.
     *
     * It's possible that the eviction server is in the middle of a long
     * operation, with a transaction ID pinned. In that case, we will loop
     * here until the transaction ID is released, when the oldest
     * transaction ID will catch up with the current ID.
     */
    for (;;) {
        WT_TRET(__wt_txn_update_oldest(session,
            WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
        if (txn_global->oldest_id == txn_global->current)
            break;
        __wt_yield();
    }

    /* Clear any pending async ops. */
    WT_TRET(__wt_async_flush(session));

    /*
     * Shut down server threads other than the eviction server, which is
     * needed later to close btree handles. Some of these threads access
     * btree handles, so take care in ordering shutdown to make sure they
     * exit before files are closed.
     */
    F_CLR(conn, WT_CONN_SERVER_RUN);
    WT_TRET(__wt_async_destroy(session));
    WT_TRET(__wt_lsm_manager_destroy(session));
    WT_TRET(__wt_sweep_destroy(session));

    F_SET(conn, WT_CONN_CLOSING);

    WT_TRET(__wt_checkpoint_server_destroy(session));
    WT_TRET(__wt_statlog_destroy(session, true));
    WT_TRET(__wt_evict_destroy(session));

    /* Shut down the lookaside table, after all eviction is complete. */
    WT_TRET(__wt_las_destroy(session));

    /* Close open data handles. */
    WT_TRET(__wt_conn_dhandle_discard(session));

    /* Shut down metadata tracking, required before creating tables. */
    WT_TRET(__wt_meta_track_destroy(session));

    /*
     * Now that all data handles are closed, tell logging that a checkpoint
     * has completed then shut down the log manager (only after closing
     * data handles). The call to destroy the log manager is outside the
     * conditional because we allocate the log path so that printlog can
     * run without running logging or recovery.
     */
    if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
        FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE))
        WT_TRET(__wt_txn_checkpoint_log(
            session, true, WT_TXN_LOG_CKPT_STOP, NULL));
    F_CLR(conn, WT_CONN_LOG_SERVER_RUN);
    WT_TRET(__wt_logmgr_destroy(session));

    /* Free memory for collators, compressors, data sources. */
    WT_TRET(__wt_conn_remove_collator(session));
    WT_TRET(__wt_conn_remove_compressor(session));
    WT_TRET(__wt_conn_remove_data_source(session));
    WT_TRET(__wt_conn_remove_encryptor(session));
    WT_TRET(__wt_conn_remove_extractor(session));

    /* Disconnect from shared cache - must be before cache destroy. */
    WT_TRET(__wt_conn_cache_pool_destroy(session));

    /* Discard the cache. */
    WT_TRET(__wt_cache_destroy(session));

    /* Discard transaction state. */
    WT_TRET(__wt_txn_global_destroy(session));

    /* Close extensions, first calling any unload entry point. */
    while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) {
        TAILQ_REMOVE(&conn->dlhqh, dlh, q);

        if (dlh->terminate != NULL)
            WT_TRET(dlh->terminate(wt_conn));
        WT_TRET(__wt_dlclose(session, dlh));
    }

    /* Close the lock file, opening up the database to other connections. */
    if (conn->lock_fh != NULL)
        WT_TRET(__wt_close(session, &conn->lock_fh));

    /* Close any file handles left open. */
    WT_TRET(__wt_close_connection_close(session));

    /*
     * Close the internal (default) session, and switch back to the dummy
     * session in case of any error messages from the remaining operations
     * while destroying the connection handle.
     */
    if (session != &conn->dummy_session) {
        WT_TRET(session->iface.close(&session->iface, NULL));
        session = conn->default_session = &conn->dummy_session;
    }

    /*
     * The session's split stash isn't discarded during normal session close
     * because it may persist past the life of the session. Discard it now.
     */
    if ((s = conn->sessions) != NULL)
        for (i = 0; i < conn->session_size; ++s, ++i)
            __wt_split_stash_discard_all(session, s);

    /*
     * The session's hazard pointer memory isn't discarded during normal
     * session close because access to it isn't serialized. Discard it
     * now.
     */
    if ((s = conn->sessions) != NULL)
        for (i = 0; i < conn->session_size; ++s, ++i) {
            /*
             * If hash arrays were allocated, free them now.
             */
            __wt_free(session, s->dhhash);
            __wt_free(session, s->tablehash);
            __wt_free(session, s->hazard);
        }

    /* Destroy the handle. */
    WT_TRET(__wt_connection_destroy(conn));

    return (ret);
}
/*
 * __wt_connection_close --
 *     Close a connection handle: shut down transactions, flag the
 *     connection as closing, stop the server threads, close data handles,
 *     logging and files, discard per-session memory that outlives the
 *     sessions, terminate the file system and extensions, and finally
 *     destroy the connection structure. Every step is attempted even if an
 *     earlier one failed; the steps' results are collected with WT_TRET and
 *     ret is returned.
 */
int
__wt_connection_close(WT_CONNECTION_IMPL *conn)
{
    WT_CONNECTION *wt_conn;
    WT_DECL_RET;
    WT_DLH *dlh;
    WT_SESSION_IMPL *s, *session;
    u_int i;

    wt_conn = &conn->iface;
    session = conn->default_session;

    /* Shut down transactions (wait for in-flight operations to complete). */
    WT_TRET(__wt_txn_global_shutdown(session));

    /* Shut down the subsystems, ensuring workers see the state change. */
    F_SET(conn, WT_CONN_CLOSING);
    WT_FULL_BARRIER();

    /*
     * Clear any pending async operations and shut down the async worker
     * threads and system before closing LSM.
     */
    WT_TRET(__wt_async_flush(session));
    WT_TRET(__wt_async_destroy(session));

    /*
     * Shut down server threads other than the eviction server, which is
     * needed later to close btree handles. Some of these threads access
     * btree handles, so take care in ordering shutdown to make sure they
     * exit before files are closed.
     */
    WT_TRET(__wt_lsm_manager_destroy(session));

    /*
     * Once the async and LSM threads exit, we shouldn't be opening any
     * more files.
     */
    F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS);
    WT_FULL_BARRIER();

    WT_TRET(__wt_checkpoint_server_destroy(session));
    WT_TRET(__wt_statlog_destroy(session, true));
    WT_TRET(__wt_sweep_destroy(session));

    /* The eviction server is shut down last. */
    WT_TRET(__wt_evict_destroy(session));

    /* Shut down the lookaside table, after all eviction is complete. */
    WT_TRET(__wt_las_destroy(session));

    /* Close open data handles. */
    WT_TRET(__wt_conn_dhandle_discard(session));

    /* Shut down metadata tracking. */
    WT_TRET(__wt_meta_track_destroy(session));

    /*
     * Now that all data handles are closed, tell logging that a checkpoint
     * has completed then shut down the log manager (only after closing
     * data handles). The call to destroy the log manager is outside the
     * conditional because we allocate the log path so that printlog can
     * run without running logging or recovery.
     */
    if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
        FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE))
        WT_TRET(__wt_txn_checkpoint_log(
            session, true, WT_TXN_LOG_CKPT_STOP, NULL));
    WT_TRET(__wt_logmgr_destroy(session));

    /* Free memory for collators, compressors, data sources. */
    WT_TRET(__wt_conn_remove_collator(session));
    WT_TRET(__wt_conn_remove_compressor(session));
    WT_TRET(__wt_conn_remove_data_source(session));
    WT_TRET(__wt_conn_remove_encryptor(session));
    WT_TRET(__wt_conn_remove_extractor(session));

    /* Disconnect from shared cache - must be before cache destroy. */
    WT_TRET(__wt_conn_cache_pool_destroy(session));

    /* Discard the cache. */
    WT_TRET(__wt_cache_destroy(session));

    /* Discard transaction state. */
    __wt_txn_global_destroy(session);

    /* Close the lock file, opening up the database to other connections. */
    if (conn->lock_fh != NULL)
        WT_TRET(__wt_close(session, &conn->lock_fh));

    /* Close any file handles left open. */
    WT_TRET(__wt_close_connection_close(session));

    /*
     * Close the internal (default) session, and switch back to the dummy
     * session in case of any error messages from the remaining operations
     * while destroying the connection handle.
     */
    if (session != &conn->dummy_session) {
        WT_TRET(session->iface.close(&session->iface, NULL));
        session = conn->default_session = &conn->dummy_session;
    }

    /*
     * The session split stash, hazard information and handle arrays aren't
     * discarded during normal session close, they persist past the life of
     * the session. Discard them now, unless the connection is flagged to
     * leak memory.
     */
    if (!F_ISSET(conn, WT_CONN_LEAK_MEMORY))
        if ((s = conn->sessions) != NULL)
            for (i = 0; i < conn->session_size; ++s, ++i) {
                __wt_free(session, s->dhhash);
                __wt_stash_discard_all(session, s);
                __wt_free(session, s->hazard);
            }

    /* Destroy the file-system configuration. */
    if (conn->file_system != NULL && conn->file_system->terminate != NULL)
        WT_TRET(conn->file_system->terminate(
            conn->file_system, (WT_SESSION *)session));

    /* Close extensions, first calling any unload entry point. */
    while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) {
        TAILQ_REMOVE(&conn->dlhqh, dlh, q);

        if (dlh->terminate != NULL)
            WT_TRET(dlh->terminate(wt_conn));
        WT_TRET(__wt_dlclose(session, dlh));
    }

    /* Destroy the handle; its result is not folded into ret here. */
    __wt_connection_destroy(conn);

    return (ret);
}
/* * __wt_log_scan -- * Scan the logs, calling a function on each record found. */ int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, void *cookie), void *cookie) { WT_CONNECTION_IMPL *conn; WT_ITEM buf; WT_DECL_RET; WT_FH *log_fh; WT_LOG *log; WT_LOG_RECORD *logrec; WT_LSN end_lsn, rd_lsn, start_lsn; off_t log_size; uint32_t allocsize, cksum, firstlog, lastlog, lognum, rdup_len, reclen; u_int i, logcount; int eol; char **logfiles; conn = S2C(session); log = conn->log; log_fh = NULL; logcount = 0; logfiles = NULL; eol = 0; WT_CLEAR(buf); /* * If the caller did not give us a callback function there is nothing * to do. */ if (func == NULL) return (0); if (LF_ISSET(WT_LOGSCAN_RECOVER)) WT_RET(__wt_verbose(session, WT_VERB_LOG, "__wt_log_scan truncating to %u/%" PRIuMAX, log->trunc_lsn.file, (uintmax_t)log->trunc_lsn.offset)); if (log != NULL) { allocsize = log->allocsize; if (lsnp == NULL) { if (LF_ISSET(WT_LOGSCAN_FIRST)) start_lsn = log->first_lsn; else if (LF_ISSET(WT_LOGSCAN_FROM_CKP)) start_lsn = log->ckpt_lsn; else return (WT_ERROR); /* Illegal usage */ } else { if (LF_ISSET(WT_LOGSCAN_FIRST|WT_LOGSCAN_FROM_CKP)) WT_RET_MSG(session, WT_ERROR, "choose either a start LSN or a start flag"); /* Offsets must be on allocation boundaries. */ if (lsnp->offset % allocsize != 0 || lsnp->file > log->fileid) return (WT_NOTFOUND); /* * Log cursors may not know the starting LSN. If an * LSN pointer is passed in, but it is the INIT_LSN, * start from the first_lsn. */ start_lsn = *lsnp; if (IS_INIT_LSN(&start_lsn)) start_lsn = log->first_lsn; } end_lsn = log->alloc_lsn; } else { /* * If logging is not configured, we can still print out the log * if log files exist. We just need to set the LSNs from what * is in the files versus what is in the live connection. */ /* * Set allocsize to the minimum alignment it could be. 
Larger * records and larger allocation boundaries should always be * a multiple of this. */ allocsize = LOG_ALIGN; lastlog = 0; firstlog = UINT32_MAX; WT_RET(__wt_log_get_files(session, &logfiles, &logcount)); if (logcount == 0) /* * Return it is not supported if none don't exist. */ return (ENOTSUP); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); lastlog = WT_MAX(lastlog, lognum); firstlog = WT_MIN(firstlog, lognum); } start_lsn.file = firstlog; end_lsn.file = lastlog; start_lsn.offset = end_lsn.offset = 0; __wt_log_files_free(session, logfiles, logcount); logfiles = NULL; } WT_ERR(__log_openfile(session, 0, &log_fh, start_lsn.file)); WT_ERR(__log_filesize(session, log_fh, &log_size)); rd_lsn = start_lsn; WT_ERR(__wt_buf_initsize(session, &buf, LOG_ALIGN)); for (;;) { if (rd_lsn.offset + allocsize > log_size) { advance: /* * If we read the last record, go to the next file. */ WT_ERR(__wt_close(session, log_fh)); log_fh = NULL; eol = 1; /* * Truncate this log file before we move to the next. */ if (LF_ISSET(WT_LOGSCAN_RECOVER)) WT_ERR(__log_truncate(session, &rd_lsn, 1)); rd_lsn.file++; rd_lsn.offset = 0; /* * Avoid an error message when we reach end of log * by checking here. */ if (rd_lsn.file > end_lsn.file) break; WT_ERR(__log_openfile( session, 0, &log_fh, rd_lsn.file)); WT_ERR(__log_filesize(session, log_fh, &log_size)); continue; } /* * Read the minimum allocation size a record could be. */ WT_ASSERT(session, buf.memsize >= allocsize); WT_ERR(__wt_read(session, log_fh, rd_lsn.offset, (size_t)allocsize, buf.mem)); /* * First 8 bytes is the real record length. See if we * need to read more than the allocation size. We expect * that we rarely will have to read more. Most log records * will be fairly small. */ reclen = *(uint32_t *)buf.mem; /* * Log files are pre-allocated. We never expect a zero length * unless we've reached the end of the log. 
The log can be * written out of order, so when recovery finds the end of * the log, truncate the file and remove any later log files * that may exist. */ if (reclen == 0) { /* This LSN is the end. */ break; } rdup_len = __wt_rduppo2(reclen, allocsize); if (reclen > allocsize) { /* * The log file end could be the middle of this * log record. */ if (rd_lsn.offset + rdup_len > log_size) goto advance; /* * We need to round up and read in the full padded * record, especially for direct I/O. */ WT_ERR(__wt_buf_grow(session, &buf, rdup_len)); WT_ERR(__wt_read(session, log_fh, rd_lsn.offset, (size_t)rdup_len, buf.mem)); WT_STAT_FAST_CONN_INCR(session, log_scan_rereads); } /* * We read in the record, verify checksum. */ buf.size = reclen; logrec = (WT_LOG_RECORD *)buf.mem; cksum = logrec->checksum; logrec->checksum = 0; logrec->checksum = __wt_cksum(logrec, logrec->len); if (logrec->checksum != cksum) { /* * A checksum mismatch means we have reached the end of * the useful part of the log. This should be found on * the first pass through recovery. In the second pass * where we truncate the log, this is where it should * end. */ if (log != NULL) log->trunc_lsn = rd_lsn; break; } /* * We have a valid log record. If it is not the log file * header, invoke the callback. */ WT_STAT_FAST_CONN_INCR(session, log_scan_records); if (rd_lsn.offset != 0) { WT_ERR((*func)(session, &buf, &rd_lsn, cookie)); if (LF_ISSET(WT_LOGSCAN_ONE)) break; } rd_lsn.offset += (off_t)rdup_len; } /* Truncate if we're in recovery. */ if (LF_ISSET(WT_LOGSCAN_RECOVER) && LOG_CMP(&rd_lsn, &log->trunc_lsn) < 0) WT_ERR(__log_truncate(session, &rd_lsn, 0)); err: WT_STAT_FAST_CONN_INCR(session, log_scans); if (logfiles != NULL) __wt_log_files_free(session, logfiles, logcount); __wt_buf_free(session, &buf); /* * If the caller wants one record and it is at the end of log, * return WT_NOTFOUND. 
*/ if (LF_ISSET(WT_LOGSCAN_ONE) && eol && ret == 0) ret = WT_NOTFOUND; if (ret == ENOENT) ret = 0; if (log_fh != NULL) WT_TRET(__wt_close(session, log_fh)); return (ret); }
/* * __log_file_server -- * The log file server thread. This worker thread manages * log file operations such as closing and syncing. */ static WT_THREAD_RET __log_file_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *close_fh; WT_LOG *log; WT_LSN close_end_lsn, min_lsn; WT_SESSION_IMPL *session; uint32_t filenum; bool locked; session = arg; conn = S2C(session); log = conn->log; locked = false; while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { /* * If there is a log file to close, make sure any outstanding * write operations have completed, then fsync and close it. */ if ((close_fh = log->log_close_fh) != NULL) { WT_ERR(__wt_log_extract_lognum(session, close_fh->name, &filenum)); /* * We update the close file handle before updating the * close LSN when changing files. It is possible we * could see mismatched settings. If we do, yield * until it is set. This should rarely happen. */ while (log->log_close_lsn.l.file < filenum) __wt_yield(); if (__wt_log_cmp( &log->write_lsn, &log->log_close_lsn) >= 0) { /* * We've copied the file handle, clear out the * one in the log structure to allow it to be * set again. Copy the LSN before clearing * the file handle. * Use a barrier to make sure the compiler does * not reorder the following two statements. */ close_end_lsn = log->log_close_lsn; WT_FULL_BARRIER(); log->log_close_fh = NULL; /* * Set the close_end_lsn to the LSN immediately * after ours. That is, the beginning of the * next log file. We need to know the LSN * file number of our own close in case earlier * calls are still in progress and the next one * to move the sync_lsn into the next file for * later syncs. */ WT_ERR(__wt_fsync(session, close_fh, true)); /* * We want to have the file size reflect actual * data with minimal pre-allocated zeroed space. * We can't truncate the file during hot backup, * or the underlying file system may not support * truncate: both are OK, it's just more work * during cursor traversal. 
*/ if (!conn->hot_backup) { __wt_readlock( session, conn->hot_backup_lock); if (!conn->hot_backup) WT_ERR_ERROR_OK( __wt_ftruncate(session, close_fh, close_end_lsn.l.offset), ENOTSUP); __wt_readunlock( session, conn->hot_backup_lock); } WT_SET_LSN(&close_end_lsn, close_end_lsn.l.file + 1, 0); __wt_spin_lock(session, &log->log_sync_lock); locked = true; WT_ERR(__wt_close(session, &close_fh)); WT_ASSERT(session, __wt_log_cmp( &close_end_lsn, &log->sync_lsn) >= 0); log->sync_lsn = close_end_lsn; __wt_cond_signal(session, log->log_sync_cond); locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } } /* * If a later thread asked for a background sync, do it now. */ if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) { /* * Save the latest write LSN which is the minimum * we will have written to disk. */ min_lsn = log->write_lsn; /* * We have to wait until the LSN we asked for is * written. If it isn't signal the wrlsn thread * to get it written. * * We also have to wait for the written LSN and the * sync LSN to be in the same file so that we know we * have synchronized all earlier log files. */ if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) { /* * If the sync file is behind either the one * wanted for a background sync or the write LSN * has moved to another file continue to let * this worker thread process that older file * immediately. */ if ((log->sync_lsn.l.file < log->bg_sync_lsn.l.file) || (log->sync_lsn.l.file < min_lsn.l.file)) continue; WT_ERR(__wt_fsync(session, log->log_fh, true)); __wt_spin_lock(session, &log->log_sync_lock); locked = true; /* * The sync LSN could have advanced while we * were writing to disk. 
*/ if (__wt_log_cmp( &log->sync_lsn, &min_lsn) <= 0) { WT_ASSERT(session, min_lsn.l.file == log->sync_lsn.l.file); log->sync_lsn = min_lsn; __wt_cond_signal( session, log->log_sync_cond); } locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } else { __wt_cond_auto_signal( session, conn->log_wrlsn_cond); /* * We do not want to wait potentially a second * to process this. Yield to give the wrlsn * thread a chance to run and try again in * this case. */ __wt_yield(); continue; } } /* Wait until the next event. */ __wt_cond_wait(session, conn->log_file_cond, WT_MILLION / 10); } if (0) { err: __wt_err(session, ret, "log close server error"); } if (locked) __wt_spin_unlock(session, &log->log_sync_lock); return (WT_THREAD_RET_VALUE); }
/*
 * __conn_config_file --
 *	Read in any WiredTiger.config file in the home directory.
 *
 *	If the file is missing or empty, return 0 with *cbufp left NULL.
 *	Otherwise the file's lines are collapsed into a single configuration
 *	string, the string is checked, and it is inserted into the cfg array
 *	just below the final entry.  The buffer holding the string is
 *	returned in *cbufp.  On error the buffer is discarded and *cbufp
 *	stays NULL.
 */
static int
__conn_config_file(WT_SESSION_IMPL *session, const char **cfg, WT_ITEM **cbufp)
{
	WT_DECL_ITEM(cbuf);
	WT_DECL_RET;
	WT_FH *fh;
	off_t size;
	uint32_t len;
	int exist, quoted;
	uint8_t *p, *t;

	*cbufp = NULL;				/* Returned buffer */

	fh = NULL;

	/* Check for an optional configuration file. */
#define	WT_CONFIGFILE	"WiredTiger.config"
	WT_RET(__wt_exist(session, WT_CONFIGFILE, &exist));
	if (!exist)
		return (0);

	/* Open the configuration file. */
	WT_RET(__wt_open(session, WT_CONFIGFILE, 0, 0, 0, &fh));
	WT_ERR(__wt_filesize(session, fh, &size));
	/* An empty file is not an error: close it and return success. */
	if (size == 0)
		goto err;

	/*
	 * Sanity test: a 100KB configuration file would be insane.  (There's
	 * no practical reason to limit the file size, but I can either limit
	 * the file size to something rational, or I can add code to test if
	 * the off_t size is larger than a uint32_t, which is more complicated
	 * and a waste of time.)
	 */
	if (size > 100 * 1024)
		WT_ERR_MSG(session, EFBIG, WT_CONFIGFILE);
	len = (uint32_t)size;

	/*
	 * Copy the configuration file into memory, with a little slop, I'm not
	 * interested in debugging off-by-ones.
	 *
	 * The beginning of a file is the same as if we run into an unquoted
	 * newline character, simplify the parsing loop by pretending that's
	 * what we're doing.
	 */
	WT_ERR(__wt_scr_alloc(session, len + 10, &cbuf));
	WT_ERR(
	    __wt_read(session, fh, (off_t)0, len, ((uint8_t *)cbuf->mem) + 1));
	((uint8_t *)cbuf->mem)[0] = '\n';
	cbuf->size = len + 1;

	/*
	 * The loop below treats len as the number of unprocessed bytes
	 * starting at p, and p starts on the newline we just prepended.
	 * Count that byte too, otherwise the final byte of the file is never
	 * looked at and a file without a trailing newline silently loses its
	 * last character.
	 */
	++len;

	/*
	 * Collapse the file's lines into a single string: newline characters
	 * are replaced with commas unless the newline is quoted or backslash
	 * escaped.  Comment lines (an unescaped newline where the next non-
	 * white-space character is a hash), are discarded.
	 *
	 * The output pointer t never gets more than one byte ahead of the
	 * input pointer p, so the collapse is done in place.
	 */
	for (quoted = 0, p = t = cbuf->mem; len > 0;) {
		/*
		 * Backslash pairs pass through untouched, unless immediately
		 * preceding a newline, in which case both the backslash and
		 * the newline are discarded.  Backslash characters escape
		 * quoted characters, too, that is, a backslash followed by a
		 * quote doesn't start or end a quoted string.
		 */
		if (*p == '\\' && len > 1) {
			if (p[1] != '\n') {
				*t++ = p[0];
				*t++ = p[1];
			}
			p += 2;
			len -= 2;
			continue;
		}

		/*
		 * If we're in a quoted string, or starting a quoted string,
		 * take all characters, including white-space and newlines.
		 */
		if (quoted || *p == '"') {
			if (*p == '"')
				quoted = !quoted;
			*t++ = *p++;
			--len;
			continue;
		}

		/* Everything else gets taken, except for newline characters. */
		if (*p != '\n') {
			*t++ = *p++;
			--len;
			continue;
		}

		/*
		 * Replace any newline characters with commas (and strings of
		 * commas are safe).
		 *
		 * After any newline, skip to a non-white-space character; if
		 * the next character is a hash mark, skip to the next newline.
		 */
		for (;;) {
			for (*t++ = ','; --len > 0 && isspace(*++p);)
				;
			if (len == 0)
				break;
			if (*p != '#')
				break;
			while (--len > 0 && *++p != '\n')
				;
			if (len == 0)
				break;
		}
	}
	*t = '\0';

#if 0
	fprintf(stderr, "file config: {%s}\n", (const char *)cbuf->data);
#endif

	/* Check the configuration string. */
	WT_ERR(__wt_config_check(
	    session, __wt_confchk_wiredtiger_open, cbuf->data, 0));

	/*
	 * The configuration file falls between the default configuration and
	 * the wiredtiger_open() configuration, overriding the defaults but not
	 * overriding the wiredtiger_open() configuration.
	 *
	 * Walk to the last entry, move it up one slot and put the file's
	 * string where it was.
	 */
	while (cfg[1] != NULL)
		++cfg;
	cfg[1] = cfg[0];
	cfg[0] = cbuf->data;

	*cbufp = cbuf;

	if (0) {
err:		if (cbuf != NULL)
			__wt_buf_free(session, cbuf);
	}
	/* The file handle is closed on success and failure alike. */
	if (fh != NULL)
		WT_TRET(__wt_close(session, fh));
	return (ret);
}
/*
 * __conn_single --
 *	Confirm that no other thread of control is using this database.
 *
 *	Opens the lock file (creating it when the "create" configuration is
 *	non-zero), takes a byte lock on it, and checks the process-wide list
 *	of connections for another one with the same home.  On success the
 *	lock file handle stays open in the connection and conn->is_new says
 *	whether the lock file was empty when we got to it.  On failure the
 *	lock file handle is closed and cleared.
 */
static int
__conn_single(WT_SESSION_IMPL *session, const char **cfg)
{
	WT_CONFIG_ITEM cval;
	WT_CONNECTION_IMPL *conn, *other;
	WT_DECL_RET;
	off_t lock_size;
	uint32_t nbytes;
	char stamp[256];

	conn = S2C(session);

	/*
	 * Whether or not we end up creating the flag file is irrelevant: the
	 * only thing that matters is the "am I the only locker" tests below.
	 */
	WT_RET(__wt_config_gets(session, cfg, "create", &cval));
	WT_RET(__wt_open(session,
	    WT_SINGLETHREAD, cval.val != 0, 0, 0, &conn->lock_fh));

	/*
	 * Try to lock one byte of the file; failure means another process
	 * already holds it.  The file may be zero-length, which is fine, the
	 * underlying call can lock past the end-of-file.
	 */
	if (__wt_bytelock(conn->lock_fh, (off_t)0, 1) != 0)
		WT_ERR_MSG(session, EBUSY, "%s",
		    "WiredTiger database is already being managed by another "
		    "process");

	/* Look for another connection in this process with the same home. */
	__wt_spin_lock(session, &__wt_process.spinlock);
	TAILQ_FOREACH(other, &__wt_process.connqh, q) {
		if (other->home == NULL || other == conn)
			continue;
		if (strcmp(other->home, conn->home) == 0) {
			ret = EBUSY;
			break;
		}
	}
	__wt_spin_unlock(session, &__wt_process.spinlock);
	if (ret != 0)
		WT_ERR_MSG(session, EBUSY, "%s",
		    "WiredTiger database is already being managed by another "
		    "thread in this process");

	WT_ERR(__wt_filesize(session, conn->lock_fh, &lock_size));

	/* A non-empty lock file means somebody else created the database. */
	if (lock_size != 0) {
		conn->is_new = 0;
		return (0);
	}

	/*
	 * The file is empty: we created it, or we are racing with whoever
	 * did, which makes no difference.  Put a few bytes into it; this is
	 * not strictly needed, it just avoids leaving a zero-length file.
	 *
	 * Having found an empty lock file and ended up as the owner, report
	 * that we created the database.  Another process might in theory
	 * have created the file, but only one thread is ever told it was the
	 * creator, so it does not much matter.
	 */
	nbytes = (uint32_t)snprintf(stamp, sizeof(stamp), "%s\n%s\n",
	    WT_SINGLETHREAD, wiredtiger_version(NULL, NULL, NULL));
	WT_ERR(__wt_write(session, conn->lock_fh, (off_t)0, nbytes, stamp));
	conn->is_new = 1;
	return (0);

err:	if (conn->lock_fh != NULL) {
		WT_TRET(__wt_close(session, conn->lock_fh));
		conn->lock_fh = NULL;
	}
	return (ret);
}
/*
 * __log_release --
 *	Release a log slot.
 *
 *	Writes the slot's buffered records (if any), waits for earlier
 *	groups, advances the log's write LSN, optionally syncs the log file
 *	and grows the slot buffer, and finally closes the previous log file
 *	handle when this slot was flagged to do so.
 *
 *	Returns 0 on success or the first error encountered; a non-zero
 *	result is also saved in slot->slot_error if that is still unset.
 */
static int
__log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *close_fh;
	WT_LOG *log;
	WT_LSN sync_lsn;
	size_t write_size;
	int earlier_done;
	WT_DECL_SPINLOCK_ID(id);			/* Must appear last */

	conn = S2C(session);
	log = conn->log;
	earlier_done = 0;

	/*
	 * If we're going to have to close our log file, make a local copy
	 * of the file handle structure.  From here on this function owns the
	 * handle: nothing else references it once the log's copy is cleared.
	 */
	close_fh = NULL;
	if (F_ISSET(slot, SLOT_CLOSEFH)) {
		close_fh = log->log_close_fh;
		log->log_close_fh = NULL;
		F_CLR(slot, SLOT_CLOSEFH);
	}

	/* Write the buffered records */
	if (F_ISSET(slot, SLOT_BUFFERED)) {
		write_size = (size_t)
		    (slot->slot_end_lsn.offset - slot->slot_start_offset);
		WT_ERR(__wt_write(session, slot->slot_fh,
		    slot->slot_start_offset, write_size, slot->slot_buf.mem));
	}

	/*
	 * Wait for earlier groups to finish, otherwise there could be holes
	 * in the log file.
	 */
	while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0)
		__wt_yield();
	log->write_lsn = slot->slot_end_lsn;
	earlier_done = 1;

	/*
	 * Try to consolidate calls to fsync to wait less.  Acquire a spin lock
	 * so that threads finishing writing to the log will wait while the
	 * current fsync completes and advance log->write_lsn.
	 */
	while (F_ISSET(slot, SLOT_SYNC) &&
	    LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
		if (__wt_spin_trylock(session, &log->log_sync_lock, &id) != 0) {
			(void)__wt_cond_wait(
			    session, log->log_sync_cond, 10000);
			continue;
		}
		/*
		 * Record the current end of log after we grabbed the lock.
		 * That is how far our fsync call will guarantee.
		 */
		sync_lsn = log->write_lsn;
		if (LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
			WT_STAT_FAST_CONN_INCR(session, log_sync);
			ret = __wt_fsync(session, log->log_fh);
			if (ret == 0) {
				F_CLR(slot, SLOT_SYNC);
				log->sync_lsn = sync_lsn;
				ret = __wt_cond_signal(
				    session, log->log_sync_cond);
			}
		}
		/* Always drop the lock before acting on any error. */
		__wt_spin_unlock(session, &log->log_sync_lock);
		WT_ERR(ret);
	}
	if (F_ISSET(slot, SLOT_BUF_GROW)) {
		WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
		F_CLR(slot, SLOT_BUF_GROW);
		WT_STAT_FAST_CONN_INCRV(session,
		    log_buffer_size, slot->slot_buf.memsize);
		WT_ERR(__wt_buf_grow(session,
		    &slot->slot_buf, slot->slot_buf.memsize * 2));
	}

err:
	/*
	 * If we have a file to close, close it now.  This is done on the
	 * error path as well as on success: the handle was detached from the
	 * log structure above, so skipping the close after a failed sync or
	 * buffer grow would leak it.  The first error is kept.
	 *
	 * The close is only ever done after the wait for earlier groups, the
	 * same ordering as the success path.  If the write above failed we
	 * never waited, so the handle is deliberately left open in that case
	 * (presumably earlier groups could still be using it -- confirm
	 * before closing it there too).
	 */
	if (close_fh != NULL && earlier_done)
		WT_TRET(__wt_close(session, close_fh));

	if (ret != 0 && slot->slot_error == 0)
		slot->slot_error = ret;
	return (ret);
}
/* * __log_file_server -- * The log file server thread. This worker thread manages * log file operations such as closing and syncing. */ static WT_THREAD_RET __log_file_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *close_fh; WT_LOG *log; WT_LSN close_end_lsn, min_lsn; WT_SESSION_IMPL *session; uint32_t filenum; int locked; session = arg; conn = S2C(session); log = conn->log; locked = 0; while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { /* * If there is a log file to close, make sure any outstanding * write operations have completed, then fsync and close it. */ if ((close_fh = log->log_close_fh) != NULL) { WT_ERR(__wt_log_extract_lognum(session, close_fh->name, &filenum)); /* * We update the close file handle before updating the * close LSN when changing files. It is possible we * could see mismatched settings. If we do, yield * until it is set. This should rarely happen. */ while (log->log_close_lsn.file < filenum) __wt_yield(); if (__wt_log_cmp( &log->write_lsn, &log->log_close_lsn) >= 0) { /* * We've copied the file handle, clear out the * one in the log structure to allow it to be * set again. Copy the LSN before clearing * the file handle. * Use a barrier to make sure the compiler does * not reorder the following two statements. */ close_end_lsn = log->log_close_lsn; WT_FULL_BARRIER(); log->log_close_fh = NULL; /* * Set the close_end_lsn to the LSN immediately * after ours. That is, the beginning of the * next log file. We need to know the LSN * file number of our own close in case earlier * calls are still in progress and the next one * to move the sync_lsn into the next file for * later syncs. 
*/ close_end_lsn.file++; close_end_lsn.offset = 0; WT_ERR(__wt_fsync(session, close_fh)); __wt_spin_lock(session, &log->log_sync_lock); locked = 1; WT_ERR(__wt_close(session, &close_fh)); WT_ASSERT(session, __wt_log_cmp( &close_end_lsn, &log->sync_lsn) >= 0); log->sync_lsn = close_end_lsn; WT_ERR(__wt_cond_signal( session, log->log_sync_cond)); locked = 0; __wt_spin_unlock(session, &log->log_sync_lock); } } /* * If a later thread asked for a background sync, do it now. */ if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) { /* * Save the latest write LSN which is the minimum * we will have written to disk. */ min_lsn = log->write_lsn; /* * We have to wait until the LSN we asked for is * written. If it isn't signal the wrlsn thread * to get it written. */ if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) { WT_ERR(__wt_fsync(session, log->log_fh)); __wt_spin_lock(session, &log->log_sync_lock); locked = 1; /* * The sync LSN could have advanced while we * were writing to disk. */ if (__wt_log_cmp( &log->sync_lsn, &min_lsn) <= 0) { log->sync_lsn = min_lsn; WT_ERR(__wt_cond_signal( session, log->log_sync_cond)); } locked = 0; __wt_spin_unlock(session, &log->log_sync_lock); } else { WT_ERR(__wt_cond_signal( session, conn->log_wrlsn_cond)); /* * We do not want to wait potentially a second * to process this. Yield to give the wrlsn * thread a chance to run and try again in * this case. */ __wt_yield(); continue; } } /* Wait until the next event. */ WT_ERR(__wt_cond_wait( session, conn->log_file_cond, WT_MILLION)); } if (0) { err: __wt_err(session, ret, "log close server error"); } if (locked) __wt_spin_unlock(session, &log->log_sync_lock); return (WT_THREAD_RET_VALUE); }
/*
 * __wt_log_read --
 *	Fetch the log record stored at the given LSN.  The whole record,
 *	log header included, is placed in the caller's WT_ITEM, which the
 *	caller must free.
 *
 *	Returns 0 without doing anything if either the LSN or the item is
 *	NULL, WT_NOTFOUND if the LSN cannot name a record or the record
 *	length read from the file is zero, and an error if the stored
 *	checksum does not match.
 */
int
__wt_log_read(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
    uint32_t flags)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_FH *fh;
	WT_LOG *log;
	WT_LOG_RECORD *rec;
	uint32_t full_len, rec_len, stored_cksum;

	WT_UNUSED(flags);

	/* Without both an LSN and somewhere to put the record, do nothing. */
	if (lsnp == NULL || record == NULL)
		return (0);

	conn = S2C(session);
	log = conn->log;

	/*
	 * An offset that is not a multiple of the allocation size cannot be
	 * right, and neither can a file number past the log's file ID.
	 */
	if (lsnp->offset % log->allocsize != 0 || lsnp->file > log->fileid)
		return (WT_NOTFOUND);

	WT_RET(__log_openfile(session, 0, &fh, lsnp->file));

	/* Start by reading one allocation unit, the smallest possible record. */
	WT_ERR(__wt_buf_init(session, record, log->allocsize));
	WT_ERR(__wt_read(session,
	    fh, lsnp->offset, (size_t)log->allocsize, record->mem));

	/*
	 * The record's real length is in its first 4 bytes.  Most records
	 * are expected to be small, so a second, larger read should be the
	 * exception.
	 */
	rec_len = *(uint32_t *)record->mem;
	if (rec_len == 0) {
		ret = WT_NOTFOUND;
		goto err;
	}
	if (rec_len > log->allocsize) {
		/* Re-read the whole record, rounded up to allocation units. */
		full_len = __wt_rduppo2(rec_len, log->allocsize);
		WT_ERR(__wt_buf_grow(session, record, full_len));
		WT_ERR(__wt_read(session,
		    fh, lsnp->offset, (size_t)full_len, record->mem));
	}

	/*
	 * Verify the checksum: it is computed with the checksum field zeroed,
	 * and the computed value is left in the record.
	 */
	rec = (WT_LOG_RECORD *)record->mem;
	stored_cksum = rec->checksum;
	rec->checksum = 0;
	rec->checksum = __wt_cksum(rec, rec->len);
	if (stored_cksum != rec->checksum)
		WT_ERR_MSG(session, WT_ERROR, "log_read: Bad checksum");
	record->size = rec->len;
	WT_STAT_FAST_CONN_INCR(session, log_reads);

	/* The log file is closed whether or not the read succeeded. */
err:	WT_TRET(__wt_close(session, fh));
	return (ret);
}