/* * __sweep_server -- * The handle sweep server thread. */ static void * __sweep_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; session = arg; conn = S2C(session); /* * Sweep for dead handles. */ while (F_ISSET(conn, WT_CONN_SERVER_RUN) && F_ISSET(conn, WT_CONN_SERVER_SWEEP)) { /* Wait until the next event. */ WT_ERR( __wt_cond_wait(session, conn->sweep_cond, 30 * WT_MILLION)); /* Sweep the handles. */ WT_ERR(__sweep(session)); } if (0) { err: WT_PANIC_MSG(session, ret, "handle sweep server error"); } return (NULL); }
/* * __wt_thread_run -- * General wrapper for any thread. */ WT_THREAD_RET __wt_thread_run(void *arg) { WT_DECL_RET; WT_SESSION_IMPL *session; WT_THREAD *thread; thread = (WT_THREAD*)arg; session = thread->session; ret = thread->run_func(session, thread); if (ret != 0 && F_ISSET(thread, WT_THREAD_PANIC_FAIL)) WT_PANIC_MSG(session, ret, "Unrecoverable utility thread error"); /* * The three cases when threads are expected to stop are: * 1. When recovery is done. * 2. When the connection is closing. * 3. When a shutdown has been requested via clearing the run flag. */ WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_RUN) || F_ISSET(S2C(session), WT_CONN_CLOSING | WT_CONN_RECOVERING)); return (WT_THREAD_RET_VALUE); }
/* * __log_wrlsn_server -- * The log wrlsn server thread. */ static WT_THREAD_RET __log_wrlsn_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; WT_LSN prev; WT_SESSION_IMPL *session; int yield; bool did_work; session = arg; conn = S2C(session); log = conn->log; yield = 0; WT_INIT_LSN(&prev); while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Write out any log record buffers if anything was done * since last time. Only call the function to walk the * slots if the system is not idle. On an idle system * the alloc_lsn will not advance and the written lsn will * match the alloc_lsn. */ if (__wt_log_cmp(&prev, &log->alloc_lsn) != 0 || __wt_log_cmp(&log->write_lsn, &log->alloc_lsn) != 0) __wt_log_wrlsn(session, &yield); else WT_STAT_CONN_INCR(session, log_write_lsn_skip); prev = log->alloc_lsn; did_work = yield == 0; /* * If __wt_log_wrlsn did work we want to yield instead of sleep. */ if (yield++ < WT_THOUSAND) __wt_yield(); else __wt_cond_auto_wait( session, conn->log_wrlsn_cond, did_work, NULL); } /* * On close we need to do this one more time because there could * be straggling log writes that need to be written. */ WT_ERR(__wt_log_force_write(session, 1, NULL)); __wt_log_wrlsn(session, NULL); if (0) { err: WT_PANIC_MSG(session, ret, "log wrlsn server error"); } return (WT_THREAD_RET_VALUE); }
/* * __ckpt_server -- * The checkpoint server thread. */ static void * __ckpt_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION *wt_session; WT_SESSION_IMPL *session; session = arg; conn = S2C(session); wt_session = (WT_SESSION *)session; while (F_ISSET(conn, WT_CONN_SERVER_RUN) && F_ISSET(conn, WT_CONN_SERVER_CHECKPOINT)) { /* Checkpoint the database. */ WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config)); /* Reset. */ if (conn->ckpt_logsize) { __wt_log_written_reset(session); conn->ckpt_signalled = 0; /* * In case we crossed the log limit during the * checkpoint and the condition variable was already * signalled, do a tiny wait to clear it so we don't do * another checkpoint immediately. */ WT_ERR(__wt_cond_wait(session, conn->ckpt_cond, 1)); } /* * Wait... * NOTE: If the user only configured logsize, then usecs * will be 0 and this wait won't return until signalled. */ WT_ERR( __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs)); } if (0) { err: WT_PANIC_MSG(session, ret, "checkpoint server error"); } return (NULL); }
/*
 * __wt_optrack_record_funcid --
 *     Assign an operation-tracking ID to a function name, if it does not
 *     have one yet, and append the "<id> <name>" pair to the map file.
 *     Any failure is reported as a panic; nothing is returned.
 */
void
__wt_optrack_record_funcid(
    WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp)
{
    static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */
    WT_CONNECTION_IMPL *conn;
    WT_DECL_ITEM(tmp);
    WT_DECL_RET;
    wt_off_t map_size;
    bool have_lock;

    conn = S2C(session);
    have_lock = false;

    /* Scratch buffer: room for the name plus the formatted ID. */
    WT_ERR(__wt_scr_alloc(session, strlen(func) + 32, &tmp));

    __wt_spin_lock(session, &conn->optrack_map_spinlock);
    have_lock = true;

    /* The ID is (re)checked under the lock; zero means "not yet assigned". */
    if (*func_idp == 0) {
        *func_idp = ++optrack_uid;

        WT_ERR(__wt_buf_fmt(
            session, tmp, "%" PRIu16 " %s\n", *func_idp, func));

        /* Write the record at the current end of the map file. */
        WT_ERR(__wt_filesize(session, conn->optrack_map_fh, &map_size));
        WT_ERR(__wt_write(session,
            conn->optrack_map_fh, map_size, tmp->size, tmp->data));
    }
    goto done;

err:
    WT_PANIC_MSG(session, ret, "operation tracking initialization failure");

done:
    /* Shared cleanup for both the success and the failure path. */
    if (have_lock)
        __wt_spin_unlock(session, &conn->optrack_map_spinlock);
    __wt_scr_free(session, &tmp);
}
/* * __lsm_worker -- * A thread that executes work units for all open LSM trees. */ static WT_THREAD_RET __lsm_worker(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LSM_WORK_UNIT *entry; WT_LSM_WORKER_ARGS *cookie; WT_SESSION_IMPL *session; int progress, ran; cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; conn = S2C(session); entry = NULL; while (F_ISSET(conn, WT_CONN_SERVER_RUN) && F_ISSET(cookie, WT_LSM_WORKER_RUN)) { progress = 0; /* * Workers process the different LSM work queues. Some workers * can handle several or all work unit types. So the code is * prioritized so important operations happen first. * Switches are the highest priority. */ while (FLD_ISSET(cookie->type, WT_LSM_WORK_SWITCH) && (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_SWITCH, &entry)) == 0 && entry != NULL) WT_ERR( __wt_lsm_work_switch(session, &entry, &progress)); /* Flag an error if the pop failed. */ WT_ERR(ret); /* * Next the general operations. */ ret = __lsm_worker_general_op(session, cookie, &ran); if (ret == EBUSY || ret == WT_NOTFOUND) ret = 0; WT_ERR(ret); progress = progress || ran; /* * Finally see if there is any merge work we can do. This is * last because the earlier operations may result in adding * merge work to the queue. */ if (FLD_ISSET(cookie->type, WT_LSM_WORK_MERGE) && (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_MERGE, &entry)) == 0 && entry != NULL) { WT_ASSERT(session, entry->type == WT_LSM_WORK_MERGE); ret = __wt_lsm_merge(session, entry->lsm_tree, cookie->id); if (ret == WT_NOTFOUND) { F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACTING); ret = 0; } else if (ret == EBUSY) ret = 0; /* Paranoia: clear session state. */ session->dhandle = NULL; __wt_lsm_manager_free_work_unit(session, entry); entry = NULL; progress = 1; } /* Flag an error if the pop failed. */ WT_ERR(ret); /* Don't busy wait if there was any work to do. 
*/ if (!progress) { WT_ERR( __wt_cond_wait(session, cookie->work_cond, 10000)); continue; } } if (ret != 0) { err: __wt_lsm_manager_free_work_unit(session, entry); WT_PANIC_MSG(session, ret, "Error in LSM worker thread %d", cookie->id); } return (WT_THREAD_RET_VALUE); }
/*
 * __wt_cond_wait_signal --
 *     Wait on a condition variable, optionally with a timeout. On return
 *     *signalled is true if the wait ended because of a wakeup (or the
 *     condition was already signalled) and false if it timed out or the
 *     run function said to stop. Unexpected failures panic.
 */
void
__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
    uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled)
{
    BOOL sleepret;
    DWORD timeout_ms, windows_error;
    uint64_t ms64;

    /*
     * Only read on the failure path, where it has always been assigned
     * from __wt_getlasterror() first; initialized to keep flow analysis
     * quiet.
     */
    windows_error = 0;

    /* Fast path: bumping the waiter count to zero means already signalled. */
    *signalled = true;
    if (__wt_atomic_addi32(&cond->waiters, 1) == 0)
        return;

    __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
    WT_STAT_CONN_INCR(session, cond_wait);

    EnterCriticalSection(&cond->mtx);

    /*
     * Wakeups can race with us getting here. With repeated wakeups or a
     * short pause that is harmless, but with a single wakeup the waker may
     * be waiting for this thread to exit. Now that the mutex is held (so
     * any later wakeup will reach us), optionally ask the caller's run
     * function whether to keep going. The caller may still loop and call
     * again, but that is outside this function's control.
     *
     * Without a run function, waits longer than a second are not allowed.
     */
    WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION);

    if (run_func != NULL && !run_func(session)) {
        /* Told to stop: report "not signalled" without sleeping. */
        *signalled = false;
        sleepret = 1;
    } else {
        if (usecs == 0)
            /* No timeout requested: sleep until woken. */
            timeout_ms = INFINITE;
        else {
            ms64 = usecs / WT_THOUSAND;

            /*
             * INFINITE is the largest 32-bit unsigned value on Windows:
             * clamp just below it so a long timeout neither overflows
             * the DWORD nor turns into an unbounded wait.
             */
            if (ms64 >= INFINITE)
                ms64 = INFINITE - 1;
            timeout_ms = (DWORD)ms64;

            /* A zero timeout would only poll, so round up to 1ms. */
            if (timeout_ms == 0)
                timeout_ms = 1;
        }

        /*
         * SleepConditionVariableCS returns non-zero on success and zero
         * on timeout or failure; a timeout is folded into success with
         * *signalled cleared.
         */
        sleepret =
            SleepConditionVariableCS(&cond->cond, &cond->mtx, timeout_ms);
        if (sleepret == 0) {
            windows_error = __wt_getlasterror();
            if (windows_error == ERROR_TIMEOUT) {
                *signalled = false;
                sleepret = 1;
            }
        }
    }

    (void)__wt_atomic_subi32(&cond->waiters, 1);
    LeaveCriticalSection(&cond->mtx);

    if (sleepret != 0)
        return;

    /* The sleep failed for a reason other than a timeout. */
    __wt_err(session,
        __wt_map_windows_error(windows_error),
        "SleepConditionVariableCS: %s: %s",
        cond->name, __wt_formatmessage(session, windows_error));
    WT_PANIC_MSG(session, __wt_map_windows_error(windows_error),
        "SleepConditionVariableCS: %s", cond->name);
}
/* * __log_server -- * The log server thread. */ static WT_THREAD_RET __log_server(void *arg) { struct timespec start, now; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; WT_SESSION_IMPL *session; uint64_t timediff; bool did_work, signalled; session = arg; conn = S2C(session); log = conn->log; signalled = false; /* * Set this to the number of milliseconds we want to run archive and * pre-allocation. Start it so that we run on the first time through. */ timediff = WT_THOUSAND; /* * The log server thread does a variety of work. It forces out any * buffered log writes. It pre-allocates log files and it performs * log archiving. The reason the wrlsn thread does not force out * the buffered writes is because we want to process and move the * write_lsn forward as quickly as possible. The same reason applies * to why the log file server thread does not force out the writes. * That thread does fsync calls which can take a long time and we * don't want log records sitting in the buffer over the time it * takes to sync out an earlier file. */ did_work = true; while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Slots depend on future activity. Force out buffered * writes in case we are idle. This cannot be part of the * wrlsn thread because of interaction advancing the write_lsn * and a buffer may need to wait for the write_lsn to advance * in the case of a synchronous buffer. We end up with a hang. */ WT_ERR_BUSY_OK(__wt_log_force_write(session, 0, &did_work)); /* * We don't want to archive or pre-allocate files as often as * we want to force out log buffers. Only do it once per second * or if the condition was signalled. */ if (timediff >= WT_THOUSAND || signalled) { /* * Perform log pre-allocation. */ if (conn->log_prealloc > 0) { /* * Log file pre-allocation is disabled when a * hot backup cursor is open because we have * agreed not to rename or remove any files in * the database directory. 
*/ __wt_readlock(session, &conn->hot_backup_lock); if (!conn->hot_backup) ret = __log_prealloc_once(session); __wt_readunlock( session, &conn->hot_backup_lock); WT_ERR(ret); } /* * Perform the archive. */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) { if (__wt_try_writelock( session, &log->log_archive_lock) == 0) { ret = __log_archive_once(session, 0); __wt_writeunlock( session, &log->log_archive_lock); WT_ERR(ret); } else __wt_verbose(session, WT_VERB_LOG, "%s", "log_archive: Blocked due to open " "log cursor holding archive lock"); } } /* Wait until the next event. */ __wt_epoch(session, &start); __wt_cond_auto_wait_signal( session, conn->log_cond, did_work, NULL, &signalled); __wt_epoch(session, &now); timediff = WT_TIMEDIFF_MS(now, start); } if (0) { err: WT_PANIC_MSG(session, ret, "log server error"); } return (WT_THREAD_RET_VALUE); }
/* * __log_file_server -- * The log file server thread. This worker thread manages * log file operations such as closing and syncing. */ static WT_THREAD_RET __log_file_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_FH *close_fh; WT_LOG *log; WT_LSN close_end_lsn, min_lsn; WT_SESSION_IMPL *session; uint64_t yield_count; uint32_t filenum; bool locked; session = arg; conn = S2C(session); log = conn->log; locked = false; yield_count = 0; while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * If there is a log file to close, make sure any outstanding * write operations have completed, then fsync and close it. */ if ((close_fh = log->log_close_fh) != NULL) { WT_ERR(__wt_log_extract_lognum(session, close_fh->name, &filenum)); /* * The closing file handle should have a correct close * LSN. */ WT_ASSERT(session, log->log_close_lsn.l.file == filenum); if (__wt_log_cmp( &log->write_lsn, &log->log_close_lsn) >= 0) { /* * We've copied the file handle, clear out the * one in the log structure to allow it to be * set again. Copy the LSN before clearing * the file handle. * Use a barrier to make sure the compiler does * not reorder the following two statements. */ close_end_lsn = log->log_close_lsn; WT_FULL_BARRIER(); log->log_close_fh = NULL; /* * Set the close_end_lsn to the LSN immediately * after ours. That is, the beginning of the * next log file. We need to know the LSN * file number of our own close in case earlier * calls are still in progress and the next one * to move the sync_lsn into the next file for * later syncs. */ WT_ERR(__wt_fsync(session, close_fh, true)); /* * We want to have the file size reflect actual * data with minimal pre-allocated zeroed space. * We can't truncate the file during hot backup, * or the underlying file system may not support * truncate: both are OK, it's just more work * during cursor traversal. 
*/ if (!conn->hot_backup) { __wt_readlock( session, &conn->hot_backup_lock); if (!conn->hot_backup) WT_ERR_ERROR_OK( __wt_ftruncate(session, close_fh, close_end_lsn.l.offset), ENOTSUP); __wt_readunlock( session, &conn->hot_backup_lock); } WT_SET_LSN(&close_end_lsn, close_end_lsn.l.file + 1, 0); __wt_spin_lock(session, &log->log_sync_lock); locked = true; WT_ERR(__wt_close(session, &close_fh)); WT_ASSERT(session, __wt_log_cmp( &close_end_lsn, &log->sync_lsn) >= 0); log->sync_lsn = close_end_lsn; __wt_cond_signal(session, log->log_sync_cond); locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } } /* * If a later thread asked for a background sync, do it now. */ if (__wt_log_cmp(&log->bg_sync_lsn, &log->sync_lsn) > 0) { /* * Save the latest write LSN which is the minimum * we will have written to disk. */ min_lsn = log->write_lsn; /* * We have to wait until the LSN we asked for is * written. If it isn't signal the wrlsn thread * to get it written. * * We also have to wait for the written LSN and the * sync LSN to be in the same file so that we know we * have synchronized all earlier log files. */ if (__wt_log_cmp(&log->bg_sync_lsn, &min_lsn) <= 0) { /* * If the sync file is behind either the one * wanted for a background sync or the write LSN * has moved to another file continue to let * this worker thread process that older file * immediately. */ if ((log->sync_lsn.l.file < log->bg_sync_lsn.l.file) || (log->sync_lsn.l.file < min_lsn.l.file)) continue; WT_ERR(__wt_fsync(session, log->log_fh, true)); __wt_spin_lock(session, &log->log_sync_lock); locked = true; /* * The sync LSN could have advanced while we * were writing to disk. 
*/ if (__wt_log_cmp( &log->sync_lsn, &min_lsn) <= 0) { WT_ASSERT(session, min_lsn.l.file == log->sync_lsn.l.file); log->sync_lsn = min_lsn; __wt_cond_signal( session, log->log_sync_cond); } locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } else { __wt_cond_signal(session, conn->log_wrlsn_cond); /* * We do not want to wait potentially a second * to process this. Yield to give the wrlsn * thread a chance to run and try again in * this case. */ yield_count++; __wt_yield(); continue; } } /* Wait until the next event. */ __wt_cond_wait(session, conn->log_file_cond, 100000, NULL); } if (0) { err: WT_PANIC_MSG(session, ret, "log close server error"); } WT_STAT_CONN_INCRV(session, log_server_sync_blocked, yield_count); if (locked) __wt_spin_unlock(session, &log->log_sync_lock); return (WT_THREAD_RET_VALUE); }