/* * __sweep_server -- * The handle sweep server thread. */ static void * __sweep_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; session = arg; conn = S2C(session); /* * Sweep for dead handles. */ while (F_ISSET(conn, WT_CONN_SERVER_RUN) && F_ISSET(conn, WT_CONN_SERVER_SWEEP)) { /* Wait until the next event. */ WT_ERR_TIMEDOUT_OK( __wt_cond_wait(session, conn->sweep_cond, 30 * WT_MILLION)); /* Sweep the handles. */ WT_ERR(__sweep(session)); } if (0) { err: __wt_err(session, ret, "handle sweep server error"); } return (NULL); }
/* * __ckpt_server -- * The checkpoint server thread. */ static void * __ckpt_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION *wt_session; WT_SESSION_IMPL *session; session = arg; conn = S2C(session); wt_session = (WT_SESSION *)session; while (F_ISSET(conn, WT_CONN_SERVER_RUN) && F_ISSET(conn, WT_CONN_SERVER_CHECKPOINT)) { /* Checkpoint the database. */ WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config)); /* Wait... */ WT_ERR_TIMEDOUT_OK( __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs)); } if (0) { err: __wt_err(session, ret, "checkpoint server error"); } return (NULL); }
/* * __wt_lsm_merge_worker -- * The merge worker thread for an LSM tree, responsible for merging * on-disk trees. */ void * __wt_lsm_merge_worker(void *vargs) { WT_DECL_RET; WT_LSM_WORKER_ARGS *args; WT_LSM_TREE *lsm_tree; WT_SESSION_IMPL *session; u_int aggressive, chunk_wait, id, old_aggressive, stallms; int progress; args = vargs; lsm_tree = args->lsm_tree; id = args->id; session = lsm_tree->worker_sessions[id]; __wt_free(session, args); aggressive = stallms = 0; while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) { /* * Help out with switching chunks in case the checkpoint worker * is busy. */ if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_switch(session, lsm_tree)); WT_ERR(ret); } progress = 0; /* Clear any state from previous worker thread iterations. */ session->dhandle = NULL; /* Try to create a Bloom filter. */ if (__lsm_bloom_work(session, lsm_tree) == 0) progress = 1; /* If we didn't create a Bloom filter, try to merge. */ if (progress == 0 && __wt_lsm_merge(session, lsm_tree, id, aggressive) == 0) progress = 1; /* Clear any state from previous worker thread iterations. */ WT_CLEAR_BTREE_IN_SESSION(session); /* * Only have one thread freeing old chunks, and only if there * are chunks to free. */ if (id == 0 && lsm_tree->nold_chunks > 0 && __lsm_free_chunks(session, lsm_tree) == 0) progress = 1; if (progress) stallms = 0; else if (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) && !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { /* Poll 10 times per second. */ WT_ERR_TIMEDOUT_OK(__wt_cond_wait( session, lsm_tree->work_cond, 100000)); stallms += 100; /* * Get aggressive if more than enough chunks for a * merge should have been created while we waited. * Use 10 seconds as a default if we don't have an * estimate. */ chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ? 10000 : lsm_tree->chunk_fill_ms); old_aggressive = aggressive; aggressive = chunk_wait / lsm_tree->merge_min; if (aggressive > old_aggressive) WT_VERBOSE_ERR(session, lsm, "LSM merge got aggressive (%u), " "%u / %" PRIu64, aggressive, stallms, lsm_tree->chunk_fill_ms); } } if (0) { err: __wt_err(session, ret, "LSM merge worker failed"); } return (NULL); }
/* * __wt_lsm_checkpoint_worker -- * A worker thread for an LSM tree, responsible for flushing new chunks to * disk. */ void * __wt_lsm_checkpoint_worker(void *arg) { WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_LSM_TREE *lsm_tree; WT_LSM_WORKER_COOKIE cookie; WT_SESSION_IMPL *session; WT_TXN_ISOLATION saved_isolation; u_int i, j; int locked; lsm_tree = arg; session = lsm_tree->ckpt_session; WT_CLEAR(cookie); while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) { if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { WT_WITH_SCHEMA_LOCK(session, ret = __wt_lsm_tree_switch(session, lsm_tree)); WT_ERR(ret); } WT_ERR(__lsm_copy_chunks(session, lsm_tree, &cookie, 0)); /* Write checkpoints in all completed files. */ for (i = 0, j = 0; i < cookie.nchunks - 1; i++) { if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) goto err; if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) break; chunk = cookie.chunk_array[i]; /* Stop if a running transaction needs the chunk. */ __wt_txn_update_oldest(session); if (!__wt_txn_visible_all(session, chunk->txnid_max)) break; /* * If the chunk is already checkpointed, make sure it * is also evicted. Either way, there is no point * trying to checkpoint it again. */ if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK)) { if (F_ISSET_ATOMIC(chunk, WT_LSM_CHUNK_EVICTED)) continue; if ((ret = __lsm_discard_handle( session, chunk->uri, NULL)) == 0) F_SET_ATOMIC( chunk, WT_LSM_CHUNK_EVICTED); else if (ret == EBUSY) ret = 0; else WT_ERR_MSG(session, ret, "discard handle"); continue; } WT_VERBOSE_ERR(session, lsm, "LSM worker flushing %u", i); /* * Flush the file before checkpointing: this is the * expensive part in terms of I/O: do it without * holding the schema lock. * * Use the special eviction isolation level to avoid * interfering with an application checkpoint: we have * already checked that all of the updates in this * chunk are globally visible. * * !!! We can wait here for checkpoints and fsyncs to * complete, which can be a long time. * * Don't keep waiting for the lock if application * threads are waiting for a switch. Don't skip * flushing the leaves either: that just means we'll * hold the schema lock for (much) longer, which blocks * the world. */ WT_ERR(__wt_session_get_btree( session, chunk->uri, NULL, NULL, 0)); for (locked = 0; !locked && ret == 0 && !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH);) { if ((ret = __wt_spin_trylock(session, &S2C(session)->checkpoint_lock)) == 0) locked = 1; else if (ret == EBUSY) { __wt_yield(); ret = 0; } } if (locked) { saved_isolation = session->txn.isolation; session->txn.isolation = TXN_ISO_EVICTION; ret = __wt_bt_cache_op( session, NULL, WT_SYNC_WRITE_LEAVES); session->txn.isolation = saved_isolation; __wt_spin_unlock( session, &S2C(session)->checkpoint_lock); } WT_TRET(__wt_session_release_btree(session)); WT_ERR(ret); if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) break; WT_VERBOSE_ERR(session, lsm, "LSM worker checkpointing %u", i); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, chunk->uri, __wt_checkpoint, NULL, NULL, 0)); if (ret != 0) { __wt_err(session, ret, "LSM checkpoint"); break; } WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk)); /* * Clear the "cache resident" flag so the primary can * be evicted and eventually closed. Only do this once * the checkpoint has succeeded: otherwise, accessing * the leaf page during the checkpoint can trigger * forced eviction. */ WT_ERR(__wt_session_get_btree( session, chunk->uri, NULL, NULL, 0)); __wt_btree_evictable(session, 1); WT_ERR(__wt_session_release_btree(session)); ++j; WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1)); F_SET_ATOMIC(chunk, WT_LSM_CHUNK_ONDISK); ret = __wt_lsm_meta_write(session, lsm_tree); ++lsm_tree->dsk_gen; /* Update the throttle time. */ __wt_lsm_tree_throttle(session, lsm_tree); WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); /* Make sure we aren't pinning a transaction ID. */ __wt_txn_release_snapshot(session); if (ret != 0) { __wt_err(session, ret, "LSM checkpoint metadata write"); break; } WT_VERBOSE_ERR(session, lsm, "LSM worker checkpointed %u", i); } __lsm_unpin_chunks(session, &cookie); if (j == 0 && F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) && !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) WT_ERR_TIMEDOUT_OK(__wt_cond_wait( session, lsm_tree->work_cond, 100000)); } err: __lsm_unpin_chunks(session, &cookie); __wt_free(session, cookie.chunk_array); /* * The thread will only exit with failure if we run out of memory or * there is some other system driven failure. We can't keep going * after such a failure - ensure WiredTiger shuts down. */ if (ret != 0 && ret != WT_NOTFOUND) WT_PANIC_ERR(session, ret, "Shutting down LSM checkpoint utility thread"); return (NULL); }