/*
 * __log_slot_switch_internal --
 *	Switch out the current slot and set up a new one.
 */
static int
__log_slot_switch_internal(
    WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced, bool *did_work)
{
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOGSLOT *slot;
	uint32_t joined;
	bool free_slot, release;

	log = S2C(session)->log;
	release = false;
	slot = myslot->slot;

	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));

	/*
	 * If someone else raced us to closing this specific slot, we're
	 * done here.
	 */
	if (slot != log->active_slot)
		return (0);

	/*
	 * If the current active slot is unused and this is a forced switch,
	 * we're done.  If this is a non-forced switch we always switch
	 * because the slot could be part of an unbuffered operation.
	 */
	joined = WT_LOG_SLOT_JOINED(slot->slot_state);
	if (joined == 0 && forced && !F_ISSET(log, WT_LOG_FORCE_NEWFILE)) {
		WT_STAT_CONN_INCR(session, log_force_write_skip);
		if (did_work != NULL)
			*did_work = false;
		return (0);
	}

	/*
	 * We may come through here multiple times if we were not able to
	 * set up a new one.  If we closed it already, don't try to do it
	 * again but still set up the new slot.
	 */
	if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) {
		ret = __log_slot_close(session, slot, &release, forced);
		/*
		 * If close returns WT_NOTFOUND it means that someone else
		 * is processing the slot change.
		 */
		if (ret == WT_NOTFOUND)
			return (0);
		WT_RET(ret);

		/*
		 * Record that we have closed this slot because we may call
		 * in here multiple times if we retry creating a new slot.
		 * Similarly retain whether this slot needs releasing so that
		 * we don't lose that information if we retry.
		 */
		F_SET(myslot, WT_MYSLOT_CLOSE);
		if (release)
			F_SET(myslot, WT_MYSLOT_NEEDS_RELEASE);
	}

	/*
	 * Now that the slot is closed, set up a new one so that joining
	 * threads don't have to wait on writing the previous slot if we
	 * release it.  Release after setting a new one.
	 */
	WT_RET(__log_slot_new(session));
	F_CLR(myslot, WT_MYSLOT_CLOSE);
	if (F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) {
		/*
		 * The release here must be done while holding the slot lock.
		 * The reason is that a forced slot switch needs to be sure
		 * that any earlier slot switches have completed, including
		 * writing out the buffer contents of earlier slots.
		 */
		WT_RET(__wt_log_release(session, slot, &free_slot));
		F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE);
		if (free_slot)
			__wt_log_slot_free(session, slot);
	}
	return (ret);
}
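/*
 * Example (an illustrative sketch, not the actual WiredTiger caller):
 * the function above asserts the slot lock is held, and it can fail
 * transiently if __log_slot_new cannot find a free slot.  A plausible
 * driver takes the lock around the switch and retries on EBUSY with a
 * short sleep.  The wrapper name, the EBUSY retry policy and the sleep
 * interval below are all assumptions for illustration only.
 */
#ifdef LOG_SLOT_EXAMPLE
static int
__log_slot_switch_example(
    WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced, bool *did_work)
{
	WT_DECL_RET;
	WT_LOG *log;

	log = S2C(session)->log;
	do {
		/* Run the switch while holding the slot lock. */
		WT_WITH_SLOT_LOCK(session, log, ret =
		    __log_slot_switch_internal(
		    session, myslot, forced, did_work));
		/* Assumed: out of slots, back off briefly and retry. */
		if (ret == EBUSY)
			__wt_sleep(0, WT_THOUSAND);
	} while (ret == EBUSY);
	return (ret);
}
#endif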
/*
 * __wt_log_slot_join --
 *	Join a consolidated logging slot.
 */
void
__wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize,
    uint32_t flags, WT_MYSLOT *myslot)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	WT_LOGSLOT *slot;
	uint64_t time_start, time_stop, usecs;
	int64_t flag_state, new_state, old_state, released;
	int32_t join_offset, new_join, wait_cnt;
	bool closed, diag_yield, raced, slept, unbuffered, yielded;

	conn = S2C(session);
	log = conn->log;
	time_start = time_stop = 0;

	WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT));
	WT_ASSERT(session, mysize != 0);

	/*
	 * There should almost always be a slot open.
	 */
	unbuffered = yielded = false;
	closed = raced = slept = false;
	wait_cnt = 0;
#ifdef HAVE_DIAGNOSTIC
	diag_yield = (++log->write_calls % 7) == 0;
	if ((log->write_calls % WT_THOUSAND) == 0 ||
	    mysize > WT_LOG_SLOT_BUF_MAX) {
#else
	diag_yield = false;
	if (mysize > WT_LOG_SLOT_BUF_MAX) {
#endif
		unbuffered = true;
		F_SET(myslot, WT_MYSLOT_UNBUFFERED);
	}
	for (;;) {
		WT_BARRIER();
		slot = log->active_slot;
		old_state = slot->slot_state;
		if (WT_LOG_SLOT_OPEN(old_state)) {
			/*
			 * Try to join our size into the existing size and
			 * atomically write it back into the state.
			 */
			flag_state = WT_LOG_SLOT_FLAGS(old_state);
			released = WT_LOG_SLOT_RELEASED(old_state);
			join_offset = WT_LOG_SLOT_JOINED(old_state);
			if (unbuffered)
				new_join =
				    join_offset + WT_LOG_SLOT_UNBUFFERED;
			else
				new_join = join_offset + (int32_t)mysize;
			new_state = (int64_t)WT_LOG_SLOT_JOIN_REL(
			    (int64_t)new_join, (int64_t)released,
			    (int64_t)flag_state);

			/*
			 * Braces used due to potential empty body warning.
			 */
			if (diag_yield) {
				WT_DIAGNOSTIC_YIELD;
			}
			/*
			 * Attempt to swap our size into the state.
			 */
			if (__wt_atomic_casiv64(
			    &slot->slot_state, old_state, new_state))
				break;
			WT_STAT_CONN_INCR(session, log_slot_races);
			raced = true;
		} else {
			WT_STAT_CONN_INCR(session, log_slot_active_closed);
			closed = true;
			++wait_cnt;
		}
		if (!yielded)
			time_start = __wt_clock(session);
		yielded = true;
		/*
		 * The slot is no longer open or we lost the race to
		 * update it.  Yield and try again.
		 */
		if (wait_cnt < WT_THOUSAND)
			__wt_yield();
		else {
			__wt_sleep(0, WT_THOUSAND);
			slept = true;
		}
	}

	/*
	 * We joined this slot.  Fill in our information to return to
	 * the caller.
	 */
	if (!yielded)
		WT_STAT_CONN_INCR(session, log_slot_immediate);
	else {
		WT_STAT_CONN_INCR(session, log_slot_yield);
		time_stop = __wt_clock(session);
		usecs = WT_CLOCKDIFF_US(time_stop, time_start);
		WT_STAT_CONN_INCRV(session, log_slot_yield_duration, usecs);
		if (closed)
			WT_STAT_CONN_INCR(session, log_slot_yield_close);
		if (raced)
			WT_STAT_CONN_INCR(session, log_slot_yield_race);
		if (slept)
			WT_STAT_CONN_INCR(session, log_slot_yield_sleep);
	}
	if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC))
		F_SET(slot, WT_SLOT_SYNC_DIR);
	if (LF_ISSET(WT_LOG_FLUSH))
		F_SET(slot, WT_SLOT_FLUSH);
	if (LF_ISSET(WT_LOG_FSYNC))
		F_SET(slot, WT_SLOT_SYNC);
	if (F_ISSET(myslot, WT_MYSLOT_UNBUFFERED)) {
		WT_ASSERT(session, slot->slot_unbuffered == 0);
		WT_STAT_CONN_INCR(session, log_slot_unbuffered);
		slot->slot_unbuffered = (int64_t)mysize;
	}
	myslot->slot = slot;
	myslot->offset = join_offset;
	myslot->end_offset = (wt_off_t)((uint64_t)join_offset + mysize);
}
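/*
 * Example: why a single 64-bit state word makes join and release
 * lock-free.  Conceptually the joined byte count lives in the upper
 * half of slot_state and the released count in the lower half, so a
 * joiner can CAS in a larger joined total (as above) while a releaser
 * simply atomically adds to the released total (as in
 * __wt_log_slot_release below).  The simplified encoding here is an
 * assumption for illustration; the real WT_LOG_SLOT_* macros also
 * reserve high bits for slot flags (WT_LOG_SLOT_FLAGS).
 */
#ifdef LOG_SLOT_EXAMPLE
#define	EXAMPLE_SLOT_JOIN_REL(j, r)	(((int64_t)(j) << 32) + (int64_t)(r))
#define	EXAMPLE_SLOT_JOINED(state)	((int32_t)((state) >> 32))
#define	EXAMPLE_SLOT_RELEASED(state)	((int32_t)((state) & 0xffffffffLL))
#endif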
/*
 * __wt_log_slot_release --
 *	Each thread in a consolidated group releases its portion to
 *	signal it has completed copying its piece of the log into
 *	the memory buffer.
 */
int64_t
__wt_log_slot_release(WT_MYSLOT *myslot, int64_t size)
{
	WT_LOGSLOT *slot;
	wt_off_t cur_offset, my_start;
	int64_t my_size, rel_size;

	slot = myslot->slot;
	my_start = slot->slot_start_offset + myslot->offset;
	/*
	 * We maintain the last starting offset within this slot.
	 * This is used to know the offset of the last record that
	 * was written rather than the beginning record of the slot.
	 */
	while ((cur_offset = slot->slot_last_offset) < my_start) {
		/*
		 * Set our offset if we are larger.
		 */
		if (__wt_atomic_casiv64(
		    &slot->slot_last_offset, cur_offset, my_start))
			break;
		/*
		 * If we raced another thread updating this, try again.
		 */
		WT_BARRIER();
	}
	/*
	 * Add my size into the state and return the new size.
	 */
	rel_size = size;
	if (F_ISSET(myslot, WT_MYSLOT_UNBUFFERED))
		rel_size = WT_LOG_SLOT_UNBUFFERED;
	my_size = (int64_t)WT_LOG_SLOT_JOIN_REL((int64_t)0, rel_size, 0);
	return (__wt_atomic_addiv64(&slot->slot_state, my_size));
}
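/*
 * Example (illustrative, simplified): how a writer thread pairs
 * __wt_log_slot_join and __wt_log_slot_release.  The buffer-copy step
 * and the "slot done" check are sketched with assumed names: the
 * slot_buf layout and a WT_LOG_SLOT_DONE predicate on the returned
 * state are taken on faith here, and the real write path performs
 * additional record formatting, checksumming and LSN bookkeeping.
 */
#ifdef LOG_SLOT_EXAMPLE
static int
__log_slot_write_example(WT_SESSION_IMPL *session, WT_ITEM *record)
{
	WT_MYSLOT myslot;
	int64_t release_state;
	bool free_slot;

	memset(&myslot, 0, sizeof(myslot));
	/* Reserve our byte range in the active slot. */
	__wt_log_slot_join(session, record->size, 0, &myslot);
	/* Copy our record into the consolidated buffer at our offset. */
	if (!F_ISSET(&myslot, WT_MYSLOT_UNBUFFERED))
		memcpy((uint8_t *)myslot.slot->slot_buf.mem + myslot.offset,
		    record->data, record->size);
	/* Signal that our copy is complete. */
	release_state = __wt_log_slot_release(&myslot, (int64_t)record->size);
	/* Assumed: if ours was the last outstanding piece, write the slot. */
	if (WT_LOG_SLOT_DONE(release_state)) {
		WT_RET(__wt_log_release(session, myslot.slot, &free_slot));
		if (free_slot)
			__wt_log_slot_free(session, myslot.slot);
	}
	return (0);
}
#endif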