/*
 * __log_slot_switch_internal --
 *	Retire the slot recorded in myslot, provided it is still the log's
 *	active slot, and install a new active slot.
 *
 *	Returns 0 on success, and also when another thread is already handling
 *	this slot (slot no longer active, or close reported WT_NOTFOUND).
 *	Any other failure from the close, release or new-slot step is
 *	returned to the caller, who may call in again to retry.
 */
static int
__log_slot_switch_internal(
    WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced)
{
	WT_DECL_RET;
	WT_LOGSLOT *closing;
	bool can_free, need_release;

	closing = myslot->slot;
	need_release = false;

	/* The session is expected to be flagged as holding the slot lock. */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));

	/* Somebody else won the race to close this particular slot. */
	if (closing != S2C(session)->log->active_slot)
		return (0);

	/*
	 * A previous call may have closed the slot and then failed to set up
	 * its replacement.  In that case WT_MYSLOT_CLOSE is still set: skip
	 * the close and go straight to creating the new slot.
	 */
	if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) {
		ret = __log_slot_close(session, closing, &need_release, forced);

		/*
		 * WT_NOTFOUND means another thread is processing the slot
		 * change; that is not an error for us.
		 */
		if (ret != 0)
			return (ret == WT_NOTFOUND ? 0 : ret);

		if (need_release) {
			WT_RET(__wt_log_release(session, closing, &can_free));
			if (can_free)
				__wt_log_slot_free(session, closing);
		}
	}

	/*
	 * Remember the close across a failed attempt to create the new slot;
	 * the flag is only cleared once the new slot exists.
	 */
	F_SET(myslot, WT_MYSLOT_CLOSE);
	WT_RET(__wt_log_slot_new(session));
	F_CLR(myslot, WT_MYSLOT_CLOSE);

	return (0);
}
/*
 * __wt_log_wrlsn --
 *	Advance the log's write_lsn, in LSN order, over slots whose buffers
 *	have been written (state WT_LOG_SLOT_WRITTEN).  Slots that cannot be
 *	applied yet are coalesced with LSN-contiguous successors so they can
 *	be freed early.  The whole pass runs under log_writelsn_lock.
 *
 *	Returns 0, or the error from signalling a condition variable.
 */
int
__wt_log_wrlsn(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY *entry, done[WT_SLOT_POOL];
	WT_LOGSLOT *range_head, *slot;
	WT_LSN seen_write_lsn;
	size_t ndone;
	uint32_t i;

	conn = S2C(session);
	log = conn->log;
	__wt_spin_lock(session, &log->log_writelsn_lock);

restart:
	range_head = NULL;
	WT_INIT_LSN(&seen_write_lsn);

	/* Single pass over the pool collecting every written slot. */
	for (ndone = 0, i = 0; i < WT_SLOT_POOL; i++) {
		slot = &log->slot_pool[i];

		/*
		 * XXX - A slot in state 0 was once seen orphaned during
		 * debugging.  Believed fixed; the assertion is kept as a
		 * tripwire.
		 */
		if (slot->slot_state == 0)
			WT_ASSERT(session,
			    slot->slot_release_lsn.file >= log->write_lsn.file);

		if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
			continue;

		entry = &done[ndone++];
		entry->slot_index = i;
		entry->lsn = slot->slot_release_lsn;
	}

	if (ndone > 0) {
		/* Order the candidates by release LSN. */
		WT_INSERTION_SORT(done, ndone,
		    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);

		for (i = 0; i < ndone; i++) {
			entry = &done[i];
			slot = &log->slot_pool[entry->slot_index];

			/*
			 * The log server pushes slots out periodically and
			 * some are empty (start, release and end LSN all
			 * equal).  Those are simply freed.
			 */
			if (__wt_log_cmp(&slot->slot_start_lsn,
			    &slot->slot_release_lsn) == 0 &&
			    __wt_log_cmp(&slot->slot_start_lsn,
			    &slot->slot_end_lsn) == 0) {
				__wt_log_slot_free(session, slot);
				continue;
			}

			if (range_head == NULL) {
				/*
				 * Not coalescing yet.  A synchronous write may
				 * move write_lsn behind our back, so remember
				 * the value we compared against.
				 */
				seen_write_lsn = log->write_lsn;

				/*
				 * Not the next LSN: keep the slot and try to
				 * coalesce later slots into it.
				 */
				if (__wt_log_cmp(
				    &log->write_lsn, &entry->lsn) != 0) {
					range_head = slot;
					continue;
				}

				/* This slot is next in line: apply it. */
				WT_ASSERT(session, __wt_log_cmp(&entry->lsn,
				    &slot->slot_release_lsn) == 0);
				if (slot->slot_start_lsn.offset !=
				    slot->slot_last_offset)
					slot->slot_start_lsn.offset =
					    slot->slot_last_offset;
				log->write_start_lsn = slot->slot_start_lsn;
				log->write_lsn = slot->slot_end_lsn;
				WT_ERR(__wt_cond_signal(
				    session, log->log_write_cond));
				WT_STAT_FAST_CONN_INCR(session, log_write_lsn);

				/* Wake the close thread if this slot asks. */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					WT_ERR(__wt_cond_signal(
					    session, conn->log_file_cond));
			} else {
				/*
				 * write_lsn moved since we last looked, so
				 * slots we skipped may now be applicable.
				 * Start the whole pass again.
				 */
				if (__wt_log_cmp(
				    &log->write_lsn, &seen_write_lsn) != 0)
					goto restart;

				/* Gap in the LSNs: begin a new range here. */
				if (__wt_log_cmp(&range_head->slot_end_lsn,
				    &entry->lsn) != 0) {
					range_head = slot;
					continue;
				}

				/* Contiguous: fold this slot into the range. */
				range_head->slot_last_offset =
				    slot->slot_last_offset;
				range_head->slot_end_lsn = slot->slot_end_lsn;
				WT_STAT_FAST_CONN_INCR(
				    session, log_slot_coalesced);

				/* Carry the close-file request forward. */
				if (F_ISSET(slot, WT_SLOT_CLOSEFH))
					F_SET(range_head, WT_SLOT_CLOSEFH);
			}

			/* Applied or folded into a range: the slot is done. */
			__wt_log_slot_free(session, slot);
		}
	}

err:	__wt_spin_unlock(session, &log->log_writelsn_lock);
	return (ret);
}
/* * __log_wrlsn_server -- * The log wrlsn server thread. */ static WT_THREAD_RET __log_wrlsn_server(void *arg) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL]; WT_LOGSLOT *slot; WT_SESSION_IMPL *session; size_t written_i; uint32_t i, save_i; int yield; session = arg; conn = S2C(session); log = conn->log; yield = 0; while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { /* * No need to use the log_slot_lock because the slot pool * is statically allocated and any slot in the * WT_LOG_SLOT_WRITTEN state is exclusively ours for now. */ i = 0; written_i = 0; /* * Walk the array once saving any slots that are in the * WT_LOG_SLOT_WRITTEN state. */ while (i < WT_SLOT_POOL) { save_i = i; slot = &log->slot_pool[i++]; if (slot->slot_state != WT_LOG_SLOT_WRITTEN) continue; written[written_i].slot_index = save_i; written[written_i++].lsn = slot->slot_release_lsn; } /* * If we found any written slots process them. We sort them * based on the release LSN, and then look for them in order. */ if (written_i > 0) { yield = 0; WT_INSERTION_SORT(written, written_i, WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT); /* * We know the written array is sorted by LSN. Go * through them either advancing write_lsn or stop * as soon as one is not in order. */ for (i = 0; i < written_i; i++) { if (WT_LOG_CMP(&log->write_lsn, &written[i].lsn) != 0) break; /* * If we get here we have a slot to process. * Advance the LSN and process the slot. */ slot = &log->slot_pool[written[i].slot_index]; WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn, &slot->slot_release_lsn) == 0); log->write_start_lsn = slot->slot_start_lsn; log->write_lsn = slot->slot_end_lsn; WT_ERR(__wt_cond_signal(session, log->log_write_cond)); WT_STAT_FAST_CONN_INCR(session, log_write_lsn); /* * Signal the close thread if needed. 
*/ if (F_ISSET(slot, WT_SLOT_CLOSEFH)) WT_ERR(__wt_cond_signal(session, conn->log_file_cond)); WT_ERR(__wt_log_slot_free(session, slot)); } } /* * If we saw a later write, we always want to yield because * we know something is in progress. */ if (yield++ < 1000) __wt_yield(); else /* Wait until the next event. */ WT_ERR(__wt_cond_wait(session, conn->log_wrlsn_cond, 100000)); } if (0) err: __wt_err(session, ret, "log wrlsn server error"); return (WT_THREAD_RET_VALUE); }
/*
 * __wt_log_write --
 *	Write a record into the log.
 *
 *	record is treated as a WT_LOG_RECORD: its buffer is grown to the
 *	rounded-up length, zero-padded, and its len and checksum header fields
 *	are filled in here.  On success the record's LSN is stored through
 *	lsnp when lsnp is not NULL.  flags are passed through to the slot and
 *	direct-write code and also select the sync behavior below.
 *
 *	Returns 0 on success or an error code.
 */
int
__wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
    uint32_t flags)
{
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOG_RECORD *hdr;
	WT_LSN rec_lsn;
	WT_MYSLOT my;
	uint32_t padded_len;
	int slot_locked;

	log = S2C(session)->log;
	slot_locked = 0;
	INIT_LSN(&rec_lsn);
	my.slot = NULL;

	/*
	 * The caller's WT_ITEM starts with a log record header for us to
	 * complete.  With direct_io the caller supplies an aligned record,
	 * but we always make the buffer big enough and zero-filled so the
	 * full rounded amount can be written; doing so unconditionally keeps
	 * the reading code simpler.
	 */
	WT_STAT_FAST_CONN_INCRV(session, log_bytes_user, record->size);
	padded_len = __wt_rduppo2((uint32_t)record->size, log->allocsize);
	WT_ERR(__wt_buf_grow(session, record, padded_len));
	WT_ASSERT(session, record->data == record->mem);

	/* Zero the tail when the payload is shorter than the padded length. */
	if (record->size != padded_len) {
		memset((uint8_t *)record->mem + record->size,
		    0, padded_len - record->size);
		record->size = padded_len;
	}

	/* The checksum is computed with the checksum field itself zeroed. */
	hdr = (WT_LOG_RECORD *)record->mem;
	hdr->len = (uint32_t)record->size;
	hdr->checksum = 0;
	hdr->checksum = __wt_cksum(hdr, record->size);

	WT_STAT_FAST_CONN_INCR(session, log_writes);

	/*
	 * Until consolidation has been forced, try an unconsolidated write
	 * first.  EAGAIN means the try-lock was not obtained: fall through
	 * to the consolidation path.
	 */
	if (!F_ISSET(log, WT_LOG_FORCE_CONSOLIDATE)) {
		if ((ret = __log_direct_write(
		    session, record, lsnp, flags)) == 0)
			return (0);
		if (ret != EAGAIN)
			WT_ERR(ret);
	}

	/*
	 * The first sign of contention switches direct writes off for good:
	 * individual writes flood the I/O system faster than they contend on
	 * the log slot lock, so consolidation performs better.
	 */
	F_SET(log, WT_LOG_FORCE_CONSOLIDATE);

	ret = __wt_log_slot_join(session, padded_len, flags, &my);
	if (ret == ENOMEM) {
		/*
		 * No consolidated slot could take this record: write it
		 * directly, retrying for as long as the lock is busy.
		 */
		do {
			ret = __log_direct_write(session, record, lsnp, flags);
		} while (ret == EAGAIN);
		WT_ERR(ret);

		/*
		 * Grow the buffers of any slots we can reach so later
		 * consolidations are more likely to fit.
		 */
		WT_ERR(__wt_log_slot_grow_buffers(session, 4 * padded_len));
		return (0);
	}
	WT_ERR(ret);

	if (my.offset != 0)
		/* Joined at a non-zero offset: wait on the slot. */
		WT_ERR(__wt_log_slot_wait(session, my.slot));
	else {
		/*
		 * Offset zero: this thread closes the slot and acquires log
		 * space for the group under the slot lock, then notifies
		 * the slot.
		 */
		__wt_spin_lock(session, &log->log_slot_lock);
		slot_locked = 1;
		WT_ERR(__wt_log_slot_close(session, my.slot));
		WT_ERR(__log_acquire(
		    session, my.slot->slot_group_size, my.slot));
		__wt_spin_unlock(session, &log->log_slot_lock);
		slot_locked = 0;
		WT_ERR(__wt_log_slot_notify(session, my.slot));
	}

	WT_ERR(__log_fill(session, &my, 0, record, &rec_lsn));

	if (__wt_log_slot_release(my.slot, padded_len) == WT_LOG_SLOT_DONE) {
		/* The release reported the slot done: release and free it. */
		WT_ERR(__log_release(session, my.slot));
		WT_ERR(__wt_log_slot_free(my.slot));
	} else if (LF_ISSET(WT_LOG_FSYNC)) {
		/* Wait for our writes to reach disk. */
		while (LOG_CMP(&log->sync_lsn, &rec_lsn) <= 0 &&
		    my.slot->slot_error == 0)
			(void)__wt_cond_wait(
			    session, log->log_sync_cond, 10000);
	}

err:
	/* An error inside the locked section leaves the slot lock held. */
	if (slot_locked)
		__wt_spin_unlock(session, &log->log_slot_lock);

	if (ret == 0 && lsnp != NULL)
		*lsnp = rec_lsn;

	/*
	 * For synchronous writes an error in any thread sharing the slot
	 * leaves us unsure whether our own data reached the file, so the
	 * slot's error is reported.  Otherwise only our own error counts.
	 */
	if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC) &&
	    ret == 0 && my.slot != NULL)
		ret = my.slot->slot_error;

	return (ret);
}
/*
 * __log_slot_switch_internal --
 *	Retire the slot recorded in myslot, provided it is still the log's
 *	active slot, install a new active slot, and then release the old one
 *	if its close asked for that.
 *
 *	forced marks a forced switch; a forced switch of a slot nobody has
 *	joined is skipped (unless WT_LOG_FORCE_NEWFILE is set) and, in that
 *	case only, *did_work is set to false when did_work is not NULL.
 *
 *	Returns 0 on success or when there is nothing for this thread to do,
 *	otherwise the error from the close, new-slot or release step.
 */
static int
__log_slot_switch_internal(WT_SESSION_IMPL *session, WT_MYSLOT *myslot,
    bool forced, bool *did_work)
{
	WT_DECL_RET;
	WT_LOG *log;
	WT_LOGSLOT *closing;
	uint32_t njoined;
	bool can_free, need_release;

	log = S2C(session)->log;
	closing = myslot->slot;
	need_release = false;

	/* The session is expected to be flagged as holding the slot lock. */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));

	/* Somebody else won the race to close this particular slot. */
	if (closing != log->active_slot)
		return (0);

	/*
	 * A forced switch of an unused active slot is a no-op.  Non-forced
	 * switches always proceed because the slot may belong to an
	 * unbuffered operation.
	 */
	njoined = WT_LOG_SLOT_JOINED(closing->slot_state);
	if (forced && njoined == 0 && !F_ISSET(log, WT_LOG_FORCE_NEWFILE)) {
		WT_STAT_CONN_INCR(session, log_force_write_skip);
		if (did_work != NULL)
			*did_work = false;
		return (0);
	}

	/*
	 * An earlier call may have closed the slot and then failed to set up
	 * its replacement.  If so WT_MYSLOT_CLOSE is still set: skip the
	 * close but still create the new slot.
	 */
	if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) {
		ret = __log_slot_close(session, closing, &need_release, forced);

		/*
		 * WT_NOTFOUND means another thread is processing the slot
		 * change; that is not an error for us.
		 */
		if (ret != 0)
			return (ret == WT_NOTFOUND ? 0 : ret);

		/*
		 * Record both the close and whether a release is owed on
		 * myslot so neither fact is lost if the caller has to retry.
		 */
		F_SET(myslot, WT_MYSLOT_CLOSE);
		if (need_release)
			F_SET(myslot, WT_MYSLOT_NEEDS_RELEASE);
	}

	/*
	 * Install the new slot before releasing the old one so joining
	 * threads are not held up behind the write of the previous slot.
	 */
	WT_RET(__log_slot_new(session));
	F_CLR(myslot, WT_MYSLOT_CLOSE);

	if (!F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE))
		return (0);

	/*
	 * The release must happen while the slot lock is held: a forced
	 * switch relies on every earlier switch having completed, including
	 * writing out the buffers of earlier slots.
	 */
	WT_RET(__wt_log_release(session, closing, &can_free));
	F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE);
	if (can_free)
		__wt_log_slot_free(session, closing);

	return (0);
}
/*
 * __wt_log_wrlsn --
 *	Process written log slots and attempt to coalesce them if the LSNs
 *	are contiguous.  Must be called with the log slot lock held.
 *
 *	free_i, if not NULL, receives the pool index of a slot observed in
 *	the WT_LOG_SLOT_FREE state: the first one seen during the scan or,
 *	failing that, the first slot freed here.  It is left at WT_SLOT_POOL
 *	when no free slot was seen.
 *
 *	yield, if not NULL, is reset to 0 when at least one written slot was
 *	found; it is not touched otherwise.
 *
 *	Returns 0 on success, or the error from signalling a condition
 *	variable or freeing a slot.
 */
int
__wt_log_wrlsn(WT_SESSION_IMPL *session, uint32_t *free_i, int *yield)
{
	WT_CONNECTION_IMPL *conn;
	WT_LOG *log;
	WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
	WT_LOGSLOT *coalescing, *slot;
	size_t written_i;
	uint32_t i;

	conn = S2C(session);
	log = conn->log;
	coalescing = NULL;
	written_i = 0;

	/* WT_SLOT_POOL is the "no free slot found" sentinel. */
	if (free_i != NULL)
		*free_i = WT_SLOT_POOL;

	/*
	 * Walk the array once saving any slots that are in the
	 * WT_LOG_SLOT_WRITTEN state, and noting the first free slot.
	 */
	for (i = 0; i < WT_SLOT_POOL; i++) {
		slot = &log->slot_pool[i];
		if (free_i != NULL && *free_i == WT_SLOT_POOL &&
		    slot->slot_state == WT_LOG_SLOT_FREE)
			*free_i = i;
		if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
			continue;
		written[written_i].slot_index = i;
		written[written_i++].lsn = slot->slot_release_lsn;
	}

	/* Nothing written: nothing to advance or coalesce. */
	if (written_i == 0)
		return (0);

	/*
	 * If wanted, reset the yield variable to indicate that we
	 * have found written slots.
	 */
	if (yield != NULL)
		*yield = 0;

	/* Sort by release LSN so the slots can be examined in log order. */
	WT_INSERTION_SORT(written, written_i,
	    WT_LOG_WRLSN_ENTRY, WT_WRLSN_ENTRY_CMP_LT);

	/*
	 * We know the written array is sorted by LSN.  Go through them
	 * either advancing write_lsn or coalesce contiguous ranges of
	 * written slots.
	 */
	for (i = 0; i < written_i; i++) {
		slot = &log->slot_pool[written[i].slot_index];
		if (coalescing != NULL) {
			/* Gap in the LSNs: begin a new range at this slot. */
			if (WT_LOG_CMP(&coalescing->slot_end_lsn,
			    &written[i].lsn) != 0) {
				coalescing = slot;
				continue;
			}

			/*
			 * If we get here we have a slot to coalesce
			 * and free.
			 */
			coalescing->slot_end_lsn = slot->slot_end_lsn;
			WT_STAT_FAST_CONN_INCR(session, log_slot_coalesced);

			/*
			 * Copy the flag for later closing.
			 */
			if (F_ISSET(slot, WT_SLOT_CLOSEFH))
				F_SET(coalescing, WT_SLOT_CLOSEFH);
		} else {
			/*
			 * If this written slot is not the next LSN,
			 * try to start coalescing with later slots.
			 */
			if (WT_LOG_CMP(
			    &log->write_lsn, &written[i].lsn) != 0) {
				coalescing = slot;
				continue;
			}

			/*
			 * If we get here we have a slot to process.
			 * Advance the LSN and process the slot.
			 */
			WT_ASSERT(session, WT_LOG_CMP(&written[i].lsn,
			    &slot->slot_release_lsn) == 0);
			log->write_start_lsn = slot->slot_start_lsn;
			log->write_lsn = slot->slot_end_lsn;
			WT_RET(__wt_cond_signal(session, log->log_write_cond));
			WT_STAT_FAST_CONN_INCR(session, log_write_lsn);

			/*
			 * Signal the close thread if needed.
			 */
			if (F_ISSET(slot, WT_SLOT_CLOSEFH))
				WT_RET(__wt_cond_signal(
				    session, conn->log_file_cond));
		}
		WT_RET(__wt_log_slot_free(session, slot));

		/*
		 * Report the slot we just freed, by its own pool index.  The
		 * scan loop's index must not be reused here: it no longer
		 * refers to this slot.
		 */
		if (free_i != NULL && *free_i == WT_SLOT_POOL &&
		    slot->slot_state == WT_LOG_SLOT_FREE)
			*free_i = written[i].slot_index;
	}
	return (0);
}