/** * Remaps the private view from the shared view so that it does not consume too much * copy-on-write/swap space. Must only be called after the in-memory journal has been flushed * to disk and applied on top of the shared view. * * @param fraction Value between (0, 1] indicating what fraction of the memory to remap. * Remapping too much or too frequently incurs copy-on-write page fault cost. */ static void remapPrivateView(double fraction) { // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't see any // newly written data on reads. invariant(!commitJob.hasWritten()); try { Timer t; remapPrivateViewImpl(fraction); stats.curr()->_remapPrivateViewMicros += t.micros(); LOG(4) << "remapPrivateView end"; return; } catch (DBException& e) { severe() << "dbexception in remapPrivateView causing immediate shutdown: " << e.toString(); } catch (std::ios_base::failure& e) { severe() << "ios_base exception in remapPrivateView causing immediate shutdown: " << e.what(); } catch (std::bad_alloc& e) { severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: " << e.what(); } catch (std::exception& e) { severe() << "exception in remapPrivateView causing immediate shutdown: " << e.what(); } catch (...) { severe() << "unknown exception in remapPrivateView causing immediate shutdown: "; } invariant(false); }
/**
 * The main durability thread loop. There is a single instance of this function running.
 *
 * Until shutdown is requested, every iteration waits (in up to three slices of about a third
 * of the journal commit interval) for a reason to commit and then runs one group commit: the
 * pending write intents are copied into a journal buffer, the buffer is handed to the journal
 * writer and, when the remap criteria are met, the private view is remapped while holding the
 * flush lock exclusively. Any exception thrown during an iteration is logged at severe level
 * and followed by invariant(false).
 *
 * @param cs Clock source forwarded to PREPLOGBUFFER.
 * @param serverStartMs Server start time forwarded to PREPLOGBUFFER (presumably milliseconds,
 *                      judging by the name - confirm).
 */
static void durThread(ClockSource* cs, int64_t serverStartMs) {
    Client::initThread("durability");

    log() << "Durability thread started";

    // Best effort: if the partition check throws, keep the default assumption that the
    // journal and the data files live on the same partition.
    bool samePartition = true;
    try {
        const std::string dataDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dataDir);
    } catch (...) {
    }

    // Spawn the journal writer thread
    JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
    journalWriter.start();

    // Bookkeeping used to estimate how much / how fast to remap
    uint64_t commitCounter(0);
    uint64_t estimatedPrivateMapSize(0);
    uint64_t remapLastTimestamp(0);

    while (shutdownRequested.loadRelaxed() == 0) {
        // A configured interval of zero means "pick a default": 100 when the journal shares
        // a partition with the data files, 30 otherwise.
        unsigned intervalMs = storageGlobalParams.journalCommitIntervalMs;
        if (intervalMs == 0) {
            intervalMs = samePartition ? 100 : 30;
        }

        // +1 so the wait slice never goes down to zero
        const int64_t waitSliceMs = (intervalMs / 3) + 1;

        // Reset the stats once the reset interval has elapsed
        if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
            stats.reset();
        }

        try {
            stdx::unique_lock<stdx::mutex> flushLk(flushMutex);

            // Wait for up to three slices, leaving early as soon as there is a reason to
            // commit right away.
            for (unsigned slice = 0; slice < 3; ++slice) {
                const auto waitResult =
                    flushRequested.wait_for(flushLk, Milliseconds(waitSliceMs).toSystemDuration());
                if (waitResult == stdx::cv_status::no_timeout) {
                    // Someone forced a flush
                    break;
                }

                if (commitNotify.nWaiting()) {
                    // One or more getLastError j:true is pending
                    break;
                }

                if (commitJob.bytes() > UncommittedBytesLimit / 2) {
                    // The number of written bytes is growing
                    break;
                }
            }

            // The commit logic itself
            LOG(4) << "groupCommit begin";

            Timer commitTimer;

            const ServiceContext::UniqueOperationContext opCtxHolder = cc().makeOperationContext();
            OperationContext& opCtx = *opCtxHolder;
            AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(opCtx.lockState());

            // The commitNumber must be snapshotted after the flush lock has been obtained,
            // because only at this point do we know we have a stable snapshot of the data.
            const CommitNotifier::When commitNumber(commitNotify.now());

            LOG(4) << "Processing commit number " << commitNumber;

            if (!commitJob.hasWritten()) {
                // The journal lock is not needed anymore. Free it here, for the really
                // unlikely possibility that the writeBuffer call below blocks.
                autoFlushLock.release();

                // A getlasterror request could have come in after the data was already
                // committed. There is no need to call committingReset though, because no
                // writes have been done (hasWritten == false).
                JournalWriter::Buffer* const noopBuffer = journalWriter.newBuffer();
                noopBuffer->setNoop();
                noopBuffer->journalListenerToken = getJournalListener()->getToken();

                journalWriter.writeBuffer(noopBuffer, commitNumber);
            } else {
                // Copy all the in-memory changes into the journal writer's buffer.
                JournalWriter::Buffer* const journalBuffer = journalWriter.newBuffer();
                PREPLOGBUFFER(
                    journalBuffer->getHeader(), journalBuffer->getBuilder(), cs, serverStartMs);

                estimatedPrivateMapSize += commitJob.bytes();
                ++commitCounter;

                // The write intents now live in the buffer, so the commit job may be reused.
                // Its contents must be reset while still under the S flush lock; otherwise
                // someone might have done a write in the meantime and the reset would wipe
                // out their changes without them ever being committed.
                commitJob.committingReset();

                const double memoryPressure = ProcessInfo::getSystemMemoryPressurePercentage();

                // With the in-memory modifications collected, the flush lock can potentially
                // be released if no remap is necessary.
                // When remapping due to memory pressure, two criteria are considered:
                // 1. Whether the amount of 4k pages touched exceeds 512 MB, a reasonable
                //    estimate of memory pressure on Linux.
                // 2. Whether the amount of free memory on the machine is running low, because
                //    #1 underestimates the memory pressure on Windows, which commits in 64MB
                //    chunks.
                const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
                    (memoryPressure > 0.0) || (commitCounter % NumCommitsBeforeRemap == 0) ||
                    (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);

                double remapFraction = 0.0;

                if (shouldRemap) {
                    // All private views should get remapped about every 2 seconds. There
                    // could be ~1000 views, so a little is done on each pass. Copy-on-write
                    // faults follow a remap, so doing a bit at a time avoids big load spikes
                    // when the pages are touched.
                    //
                    // TODO: Instead of the time-based logic above, consider using ProcessInfo
                    // and watching for getResidentSize to drop, which is more precise.
                    remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;

                    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
                        remapFraction = 1;
                    } else {
                        // Stay well away from the UncommittedBytesLimit
                        const double memoryFraction =
                            estimatedPrivateMapSize / static_cast<double>(UncommittedBytesLimit);

                        remapFraction = std::max(memoryFraction, remapFraction);
                        remapFraction = std::max(memoryPressure, remapFraction);
                    }
                } else {
                    LOG(4) << "Early release flush lock";

                    // No remap will happen, so drop the flush lock. The journal I/O is then
                    // done outside of the lock and other threads can proceed.
                    invariant(!shouldRemap);
                    autoFlushLock.release();
                }

                journalBuffer->journalListenerToken = getJournalListener()->getToken();

                // Request async I/O to the journal. This may block.
                journalWriter.writeBuffer(journalBuffer, commitNumber);

                // Data has now been written to the shared view. If a remap was requested, the
                // S flush lock is still held here, so just upgrade it and perform the remap.
                if (shouldRemap) {
                    // The previously scheduled journal writes must complete before any remap
                    // is attempted.
                    journalWriter.flush();
                    journalWriter.assertIdle();

                    // Upgrading the journal lock to flush stops all activity on the system,
                    // because memory is about to be remapped and readers must not be
                    // accessing it. Technically this step could be avoided on systems which
                    // support atomic remap.
                    autoFlushLock.upgradeFlushLockToExclusive();
                    remapPrivateView(remapFraction);

                    autoFlushLock.release();

                    // Reset the private map estimate outside of the lock
                    estimatedPrivateMapSize = 0;
                    remapLastTimestamp = curTimeMicros64();

                    stats.curr()->_commitsInWriteLock++;
                    stats.curr()->_commitsInWriteLockMicros += commitTimer.micros();
                }
            }

            stats.curr()->_commits++;
            stats.curr()->_commitsMicros += commitTimer.micros();

            LOG(4) << "groupCommit end";
        } catch (const DBException& ex) {
            severe() << "dbexception in durThread causing immediate shutdown: " << ex.toString();
            invariant(false);
        } catch (const std::ios_base::failure& ex) {
            severe() << "ios_base exception in durThread causing immediate shutdown: "
                     << ex.what();
            invariant(false);
        } catch (const std::bad_alloc& ex) {
            severe() << "bad_alloc exception in durThread causing immediate shutdown: "
                     << ex.what();
            invariant(false);
        } catch (const std::exception& ex) {
            severe() << "exception in durThread causing immediate shutdown: " << ex.what();
            invariant(false);
        } catch (...) {
            severe() << "unhandled exception in durThread causing immediate shutdown";
            invariant(false);
        }
    }

    // Stops the journal thread and ensures everything was written
    invariant(!commitJob.hasWritten());

    journalWriter.flush();
    journalWriter.shutdown();

    log() << "Durability thread stopped";
}