/**
 * Runs one commit pass using the supplied builder: prepare the log buffer,
 * write it to the journal, write the noted write intents to the data files,
 * clear the intents and remap the private view.
 *
 * Nothing is done when no write intents are pending (wi._writes is empty).
 *
 * locking: in read lock when called
 *
 * @param bb builder handed to PREPLOGBUFFER and WRITETOJOURNAL
 */
static void _go(AlignedBuilder& bb) {
    if( !wi._writes.empty() ) {
        PREPLOGBUFFER(bb);

        // todo: add double buffering so we can be (not even read locked) during WRITETOJOURNAL
        WRITETOJOURNAL(bb);

        // the journal write comes first; only then are the noted write intent
        // entries written to the data files
        WRITETODATAFILES();

        wi.clear();

        REMAPPRIVATEVIEW();
    }
}
/** locking in read lock when called @see MongoMMF::close() */ static void groupCommit() { dbMutex.assertAtLeastReadLocked(); if( !commitJob.hasWritten() ) return; PREPLOGBUFFER(); WRITETOJOURNAL(commitJob._ab); // data is now in the journal, which is sufficient for acknowledging getlasterror. // (ok to crash after that) log() << "TEMP NOTIFYING COMMITTED" << endl; commitJob.notifyCommitted(); // write the noted write intent entries to the data files. // this has to come after writing to the journal, obviously... MongoFile::markAllWritable(); // for _DEBUG. normally we don't write in a read lock WRITETODATAFILES(); if (!dbMutex.isWriteLocked()) MongoFile::unmarkAllWritable(); commitJob.reset(); // REMAPPRIVATEVIEW // // remapping private views must occur after WRITETODATAFILES otherwise // we wouldn't see newly written data on reads. // DEV assert( !commitJob.hasWritten() ); if( !dbMutex.isWriteLocked() ) { // this needs done in a write lock thus we do it on the next acquisition of that // instead of here (there is no rush if you aren't writing anyway -- but it must happen, // if it is done, before any uncommitted writes occur). // dbMutex._remapPrivateViewRequested = true; } else { // however, if we are already write locked, we must do it now -- up the call tree someone // may do a write without a new lock acquisition. this can happen when MongoMMF::close() calls // this method when a file (and its views) is about to go away. // REMAPPRIVATEVIEW(); } }
static void _groupCommit() { LOG(4) << "_groupCommit " << endl; { AlignedBuilder &ab = __theBuilder; // we need to make sure two group commits aren't running at the same time // (and we are only read locked in the dbMutex, so it could happen -- while // there is only one dur thread, "early commits" can be done by other threads) SimpleMutex::scoped_lock lk(commitJob.groupCommitMutex); commitJob.commitingBegin(); if( !commitJob.hasWritten() ) { // getlasterror request could have came after the data was already committed commitJob.committingNotifyCommitted(); } else { JSectHeader h; PREPLOGBUFFER(h,ab); // todo : write to the journal outside locks, as this write can be slow. // however, be careful then about remapprivateview as that cannot be done // if new writes are then pending in the private maps. WRITETOJOURNAL(h, ab); // data is now in the journal, which is sufficient for acknowledging getLastError. // (ok to crash after that) commitJob.committingNotifyCommitted(); WRITETODATAFILES(h, ab); debugValidateAllMapsMatch(); commitJob.committingReset(); ab.reset(); } } }
/**
 * Body of the durability thread. Only one instance of this function runs.
 *
 * Starts a JournalWriter, then loops until shutdownRequested becomes non-zero.
 * Each iteration waits (in up to three slices of the commit interval) for a
 * reason to commit, takes the MMAPv1 flush lock, hands the pending changes to
 * the journal writer and, when the remap heuristics say so, remaps a fraction
 * of the private views under the exclusive flush lock.
 *
 * Any exception escaping an iteration is logged as severe and followed by
 * invariant(false).
 *
 * @param cs            clock source, forwarded to PREPLOGBUFFER
 * @param serverStartMs forwarded to PREPLOGBUFFER
 */
static void durThread(ClockSource* cs, int64_t serverStartMs) {
    Client::initThread("durability");

    log() << "Durability thread started";

    // Assume the journal and the data files share a partition unless the check
    // below says otherwise; a failing check leaves the default in place.
    bool samePartition = true;
    try {
        const std::string dataDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dataDir);
    } catch (...) {
    }

    // The journal writer thread is spawned here
    JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
    journalWriter.start();

    // Bookkeeping used to estimate how much / how fast to remap
    uint64_t commitCounter = 0;
    uint64_t estimatedPrivateMapSize = 0;
    uint64_t remapLastTimestamp = 0;

    while (shutdownRequested.loadRelaxed() == 0) {
        // A configured interval of zero selects a default based on the partition check
        unsigned intervalMs = storageGlobalParams.journalCommitIntervalMs;
        if (intervalMs == 0) {
            intervalMs = samePartition ? 100 : 30;
        }

        // A third of the interval; the +1 keeps it from ever reaching zero
        const int64_t waitSliceMs = (intervalMs / 3) + 1;

        // Stats are reset once the current stats period exceeds the reset interval
        if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
            stats.reset();
        }

        try {
            stdx::unique_lock<stdx::mutex> lock(flushMutex);

            // Wait at most three slices, leaving early when there is a reason to commit
            for (int attempt = 0; attempt < 3; ++attempt) {
                const auto waitResult =
                    flushRequested.wait_for(lock, Milliseconds(waitSliceMs).toSystemDuration());
                if (waitResult == stdx::cv_status::no_timeout) {
                    // Woken up explicitly: someone forced a flush
                    break;
                }

                // Either one or more getLastError j:true is pending, or the number
                // of written bytes is growing
                if (commitNotify.nWaiting() || commitJob.bytes() > UncommittedBytesLimit / 2) {
                    break;
                }
            }

            // From here on: the commit logic itself
            LOG(4) << "groupCommit begin";

            Timer commitTimer;

            const ServiceContext::UniqueOperationContext opCtxHolder = cc().makeOperationContext();
            OperationContext& opCtx = *opCtxHolder;
            AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(opCtx.lockState());

            // The commitNumber has to be snapshotted after the flush lock has been
            // obtained, because only then do we have a stable snapshot of the data.
            const CommitNotifier::When commitNumber(commitNotify.now());

            LOG(4) << "Processing commit number " << commitNumber;

            if (!commitJob.hasWritten()) {
                // The journal lock is not needed anymore. Free it here, for the really
                // unlikely possibility that the writeBuffer command below blocks.
                autoFlushLock.release();

                // A getlasterror request could have come in after the data was already
                // committed. committingReset is not needed, because no writes have
                // been done (hasWritten == false).
                JournalWriter::Buffer* const noopBuffer = journalWriter.newBuffer();
                noopBuffer->setNoop();
                noopBuffer->journalListenerToken = getJournalListener()->getToken();

                journalWriter.writeBuffer(noopBuffer, commitNumber);
            } else {
                // Copy all the in-memory changes into the journal writer's buffer
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder(), cs, serverStartMs);

                estimatedPrivateMapSize += commitJob.bytes();
                ++commitCounter;

                // The write intents now live in the buffer, so the commit job can be
                // reused. The reset must happen while still under the S flush lock,
                // because otherwise someone might have done a write and this would
                // wipe out their changes without them ever being committed.
                commitJob.committingReset();

                double systemMemoryPressurePercentage =
                    ProcessInfo::getSystemMemoryPressurePercentage();

                // With the in-memory modifications collected, the flush lock can
                // potentially be released if no remap is necessary.
                // Remapping due to memory pressure looks at two criteria:
                // 1. Whether the amount of 4k pages touched exceeds 512 MB,
                //    a reasonable estimate of memory pressure on Linux.
                // 2. Whether the amount of free memory on the machine is running low,
                //    since #1 underestimates the memory pressure on Windows, which
                //    commits in 64MB chunks.
                const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
                    (systemMemoryPressurePercentage > 0.0) ||
                    (commitCounter % NumCommitsBeforeRemap == 0) ||
                    (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);

                double remapFraction = 0.0;

                if (!shouldRemap) {
                    LOG(4) << "Early release flush lock";

                    // No remap this round, so the flush lock is dropped. That way the
                    // journal I/O happens outside of the lock and other threads can
                    // proceed.
                    invariant(!shouldRemap);
                    autoFlushLock.release();
                } else {
                    // The goal is to remap all private views about every 2 seconds.
                    // There could be ~1000 views, so a little is done each pass. There
                    // will be copy on write faults after remapping, so doing a little
                    // bit at a time avoids big load spikes when the pages are touched.
                    //
                    // TODO: Instead of the time-based logic above, consider using
                    // ProcessInfo and watching for getResidentSize to drop, which is
                    // more precise.
                    remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;

                    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
                        remapFraction = 1;
                    } else {
                        // Stay well away from the UncommittedBytesLimit
                        const double remapMemFraction =
                            estimatedPrivateMapSize / static_cast<double>(UncommittedBytesLimit);

                        remapFraction =
                            std::max(systemMemoryPressurePercentage,
                                     std::max(remapMemFraction, remapFraction));
                    }
                }

                buffer->journalListenerToken = getJournalListener()->getToken();

                // Async I/O request to the journal. This may block.
                journalWriter.writeBuffer(buffer, commitNumber);

                // Data has now been written to the shared view. If a remap was
                // requested, the S flush lock is still held here, so it is simply
                // upgraded before performing the remap.
                if (shouldRemap) {
                    // Previously scheduled journal writes must complete before any
                    // remap is attempted.
                    journalWriter.flush();
                    journalWriter.assertIdle();

                    // Upgrading the journal lock to flush stops all activity on the
                    // system, because memory is about to be remapped and readers must
                    // not be accessing it. Technically this step could be avoided on
                    // systems which support atomic remap.
                    autoFlushLock.upgradeFlushLockToExclusive();
                    remapPrivateView(remapFraction);

                    autoFlushLock.release();

                    // The private map estimate is reset outside of the lock
                    estimatedPrivateMapSize = 0;
                    remapLastTimestamp = curTimeMicros64();

                    stats.curr()->_commitsInWriteLock++;
                    stats.curr()->_commitsInWriteLockMicros += commitTimer.micros();
                }
            }

            stats.curr()->_commits++;
            stats.curr()->_commitsMicros += commitTimer.micros();

            LOG(4) << "groupCommit end";
        } catch (DBException& e) {
            severe() << "dbexception in durThread causing immediate shutdown: " << e.toString();
            invariant(false);
        } catch (std::ios_base::failure& e) {
            severe() << "ios_base exception in durThread causing immediate shutdown: "
                     << e.what();
            invariant(false);
        } catch (std::bad_alloc& e) {
            severe() << "bad_alloc exception in durThread causing immediate shutdown: "
                     << e.what();
            invariant(false);
        } catch (std::exception& e) {
            severe() << "exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (...) {
            severe() << "unhandled exception in durThread causing immediate shutdown";
            invariant(false);
        }
    }

    // Stop the journal thread and make sure everything was written
    invariant(!commitJob.hasWritten());

    journalWriter.flush();
    journalWriter.shutdown();

    log() << "Durability thread stopped";
}
/**
 * Prepares the log buffer in the given builder and then writes it to the journal.
 *
 * @param bb builder passed to both PREPLOGBUFFER and WRITETOJOURNAL
 */
static void _go(BufBuilder& bb) {
    PREPLOGBUFFER(bb);

    // todo: add double buffering so we can be (not even read locked) during WRITETOJOURNAL
    WRITETOJOURNAL(bb);
}