/** throws */
void removeJournalFiles() {
    log() << "removeJournalFiles" << endl;
    try {
        for (boost::filesystem::directory_iterator i(getJournalDir());
             i != boost::filesystem::directory_iterator();
             ++i) {
            string fileName = boost::filesystem::path(*i).leaf();
            if (str::startsWith(fileName, "j._")) {
                try {
                    boost::filesystem::remove(*i);
                } catch (std::exception& e) {
                    log() << "couldn't remove " << fileName << ' ' << e.what() << endl;
                    throw;
                }
            }
        }
        try {
            boost::filesystem::remove(lsnPath());
        } catch (...) {
            log() << "couldn't remove " << lsnPath().string() << endl;
            throw;
        }
    } catch (std::exception& e) {
        log() << "error removing journal files " << e.what() << endl;
        throw;
    }
    assert(!haveJournalFiles());
    log(1) << "removeJournalFiles end" << endl;
}
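// Illustrative sketch (not part of the original source): the same
// scan-and-remove pattern as removeJournalFiles() above, expressed with
// C++17 <filesystem> instead of boost::filesystem. The "j._" prefix and the
// log-then-rethrow behavior mirror the original; the function name and the
// std::cerr logging are hypothetical stand-ins.
#include <filesystem>
#include <iostream>
#include <string>

void removeJournalFilesSketch(const std::filesystem::path& journalDir) {
    for (const auto& entry : std::filesystem::directory_iterator(journalDir)) {
        const std::string name = entry.path().filename().string();
        if (name.rfind("j._", 0) == 0) {  // starts with "j._"
            try {
                std::filesystem::remove(entry.path());
            } catch (const std::exception& e) {
                std::cerr << "couldn't remove " << name << ' ' << e.what() << '\n';
                throw;  // as above: log, then surface the failure to the caller
            }
        }
    }
}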
/** never throws
    @return true if journal dir is not empty */
bool haveJournalFiles() {
    try {
        for (boost::filesystem::directory_iterator i(getJournalDir());
             i != boost::filesystem::directory_iterator();
             ++i) {
            string fileName = boost::filesystem::path(*i).leaf();
            if (str::startsWith(fileName, "j._"))
                return true;
        }
    } catch (...) {
    }
    return false;
}
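// Illustrative sketch (not part of the original source): haveJournalFiles()
// guarantees "never throws" via catch(...). With C++17 <filesystem> the same
// contract can be expressed through the std::error_code overloads, which
// report failure without throwing; haveJournalFilesSketch is a hypothetical name.
#include <filesystem>
#include <string>
#include <system_error>

bool haveJournalFilesSketch(const std::filesystem::path& journalDir) {
    std::error_code ec;
    for (std::filesystem::directory_iterator it(journalDir, ec), end;
         !ec && it != end;
         it.increment(ec)) {
        if (it->path().filename().string().rfind("j._", 0) == 0)
            return true;
    }
    return false;  // any error (e.g. missing dir) counts as "no journal files"
}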
/** assure journal/ dir exists. throws. call during startup. */
void journalMakeDir() {
    j.init();

    filesystem::path p = getJournalDir();
    j.dir = p.string();
    log() << "journal dir=" << j.dir << endl;

    if (!exists(j.dir)) {
        try {
            create_directory(j.dir);
        } catch (std::exception& e) {
            log() << "error creating directory " << j.dir << ' ' << e.what() << endl;
            throw;
        }
    }
}
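// Illustrative sketch (not part of the original source): journalMakeDir()
// assumes the parent dbpath already exists, so a single create_directory
// suffices. A more defensive variant could use create_directories, which also
// creates missing intermediate components and is a no-op when the directory
// is already present; ensureDirSketch is a hypothetical helper name.
#include <filesystem>

void ensureDirSketch(const std::filesystem::path& dir) {
    // Returns false (with no error) if dir already exists; throws
    // std::filesystem::filesystem_error on an actual failure.
    std::filesystem::create_directories(dir);
}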
/**
 * The main durability thread loop. There is a single instance of this function running.
 */
static void durThread(ClockSource* cs, int64_t serverStartMs) {
    Client::initThread("durability");

    log() << "Durability thread started";

    bool samePartition = true;
    try {
        const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
    } catch (...) {
    }

    // Spawn the journal writer thread
    JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
    journalWriter.start();

    // Used as an estimate of how much / how fast to remap
    uint64_t commitCounter(0);
    uint64_t estimatedPrivateMapSize(0);
    uint64_t remapLastTimestamp(0);

    while (shutdownRequested.loadRelaxed() == 0) {
        unsigned ms = storageGlobalParams.journalCommitIntervalMs;
        if (ms == 0) {
            ms = samePartition ? 100 : 30;
        }

        // +1 so it never goes down to zero
        const int64_t oneThird = (ms / 3) + 1;

        // Reset the stats based on the reset interval
        if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
            stats.reset();
        }

        try {
            stdx::unique_lock<stdx::mutex> lock(flushMutex);

            for (unsigned i = 0; i <= 2; i++) {
                if (stdx::cv_status::no_timeout ==
                    flushRequested.wait_for(lock, Milliseconds(oneThird).toSystemDuration())) {
                    // Someone forced a flush
                    break;
                }

                if (commitNotify.nWaiting()) {
                    // One or more getLastError j:true is pending
                    break;
                }

                if (commitJob.bytes() > UncommittedBytesLimit / 2) {
                    // The number of written bytes is growing
                    break;
                }
            }

            // The commit logic itself
            LOG(4) << "groupCommit begin";

            Timer t;

            const ServiceContext::UniqueOperationContext txnPtr = cc().makeOperationContext();
            OperationContext& txn = *txnPtr;
            AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(txn.lockState());

            // We need to snapshot the commitNumber after the flush lock has been obtained,
            // because at this point we know that we have a stable snapshot of the data.
            const CommitNotifier::When commitNumber(commitNotify.now());

            LOG(4) << "Processing commit number " << commitNumber;

            if (!commitJob.hasWritten()) {
                // We do not need the journal lock anymore. Free it here, for the really
                // unlikely possibility that the writeBuffer command below blocks.
                autoFlushLock.release();

                // getlasterror request could have come after the data was already committed.
                // No need to call committingReset though, because we have not done any
                // writes (hasWritten == false).
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                buffer->setNoop();
                buffer->journalListenerToken = getJournalListener()->getToken();

                journalWriter.writeBuffer(buffer, commitNumber);
            } else {
                // This copies all the in-memory changes into the journal writer's buffer.
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder(), cs, serverStartMs);

                estimatedPrivateMapSize += commitJob.bytes();
                commitCounter++;

                // Now that the write intents have been copied to the buffer, the commit job is
                // free to be reused. We need to reset the commit job's contents while under
                // the S flush lock, because otherwise someone might have done a write and this
                // would wipe out their changes without ever being committed.
                commitJob.committingReset();

                double systemMemoryPressurePercentage =
                    ProcessInfo::getSystemMemoryPressurePercentage();

                // Now that the in-memory modifications have been collected, we can potentially
                // release the flush lock if remap is not necessary.
                //
                // When we remap due to memory pressure, we look at two criteria:
                // 1. Whether the total size of touched 4 KB pages exceeds 512 MB,
                //    a reasonable estimate of memory pressure on Linux.
                // 2. Whether the amount of free memory on the machine is running low,
                //    since criterion #1 underestimates the memory pressure on Windows,
                //    where commits happen in 64 MB chunks.
                const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
                    (systemMemoryPressurePercentage > 0.0) ||
                    (commitCounter % NumCommitsBeforeRemap == 0) ||
                    (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);

                double remapFraction = 0.0;

                if (shouldRemap) {
                    // We want to remap all private views about every 2 seconds. There could be
                    // ~1000 views so we do a little each pass. There will be copy-on-write
                    // faults after remapping, so doing a little bit at a time will avoid big
                    // load spikes when the pages are touched.
                    //
                    // TODO: Instead of the time-based logic above, consider using ProcessInfo
                    //       and watching for getResidentSize to drop, which is more precise.
                    remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;

                    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
                        remapFraction = 1;
                    } else {
                        // We don't want to get close to the UncommittedBytesLimit
                        const double remapMemFraction =
                            estimatedPrivateMapSize / ((double)UncommittedBytesLimit);

                        remapFraction = std::max(remapMemFraction, remapFraction);
                        remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
                    }
                } else {
                    LOG(4) << "Early release flush lock";

                    // We will not be doing a remap so drop the flush lock. That way we will be
                    // doing the journal I/O outside of lock, so other threads can proceed.
                    invariant(!shouldRemap);
                    autoFlushLock.release();
                }

                buffer->journalListenerToken = getJournalListener()->getToken();

                // Request async I/O to the journal. This may block.
                journalWriter.writeBuffer(buffer, commitNumber);

                // Data has now been written to the shared view. If remap was requested, we
                // would still be holding the S flush lock here, so just upgrade it and
                // perform the remap.
                if (shouldRemap) {
                    // Need to wait for the previously scheduled journal writes to complete
                    // before any remap is attempted.
                    journalWriter.flush();
                    journalWriter.assertIdle();

                    // Upgrading the journal lock to flush stops all activity on the system,
                    // because we will be remapping memory and we don't want readers to be
                    // accessing it. Technically this step could be avoided on systems that
                    // support atomic remap.
                    autoFlushLock.upgradeFlushLockToExclusive();
                    remapPrivateView(remapFraction);

                    autoFlushLock.release();

                    // Reset the private map estimate outside of the lock
                    estimatedPrivateMapSize = 0;
                    remapLastTimestamp = curTimeMicros64();

                    stats.curr()->_commitsInWriteLock++;
                    stats.curr()->_commitsInWriteLockMicros += t.micros();
                }
            }

            stats.curr()->_commits++;
            stats.curr()->_commitsMicros += t.micros();

            LOG(4) << "groupCommit end";
        } catch (DBException& e) {
            severe() << "dbexception in durThread causing immediate shutdown: " << e.toString();
            invariant(false);
        } catch (std::ios_base::failure& e) {
            severe() << "ios_base exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::bad_alloc& e) {
            severe() << "bad_alloc exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::exception& e) {
            severe() << "exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (...) {
            severe() << "unhandled exception in durThread causing immediate shutdown";
            invariant(false);
        }
    }

    // Stops the journal thread and ensures everything was written
    invariant(!commitJob.hasWritten());

    journalWriter.flush();
    journalWriter.shutdown();

    log() << "Durability thread stopped";
}
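// Illustrative sketch (not part of the original source): the scheduling idea
// behind the wait loop above, reduced to standard C++. The thread sleeps in
// thirds of the commit interval and commits early when a flush is forced or
// enough uncommitted bytes have accumulated. All names here (forcedFlush,
// pendingBytes, kUncommittedBytesLimit, waitForCommitTrigger) are hypothetical
// stand-ins for the members used above.
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

std::mutex flushMutexSketch;
std::condition_variable forcedFlush;
std::atomic<uint64_t> pendingBytes{0};
constexpr uint64_t kUncommittedBytesLimit = 256 * 1024 * 1024;

void waitForCommitTrigger(std::chrono::milliseconds commitInterval) {
    // +1 ms so the wait never degenerates to zero, as in the loops above
    const auto oneThird = commitInterval / 3 + std::chrono::milliseconds(1);

    std::unique_lock<std::mutex> lock(flushMutexSketch);
    for (int i = 0; i <= 2; i++) {
        if (forcedFlush.wait_for(lock, oneThird) == std::cv_status::no_timeout)
            break;  // someone forced a flush
        if (pendingBytes.load() > kUncommittedBytesLimit / 2)
            break;  // uncommitted data is growing; commit sooner
    }
    // ... perform the group commit here ...
}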
/**
 * A variant of the durability thread loop that acquires the flush lock on
 * every pass and then performs groupCommit() and remapPrivateView().
 */
static void durThread() {
    Client::initThread("journal");

    bool samePartition = true;
    try {
        const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
    } catch (...) {
    }

    while (shutdownRequested.loadRelaxed() == 0) {
        unsigned ms = storageGlobalParams.journalCommitInterval;
        if (ms == 0) {
            ms = samePartition ? 100 : 30;
        }

        unsigned oneThird = (ms / 3) + 1;  // +1 so never zero

        try {
            stats.rotate();

            boost::mutex::scoped_lock lock(flushMutex);

            // commit sooner if one or more getLastError j:true is pending
            for (unsigned i = 0; i <= 2; i++) {
                if (flushRequested.timed_wait(lock, Milliseconds(oneThird))) {
                    // Someone forced a flush
                    break;
                }

                if (commitJob._notify.nWaiting())
                    break;

                if (commitJob.bytes() > UncommittedBytesLimit / 2)
                    break;
            }

            OperationContextImpl txn;

            // Waits for all active operations to drain and won't let new ones start. This
            // should be optimized to allow readers in (see SERVER-15262).
            AutoAcquireFlushLockForMMAPV1Commit flushLock(txn.lockState());

            groupCommit();
            remapPrivateView();
        } catch (std::exception& e) {
            log() << "exception in durThread causing immediate shutdown: " << e.what() << endl;
            mongoAbort("exception in durThread");
        } catch (...) {
            log() << "unhandled exception in durThread causing immediate shutdown" << endl;
            mongoAbort("unhandled exception in durThread");
        }
    }

    cc().shutdown();
}
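// Illustrative sketch (not part of the original source): both loop variants
// above wake early when flushRequested is signalled. A caller that needs an
// immediate group commit would notify under the same mutex, roughly like
// this, using the stdx types from the newer loop; requestFlushSketch is a
// hypothetical name.
void requestFlushSketch() {
    stdx::lock_guard<stdx::mutex> lock(flushMutex);
    flushRequested.notify_one();  // wakes the durability thread's wait_for
}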
path lsnPath() {
    return getJournalDir() / "lsn";
}