Esempio n. 1
0
/**
 * Remaps the private view from the shared view so that it does not consume too much
 * copy-on-write/swap space. Must only be called after the in-memory journal has been flushed
 * to disk and applied on top of the shared view.
 *
 * @param fraction Value between (0, 1] indicating what fraction of the memory to remap.
 *      Remapping too much or too frequently incurs copy-on-write page fault cost.
 */
static void remapPrivateView(double fraction) {
    // Remapping private views must occur after WRITETODATAFILES otherwise we wouldn't see any
    // newly written data on reads.
    invariant(!commitJob.hasWritten());

    try {
        Timer t;
        remapPrivateViewImpl(fraction);
        stats.curr()->_remapPrivateViewMicros += t.micros();

        LOG(4) << "remapPrivateView end";
        return;
    } catch (DBException& e) {
        severe() << "dbexception in remapPrivateView causing immediate shutdown: " << e.toString();
    } catch (std::ios_base::failure& e) {
        severe() << "ios_base exception in remapPrivateView causing immediate shutdown: "
                 << e.what();
    } catch (std::bad_alloc& e) {
        severe() << "bad_alloc exception in remapPrivateView causing immediate shutdown: "
                 << e.what();
    } catch (std::exception& e) {
        severe() << "exception in remapPrivateView causing immediate shutdown: " << e.what();
    } catch (...) {
        severe() << "unknown exception in remapPrivateView causing immediate shutdown: ";
    }

    invariant(false);
}
Esempio n. 2
0
void DurableImpl::closingFileNotification() {
    if (commitJob.hasWritten()) {
        severe() << "journal warning files are closing outside locks with writes pending";

        // File is closing while there are unwritten changes
        invariant(false);
    }
}
Esempio n. 3
0
        /** locking in read lock when called 
            @see MongoMMF::close()
        */
        static void groupCommit() {
            dbMutex.assertAtLeastReadLocked();

            if( !commitJob.hasWritten() )
                return;

            PREPLOGBUFFER();

            WRITETOJOURNAL(commitJob._ab);

            // data is now in the journal, which is sufficient for acknowledging getlasterror. 
            // (ok to crash after that)
            log() << "TEMP NOTIFYING COMMITTED" << endl;
            commitJob.notifyCommitted();

            // write the noted write intent entries to the data files.
            // this has to come after writing to the journal, obviously...
            MongoFile::markAllWritable(); // for _DEBUG. normally we don't write in a read lock
            WRITETODATAFILES();
            if (!dbMutex.isWriteLocked())
                MongoFile::unmarkAllWritable();

            commitJob.reset();

            // REMAPPRIVATEVIEW
            // 
            // remapping private views must occur after WRITETODATAFILES otherwise 
            // we wouldn't see newly written data on reads.
            // 
            DEV assert( !commitJob.hasWritten() );
            if( !dbMutex.isWriteLocked() ) { 
                // this needs done in a write lock thus we do it on the next acquisition of that 
                // instead of here (there is no rush if you aren't writing anyway -- but it must happen, 
                // if it is done, before any uncommitted writes occur).
                //
                dbMutex._remapPrivateViewRequested = true;
            }
            else { 
                // however, if we are already write locked, we must do it now -- up the call tree someone 
                // may do a write without a new lock acquisition.  this can happen when MongoMMF::close() calls
                // this method when a file (and its views) is about to go away.
                //
                REMAPPRIVATEVIEW();
            }
        }
Esempio n. 4
0
 /** called when a MongoMMF is closing -- we need to go ahead and group commit in that case before its 
     views disappear 
 */
 void closingFileNotification() {
     if( dbMutex.atLeastReadLocked() ) {
         groupCommit(); 
     }
     else {
         assert( inShutdown() );
         if( commitJob.hasWritten() ) { 
             log() << "dur warning files are closing outside locks with writes pending" << endl;
         }
     }
 }
Esempio n. 5
0
        /** We need to remap the private views periodically. otherwise they would become very large.
            Call within write lock.
        */
        void REMAPPRIVATEVIEW() { 
            static unsigned startAt;
            static unsigned long long lastRemap;

            dbMutex.assertWriteLocked();
            dbMutex._remapPrivateViewRequested = false;
            assert( !commitJob.hasWritten() );

            if( 0 ) { 
                log() << "TEMP remapprivateview disabled for testing - will eventually run oom in this mode if db bigger than ram" << endl;
                return;
            }

            // we want to remap all private views about every 2 seconds.  there could be ~1000 views so 
            // we do a little each pass; beyond the remap time, more significantly, there will be copy on write 
            // faults after remapping, so doing a little bit at a time will avoid big load spikes on 
            // remapping.
            unsigned long long now = curTimeMicros64();
            double fraction = (now-lastRemap)/20000000.0;

            set<MongoFile*>& files = MongoFile::getAllFiles();
            unsigned sz = files.size();
            if( sz == 0 ) 
                return;

            unsigned ntodo = (unsigned) (sz * fraction);
            if( ntodo < 1 ) ntodo = 1;
            if( ntodo > sz ) ntodo = sz;

            const set<MongoFile*>::iterator b = files.begin();
            const set<MongoFile*>::iterator e = files.end();
            set<MongoFile*>::iterator i = b;
            // skip to our starting position
            for( unsigned x = 0; x < startAt; x++ ) {
                i++;
                if( i == e ) i = b;
            }
            startAt = (startAt + ntodo) % sz; // mark where to start next time

            for( unsigned x = 0; x < ntodo; x++ ) {
                dassert( i != e );
                if( (*i)->isMongoMMF() ) {
                    MongoMMF *mmf = (MongoMMF*) *i;
                    assert(mmf);
                    if( mmf->willNeedRemap() ) {
                        mmf->willNeedRemap() = false;
                        mmf->remapThePrivateView();
                    }
                    i++;
                    if( i == e ) i = b;
                }
            }
        }
Esempio n. 6
0
        static void go() {
            if( !commitJob.hasWritten() )
                return;

            {
                readlocktry lk("", 1000);
                if( lk.got() ) {
                    groupCommit();
                    return;
                }
            }

            // starvation on read locks could occur.  so if read lock acquisition is slow, try to get a 
            // write lock instead.  otherwise writes could use too much RAM.
            writelock lk;
            groupCommit();
        }
Esempio n. 7
0
/**
 * The main durability thread loop. There is a single instance of this function running.
 */
static void durThread(ClockSource* cs, int64_t serverStartMs) {
    Client::initThread("durability");

    log() << "Durability thread started";

    bool samePartition = true;
    try {
        const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
    } catch (...) {
    }

    // Spawn the journal writer thread
    JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
    journalWriter.start();

    // Used as an estimate of how much / how fast to remap
    uint64_t commitCounter(0);
    uint64_t estimatedPrivateMapSize(0);
    uint64_t remapLastTimestamp(0);

    while (shutdownRequested.loadRelaxed() == 0) {
        unsigned ms = storageGlobalParams.journalCommitIntervalMs;
        if (ms == 0) {
            ms = samePartition ? 100 : 30;
        }

        // +1 so it never goes down to zero
        const int64_t oneThird = (ms / 3) + 1;

        // Reset the stats based on the reset interval
        if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
            stats.reset();
        }

        try {
            stdx::unique_lock<stdx::mutex> lock(flushMutex);

            for (unsigned i = 0; i <= 2; i++) {
                if (stdx::cv_status::no_timeout ==
                    flushRequested.wait_for(lock, Milliseconds(oneThird).toSystemDuration())) {
                    // Someone forced a flush
                    break;
                }

                if (commitNotify.nWaiting()) {
                    // One or more getLastError j:true is pending
                    break;
                }

                if (commitJob.bytes() > UncommittedBytesLimit / 2) {
                    // The number of written bytes is growing
                    break;
                }
            }

            // The commit logic itself
            LOG(4) << "groupCommit begin";

            Timer t;

            const ServiceContext::UniqueOperationContext txnPtr = cc().makeOperationContext();
            OperationContext& txn = *txnPtr;
            AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(txn.lockState());

            // We need to snapshot the commitNumber after the flush lock has been obtained,
            // because at this point we know that we have a stable snapshot of the data.
            const CommitNotifier::When commitNumber(commitNotify.now());

            LOG(4) << "Processing commit number " << commitNumber;

            if (!commitJob.hasWritten()) {
                // We do not need the journal lock anymore. Free it here, for the really
                // unlikely possibility that the writeBuffer command below blocks.
                autoFlushLock.release();

                // getlasterror request could have came after the data was already committed.
                // No need to call committingReset though, because we have not done any
                // writes (hasWritten == false).
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                buffer->setNoop();
                buffer->journalListenerToken = getJournalListener()->getToken();

                journalWriter.writeBuffer(buffer, commitNumber);
            } else {
                // This copies all the in-memory changes into the journal writer's buffer.
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder(), cs, serverStartMs);

                estimatedPrivateMapSize += commitJob.bytes();
                commitCounter++;

                // Now that the write intents have been copied to the buffer, the commit job is
                // free to be reused. We need to reset the commit job's contents while under
                // the S flush lock, because otherwise someone might have done a write and this
                // would wipe out their changes without ever being committed.
                commitJob.committingReset();

                double systemMemoryPressurePercentage =
                    ProcessInfo::getSystemMemoryPressurePercentage();

                // Now that the in-memory modifications have been collected, we can potentially
                // release the flush lock if remap is not necessary.
                // When we remap due to memory pressure, we look at two criteria
                // 1. If the amount of 4k pages touched exceeds 512 MB,
                //    a reasonable estimate of memory pressure on Linux.
                // 2. Check if the amount of free memory on the machine is running low,
                //    since #1 is underestimates the memory pressure on Windows since
                //    commits in 64MB chunks.
                const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
                    (systemMemoryPressurePercentage > 0.0) ||
                    (commitCounter % NumCommitsBeforeRemap == 0) ||
                    (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);

                double remapFraction = 0.0;

                if (shouldRemap) {
                    // We want to remap all private views about every 2 seconds. There could be
                    // ~1000 views so we do a little each pass. There will be copy on write
                    // faults after remapping, so doing a little bit at a time will avoid big
                    // load spikes when the pages are touched.
                    //
                    // TODO: Instead of the time-based logic above, consider using ProcessInfo
                    //       and watching for getResidentSize to drop, which is more precise.
                    remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;

                    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
                        remapFraction = 1;
                    } else {
                        // We don't want to get close to the UncommittedBytesLimit
                        const double remapMemFraction =
                            estimatedPrivateMapSize / ((double)UncommittedBytesLimit);

                        remapFraction = std::max(remapMemFraction, remapFraction);

                        remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
                    }
                } else {
                    LOG(4) << "Early release flush lock";

                    // We will not be doing a remap so drop the flush lock. That way we will be
                    // doing the journal I/O outside of lock, so other threads can proceed.
                    invariant(!shouldRemap);
                    autoFlushLock.release();
                }

                buffer->journalListenerToken = getJournalListener()->getToken();
                // Request async I/O to the journal. This may block.
                journalWriter.writeBuffer(buffer, commitNumber);

                // Data has now been written to the shared view. If remap was requested, we
                // would still be holding the S flush lock here, so just upgrade it and
                // perform the remap.
                if (shouldRemap) {
                    // Need to wait for the previously scheduled journal writes to complete
                    // before any remap is attempted.
                    journalWriter.flush();
                    journalWriter.assertIdle();

                    // Upgrading the journal lock to flush stops all activity on the system,
                    // because we will be remapping memory and we don't want readers to be
                    // accessing it. Technically this step could be avoided on systems, which
                    // support atomic remap.
                    autoFlushLock.upgradeFlushLockToExclusive();
                    remapPrivateView(remapFraction);

                    autoFlushLock.release();

                    // Reset the private map estimate outside of the lock
                    estimatedPrivateMapSize = 0;
                    remapLastTimestamp = curTimeMicros64();

                    stats.curr()->_commitsInWriteLock++;
                    stats.curr()->_commitsInWriteLockMicros += t.micros();
                }
            }

            stats.curr()->_commits++;
            stats.curr()->_commitsMicros += t.micros();

            LOG(4) << "groupCommit end";
        } catch (DBException& e) {
            severe() << "dbexception in durThread causing immediate shutdown: " << e.toString();
            invariant(false);
        } catch (std::ios_base::failure& e) {
            severe() << "ios_base exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::bad_alloc& e) {
            severe() << "bad_alloc exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::exception& e) {
            severe() << "exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (...) {
            severe() << "unhandled exception in durThread causing immediate shutdown";
            invariant(false);
        }
    }

    // Stops the journal thread and ensures everything was written
    invariant(!commitJob.hasWritten());

    journalWriter.flush();
    journalWriter.shutdown();

    log() << "Durability thread stopped";
}