Example #1
    void FSyncLockThread::doRealWork() {
        SimpleMutex::scoped_lock lkf(filesLockedFsync);

        OperationContextImpl txn;
        ScopedTransaction transaction(&txn, MODE_X);
        Lock::GlobalWrite global(txn.lockState()); // No WriteUnitOfWork needed

        SimpleMutex::scoped_lock lk(fsyncCmd.m);
        
        invariant(!fsyncCmd.locked);    // impossible to get here if locked is true
        try { 
            getDur().syncDataAndTruncateJournal(&txn);
        } 
        catch( std::exception& e ) { 
            error() << "error doing syncDataAndTruncateJournal: " << e.what() << endl;
            fsyncCmd.err = e.what();
            fsyncCmd._threadSync.notify_one();
            fsyncCmd.locked = false;
            return;
        }

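        // Downgrade the global lock from exclusive (X) to shared (S) so that
        // reads can proceed while the data files are flushed below.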
        txn.lockState()->downgradeGlobalXtoSForMMAPV1();

        try {
            StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();
            storageEngine->flushAllFiles(true);
        }
        catch( std::exception& e ) { 
            error() << "error doing flushAll: " << e.what() << endl;
            fsyncCmd.err = e.what();
            fsyncCmd._threadSync.notify_one();
            fsyncCmd.locked = false;
            return;
        }

        invariant(!fsyncCmd.locked);
        fsyncCmd.locked = true;
        
        fsyncCmd._threadSync.notify_one();

        while ( ! fsyncCmd.pendingUnlock ) {
            fsyncCmd._unlockSync.wait(fsyncCmd.m);
        }
        fsyncCmd.pendingUnlock = false;
        
        fsyncCmd.locked = false;
        fsyncCmd.err = "unlocked";

        fsyncCmd._unlockSync.notify_one();
    }
Example #2
bool getInitialSyncFlag() {
    OperationContextImpl txn;
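    // Retry the body if the storage engine throws a WriteConflictException.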
    MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
        ScopedTransaction transaction(&txn, MODE_IX);
        Lock::DBLock lk(txn.lockState(), "local", MODE_X);
        BSONObj mv;
        bool found = Helpers::getSingleton(&txn, minvalidNS, mv);
        if (found) {
            return mv[initialSyncFlagString].trueValue();
        }
        return false;
    }
    MONGO_WRITE_CONFLICT_RETRY_LOOP_END(&txn, "getInitialSyncFlags", minvalidNS);
}
Example #3
    TEST(DBHelperTests, FindDiskLocs) {
        OperationContextImpl txn;
        DBDirectClient client(&txn);

        // Some unique tag we can use to make sure we're pulling back the right data
        OID tag = OID::gen();
        client.remove( ns, BSONObj() );

        int numDocsInserted = 10;
        for ( int i = 0; i < numDocsInserted; ++i ) {
            client.insert( ns, BSON( "_id" << i << "tag" << tag ) );
        }

        long long maxSizeBytes = 1024 * 1024 * 1024;

        set<DiskLoc> locs;
        long long numDocsFound;
        long long estSizeBytes;
        {
            // Search the _id range [0, 10).
            Lock::DBRead lk(txn.lockState(), ns);

            KeyRange range( ns,
                            BSON( "_id" << 0 ),
                            BSON( "_id" << numDocsInserted ),
                            BSON( "_id" << 1 ) );

            Status result = Helpers::getLocsInRange( &txn,
                                                     range,
                                                     maxSizeBytes,
                                                     &locs,
                                                     &numDocsFound,
                                                     &estSizeBytes );

            ASSERT_EQUALS( result, Status::OK() );
            ASSERT_EQUALS( numDocsFound, numDocsInserted );
            ASSERT_NOT_EQUALS( estSizeBytes, 0 );
            ASSERT_LESS_THAN( estSizeBytes, maxSizeBytes );

            Database* db = dbHolder().get( &txn, nsToDatabase(range.ns) );
            const Collection* collection = db->getCollection(&txn, ns);

            // Make sure all the disklocs actually correspond to the right info
            for ( set<DiskLoc>::const_iterator it = locs.begin(); it != locs.end(); ++it ) {
                const BSONObj obj = collection->docFor(&txn, *it);
                ASSERT_EQUALS(obj["tag"].OID(), tag);
            }
        }
    }
Example #4
    void FSyncLockThread::doRealWork() {
        SimpleMutex::scoped_lock lkf(filesLockedFsync);

        OperationContextImpl txn;   // XXX?
        Lock::GlobalWrite global(txn.lockState());

        SimpleMutex::scoped_lock lk(fsyncCmd.m);
        
        verify( ! fsyncCmd.locked ); // impossible to get here if locked is true
        try { 
            getDur().syncDataAndTruncateJournal();
        } 
        catch( std::exception& e ) { 
            error() << "error doing syncDataAndTruncateJournal: " << e.what() << endl;
            fsyncCmd.err = e.what();
            fsyncCmd._threadSync.notify_one();
            fsyncCmd.locked = false;
            return;
        }
        
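        // Downgrade the global write lock so reads can proceed during flushAll.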
        global.downgrade();
        
        try {
            MemoryMappedFile::flushAll(true);
        }
        catch( std::exception& e ) { 
            error() << "error doing flushAll: " << e.what() << endl;
            fsyncCmd.err = e.what();
            fsyncCmd._threadSync.notify_one();
            fsyncCmd.locked = false;
            return;
        }

        verify( ! fsyncCmd.locked );
        fsyncCmd.locked = true;
        
        fsyncCmd._threadSync.notify_one();

        while ( ! fsyncCmd.pendingUnlock ) {
            fsyncCmd._unlockSync.wait(fsyncCmd.m);
        }
        fsyncCmd.pendingUnlock = false;
        
        fsyncCmd.locked = false;
        fsyncCmd.err = "unlocked";

        fsyncCmd._unlockSync.notify_one();
    }
Example #5
void ServiceContextMongoDTest::_dropAllDBs() {
    OperationContextImpl txn;
    dropAllDatabasesExceptLocal(&txn);

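    // The "local" database is excluded above, so drop it explicitly under the global lock.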
    ScopedTransaction transaction(&txn, MODE_X);
    Lock::GlobalWrite lk(txn.lockState());
    AutoGetDb autoDBLocal(&txn, "local", MODE_X);
    const auto localDB = autoDBLocal.getDb();
    if (localDB) {
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            // Do not wrap in a WriteUnitOfWork until SERVER-17103 is addressed.
            autoDBLocal.getDb()->dropDatabase(&txn, localDB);
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(&txn, "_dropAllDBs", "local");
    }
}
Example #6
    /** Write an op to the oplog that is already built.
        TODO: make _logOpRS() call this so we don't repeat ourselves?
        */
    void _logOpObjRS(const BSONObj& op) {
        OperationContextImpl txn;
        Lock::DBWrite lk(txn.lockState(), "local");

        const OpTime ts = op["ts"]._opTime();
        long long h = op["h"].numberLong();

        {
            if ( localOplogRSCollection == 0 ) {
                Client::Context ctx(rsoplog, storageGlobalParams.dbpath);
                localDB = ctx.db();
                verify( localDB );
                localOplogRSCollection = localDB->getCollection( &txn, rsoplog );
                massert(13389,
                        "local.oplog.rs missing. did you drop it? if so restart server",
                        localOplogRSCollection);
            }
            Client::Context ctx(rsoplog, localDB);
            checkOplogInsert( localOplogRSCollection->insertDocument( &txn, op, false ) );

            /* TODO: now() has code to handle clock skew, but if the server-to-server
                     skew is large it will get unhappy. This code (or maybe the code
                     in now()) should be improved.
                     */
            if( theReplSet ) {
                if( !(theReplSet->lastOpTimeWritten<ts) ) {
                    log() << "replication oplog stream went back in time. previous timestamp: "
                          << theReplSet->lastOpTimeWritten << " newest timestamp: " << ts
                          << ". attempting to sync directly from primary." << endl;
                    std::string errmsg;
                    BSONObjBuilder result;
                    if (!theReplSet->forceSyncFrom(theReplSet->box.getPrimary()->fullName(),
                                                   errmsg, result)) {
                        log() << "Can't sync from primary: " << errmsg << endl;
                    }
                }
                theReplSet->lastOpTimeWritten = ts;
                theReplSet->lastH = h;
                ctx.getClient()->setLastOp( ts );

                BackgroundSync::notify();
            }
        }

        setNewOptime(ts);
    }
Example #7
            void run() {
                const string dbName = "rollback_drop_collection";
                const string droppedName = dbName + ".dropped";
                const string rolledBackName = dbName + ".rolled_back";

                OperationContextImpl txn;

                ScopedTransaction transaction(&txn, MODE_IX);
                Lock::DBLock lk(txn.lockState(), dbName, MODE_X);

                bool justCreated;
                Database* db = dbHolder().openDb(&txn, dbName, &justCreated);
                ASSERT(justCreated);

                {
                    WriteUnitOfWork wunit(&txn);
                    ASSERT_FALSE(db->getCollection(droppedName));
                    Collection* droppedColl;
                    droppedColl = db->createCollection(&txn, droppedName);
                    ASSERT_EQUALS(db->getCollection(droppedName), droppedColl);
                    db->dropCollection(&txn, droppedName);
                    wunit.commit();
                }

                // Should have been really dropped.
                ASSERT_FALSE(db->getCollection(droppedName));

                {
                    WriteUnitOfWork wunit(&txn);
                    ASSERT_FALSE(db->getCollection(rolledBackName));
                    Collection* rolledBackColl = db->createCollection(&txn, rolledBackName);
                    wunit.commit();
                    ASSERT_EQUALS(db->getCollection(rolledBackName), rolledBackColl);
                    db->dropCollection(&txn, rolledBackName);
                    // not committing so dropping should be rolled back
                }

                // The rolledBackCollection dropping should have been rolled back.
                // Original Collection pointers are no longer valid.
                ASSERT(db->getCollection(rolledBackName));

                // The droppedCollection should not have been restored by the rollback.
                ASSERT_FALSE(db->getCollection(droppedName));
            }
Example #8
static void logStartup() {
    BSONObjBuilder toLog;
    stringstream id;
    id << getHostNameCached() << "-" << jsTime().asInt64();
    toLog.append("_id", id.str());
    toLog.append("hostname", getHostNameCached());

    toLog.appendTimeT("startTime", time(0));
    toLog.append("startTimeLocal", dateToCtimeString(Date_t::now()));

    toLog.append("cmdLine", serverGlobalParams.parsedOpts);
    toLog.append("pid", ProcessId::getCurrent().asLongLong());


    BSONObjBuilder buildinfo(toLog.subobjStart("buildinfo"));
    appendBuildInfo(buildinfo);
    appendStorageEngineList(&buildinfo);
    buildinfo.doneFast();

    BSONObj o = toLog.obj();

    OperationContextImpl txn;

    ScopedTransaction transaction(&txn, MODE_X);
    Lock::GlobalWrite lk(txn.lockState());
    AutoGetOrCreateDb autoDb(&txn, "local", mongo::MODE_X);
    Database* db = autoDb.getDb();
    const std::string ns = "local.startup_log";
    Collection* collection = db->getCollection(ns);
    WriteUnitOfWork wunit(&txn);
    if (!collection) {
        BSONObj options = BSON("capped" << true << "size" << 10 * 1024 * 1024);
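        // Create the startup log collection without replicating the write;
        // ON_BLOCK_EXIT restores the previous replication setting.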
        bool shouldReplicateWrites = txn.writesAreReplicated();
        txn.setReplicatedWrites(false);
        ON_BLOCK_EXIT(&OperationContext::setReplicatedWrites, &txn, shouldReplicateWrites);
        uassertStatusOK(userCreateNS(&txn, db, ns, options));
        collection = db->getCollection(ns);
    }
    invariant(collection);
    uassertStatusOK(collection->insertDocument(&txn, o, false).getStatus());
    wunit.commit();
}
Example #9
    OpTime SyncTail::applyOpsToOplog(std::deque<BSONObj>* ops) {
        OpTime lastOpTime;
        {
            OperationContextImpl txn; // XXX?
            Lock::DBLock lk(txn.lockState(), "local", MODE_X);
            WriteUnitOfWork wunit(&txn);

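            // Append each batched op to the local oplog, remembering the optime of the last one.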
            while (!ops->empty()) {
                const BSONObj& op = ops->front();
                // this updates lastOpTimeApplied
                lastOpTime = _logOpObjRS(&txn, op);
                ops->pop_front();
            }
            wunit.commit();
        }

        // Update write concern on primary
        BackgroundSync::get()->notify();
        return lastOpTime;
    }
Example #10
    void SyncTail::applyOpsToOplog(std::deque<BSONObj>* ops) {
        {
            OperationContextImpl txn; // XXX?
            Lock::DBWrite lk(txn.lockState(), "local");

            while (!ops->empty()) {
                const BSONObj& op = ops->front();
                // this updates theReplSet->lastOpTimeWritten
                _logOpObjRS(op);
                ops->pop_front();
            }
        }

        if (BackgroundSync::get()->isAssumingPrimary()) {
            LOG(1) << "notifying BackgroundSync";
        }
            
        // Update write concern on primary
        BackgroundSync::notify();
    }
Example #11
void pretouchN(vector<BSONObj>& v, unsigned a, unsigned b) {
    Client* c = currentClient.get();
    if (c == 0) {
        Client::initThread("pretouchN");
        c = &cc();
    }

    OperationContextImpl txn;  // XXX
    ScopedTransaction transaction(&txn, MODE_S);
    Lock::GlobalRead lk(txn.lockState());

    for (unsigned i = a; i <= b; i++) {
        const BSONObj& op = v[i];
        const char* which = "o";
        const char* opType = op.getStringField("op");
        if (*opType == 'i')
            ;
        else if (*opType == 'u')
            which = "o2";
        else
            continue;
        /* todo : other operations */

        try {
            BSONObj o = op.getObjectField(which);
            BSONElement _id;
            if (o.getObjectID(_id)) {
                const char* ns = op.getStringField("ns");
                BSONObjBuilder b;
                b.append(_id);
                BSONObj result;
                Client::Context ctx(&txn, ns);
                if (Helpers::findById(&txn, ctx.db(), ns, b.done(), result))
                    _dummy_z += result.objsize();  // touch
            }
        } catch (DBException& e) {
            log() << "ignoring assertion in pretouchN() " << a << ' ' << b << ' ' << i << ' '
                  << e.toString() << endl;
        }
    }
}
Example #12
        void run() {
            for ( int i = 0; i < 10; ++i ) {
                client.insert( ns, BSON( "_id" << i ) );
            }

            {
                // Remove _id range [_min, _max).
                OperationContextImpl txn;
                Lock::DBWrite lk(txn.lockState(), ns);
                Client::Context ctx( ns );

                KeyRange range( ns,
                                BSON( "_id" << _min ),
                                BSON( "_id" << _max ),
                                BSON( "_id" << 1 ) );
                Helpers::removeRange( &txn, range );
            }

            // Check that the expected documents remain.
            ASSERT_EQUALS( expected(), docs() );
        }
Example #13
    void IndexBuilder::run() {
        Client::initThread(name().c_str());
        LOG(2) << "IndexBuilder building index " << _index;

        OperationContextImpl txn;
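        // Mark this thread as a batch writer so it can get through the parallel
        // batch writer "magic barrier".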
        txn.lockState()->setIsBatchWriter(true);

        txn.getClient()->getAuthorizationSession()->grantInternalAuthorization();

        txn.getCurOp()->reset(HostAndPort(), dbInsert);
        NamespaceString ns(_index["ns"].String());

        ScopedTransaction transaction(&txn, MODE_IX);
        Lock::DBLock dlk(txn.lockState(), ns.db(), MODE_X);
        Client::Context ctx(&txn, ns.getSystemIndexesCollection());

        Database* db = dbHolder().get(&txn, ns.db().toString());

        Status status = _build(&txn, db, true, &dlk);
        if ( !status.isOK() ) {
            error() << "IndexBuilder could not build index: " << status.toString();
            fassert(28555, ErrorCodes::isInterruption(status.code()));
        }

        txn.getClient()->shutdown();
    }
Example #14
            void run() {
                const string dbName = "rollback_create_collection";
                const string committedName = dbName + ".committed";
                const string rolledBackName = dbName + ".rolled_back";

                OperationContextImpl txn;

                ScopedTransaction transaction(&txn, MODE_IX);
                Lock::DBLock lk(txn.lockState(), dbName, MODE_X);

                bool justCreated;
                Database* db = dbHolder().openDb(&txn, dbName, &justCreated);
                ASSERT(justCreated);

                Collection* committedColl;
                {
                    WriteUnitOfWork wunit(&txn);
                    ASSERT_FALSE(db->getCollection(committedName));
                    committedColl = db->createCollection(&txn, committedName);
                    ASSERT_EQUALS(db->getCollection(committedName), committedColl);
                    wunit.commit();
                }

                ASSERT_EQUALS(db->getCollection(committedName), committedColl);

                {
                    WriteUnitOfWork wunit(&txn);
                    ASSERT_FALSE(db->getCollection(rolledBackName));
                    Collection* rolledBackColl = db->createCollection(&txn, rolledBackName);
                    ASSERT_EQUALS(db->getCollection(rolledBackName), rolledBackColl);
                    // not committing so creation should be rolled back
                }

                // The rolledBackCollection creation should have been rolled back
                ASSERT_FALSE(db->getCollection(rolledBackName));

                // The committedCollection should not have been affected by the rollback. Holders
                // of the original Collection pointer should still be valid.
                ASSERT_EQUALS(db->getCollection(committedName), committedColl);
            }
Example #15
// This free function is used by the initial sync writer threads to apply each op
void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
    initializeWriterThread();

    OperationContextImpl txn;
    txn.setReplicatedWrites(false);
    DisableDocumentValidation validationDisabler(&txn);

    // Mark this thread as a batch writer so it can get through the parallel batch writer "magic barrier".
    txn.lockState()->setIsBatchWriter(true);

    bool convertUpdatesToUpserts = false;

    for (std::vector<BSONObj>::const_iterator it = ops.begin(); it != ops.end(); ++it) {
        try {
            const Status s = SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts);
            if (!s.isOK()) {
                if (st->shouldRetry(&txn, *it)) {
                    const Status s2 = SyncTail::syncApply(&txn, *it, convertUpdatesToUpserts);
                    if (!s2.isOK()) {
                        severe() << "Error applying operation (" << it->toString() << "): " << s2;
                        fassertFailedNoTrace(15915);
                    }
                }

                // If shouldRetry() returns false, fall through.
                // This can happen if the document that was moved and missed by Cloner
                // subsequently got deleted and no longer exists on the Sync Target at all
            }
        } catch (const DBException& e) {
            severe() << "writer worker caught exception: " << causedBy(e)
                     << " on: " << it->toString();

            if (inShutdown()) {
                return;
            }

            fassertFailedNoTrace(16361);
        }
    }
}
Example #16
    TEST(DBHelperTests, FindDiskLocsTooBig) {

        DBDirectClient client;
        OperationContextImpl txn;

        client.remove( ns, BSONObj() );

        int numDocsInserted = 10;
        for ( int i = 0; i < numDocsInserted; ++i ) {
            client.insert( ns, BSON( "_id" << i ) );
        }

        // Very small max size
        long long maxSizeBytes = 10;

        set<DiskLoc> locs;
        long long numDocsFound;
        long long estSizeBytes;
        {
            Lock::DBRead lk(txn.lockState(), ns);
            Client::Context ctx( ns );
            KeyRange range( ns,
                            BSON( "_id" << 0 ),
                            BSON( "_id" << numDocsInserted ),
                            BSON( "_id" << 1 ) );

            Status result = Helpers::getLocsInRange( &txn,
                                                     range,
                                                     maxSizeBytes,
                                                     &locs,
                                                     &numDocsFound,
                                                     &estSizeBytes );

            // Make sure we get the right error code and our count and size estimates are valid
            ASSERT_EQUALS( result.code(), ErrorCodes::InvalidLength );
            ASSERT_EQUALS( numDocsFound, numDocsInserted );
            ASSERT_GREATER_THAN( estSizeBytes, maxSizeBytes );
        }
    }
Example #17
        static void insert( const BSONObj &o, bool god = false ) {
            OperationContextImpl txn;
            Lock::DBWrite lk(txn.lockState(), ns());
            Client::Context ctx(ns());
            
            Database* db = ctx.db();
            Collection* coll = db->getCollection(&txn, ns());
            if (!coll) {
                coll = db->createCollection(&txn, ns());
            }

            if (o.hasField("_id")) {
                coll->insertDocument(&txn, o, true);
                return;
            }

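            // The document has no _id; generate an OID and prepend it.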
            BSONObjBuilder b;
            OID id;
            id.init();
            b.appendOID("_id", &id);
            b.appendElements(o);
            coll->insertDocument(&txn, b.obj(), true);
        }
Example #18
    TEST(DBHelperTests, FindDiskLocsNoIndex) {

        DBDirectClient client;
        OperationContextImpl txn;

        client.remove( ns, BSONObj() );
        client.insert( ns, BSON( "_id" << OID::gen() ) );

        long long maxSizeBytes = 1024 * 1024 * 1024;

        set<DiskLoc> locs;
        long long numDocsFound;
        long long estSizeBytes;
        {
            Lock::DBRead lk(txn.lockState(), ns);
            Client::Context ctx( ns );

            // search invalid index range
            KeyRange range( ns,
                            BSON( "badIndex" << 0 ),
                            BSON( "badIndex" << 10 ),
                            BSON( "badIndex" << 1 ) );

            Status result = Helpers::getLocsInRange( &txn,
                                                     range,
                                                     maxSizeBytes,
                                                     &locs,
                                                     &numDocsFound,
                                                     &estSizeBytes );

            // Make sure we get the right error code
            ASSERT_EQUALS( result.code(), ErrorCodes::IndexNotFound );
            ASSERT_EQUALS( static_cast<long long>( locs.size() ), 0 );
            ASSERT_EQUALS( numDocsFound, 0 );
            ASSERT_EQUALS( estSizeBytes, 0 );
        }
    }
Example #19
    void IndexBuilder::run() {
        LOG(2) << "IndexBuilder building index " << _index;

        OperationContextImpl txn;

        Client::initThread(name().c_str());
        Lock::ParallelBatchWriterMode::iAmABatchParticipant(txn.lockState());

        cc().getAuthorizationSession()->grantInternalAuthorization();

        txn.getCurOp()->reset(HostAndPort(), dbInsert);
        NamespaceString ns(_index["ns"].String());
        Client::WriteContext ctx(&txn, ns.getSystemIndexesCollection());

        Database* db = dbHolder().get(&txn, ns.db().toString());

        Status status = build(&txn, db, true);
        if ( !status.isOK() ) {
            log() << "IndexBuilder could not build index: " << status.toString();
        }
        ctx.commit();

        txn.getClient()->shutdown();
    }
Example #20
        void run() {
            OperationContextImpl txn;
            DBDirectClient client(&txn);

            for ( int i = 0; i < 10; ++i ) {
                client.insert( ns, BSON( "_id" << i ) );
            }

            {
                // Remove _id range [_min, _max).
                Lock::DBLock lk(txn.lockState(), nsToDatabaseSubstring(ns), MODE_X);
                Client::Context ctx(&txn,  ns );

                KeyRange range( ns,
                                BSON( "_id" << _min ),
                                BSON( "_id" << _max ),
                                BSON( "_id" << 1 ) );
                mongo::WriteConcernOptions dummyWriteConcern;
                Helpers::removeRange(&txn, range, false, dummyWriteConcern);
            }

            // Check that the expected documents remain.
            ASSERT_EQUALS( expected(), docs(&txn) );
        }
Example #21
    // This free function is used by the writer threads to apply each op
    void multiSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
        initializeWriterThread();

        OperationContextImpl txn;

        // Register as a batch participant so we can get through the parallel batch writer "magic barrier".
        Lock::ParallelBatchWriterMode::iAmABatchParticipant(txn.lockState());

        bool convertUpdatesToUpserts = true;

        for (std::vector<BSONObj>::const_iterator it = ops.begin();
             it != ops.end();
             ++it) {
            try {
                if (!st->syncApply(&txn, *it, convertUpdatesToUpserts)) {
                    fassertFailedNoTrace(16359);
                }
            } catch (const DBException& e) {
                error() << "writer worker caught exception: " << causedBy(e)
                        << " on: " << it->toString() << endl;
                fassertFailedNoTrace(16360);
            }
        }
    }
Example #22
        static void durThreadGroupCommit() {
            OperationContextImpl txn;
            SimpleMutex::scoped_lock flk(filesLockedFsync);

            const int N = 10;
            static int n;
            if (privateMapBytes < UncommittedBytesLimit && ++n % N &&
                (storageGlobalParams.durOptions &
                 StorageGlobalParams::DurAlwaysRemap) == 0) {
                // The limited-locks version doesn't do any remapprivateview at all, so only try it
                // if privateMapBytes is in an acceptable range. Also, every Nth commit we do
                // everything, so some remapping gets done; remapping a lot all at once could cause
                // jitter from a large burst of copy-on-write faults.
                if( groupCommitWithLimitedLocks(&txn) )
                    return;
            }

            // We take a write lock, downgrade, do the work, upgrade, and finish the work.
            // Taking a write lock also helps because we need to be greedy and not be starved here.
            // Note the "stopgreed" parameter -- it stops greed by others while we are working; you
            // can't write anytime soon anyway if we are journaling for a while, which was the idea.
            Lock::GlobalWrite w(txn.lockState());
            w.downgrade();
            groupCommit(&txn, &w);
        }
Example #23
    // This free function is used by the initial sync writer threads to apply each op
    void multiInitialSyncApply(const std::vector<BSONObj>& ops, SyncTail* st) {
        initializeWriterThread();

        OperationContextImpl txn;

        // Register as a batch participant so we can get through the parallel batch writer "magic barrier".
        Lock::ParallelBatchWriterMode::iAmABatchParticipant(txn.lockState());

        for (std::vector<BSONObj>::const_iterator it = ops.begin();
             it != ops.end();
             ++it) {
            try {
                if (!st->syncApply(&txn, *it)) {
                    bool status;
                    {
                        Lock::GlobalWrite lk(txn.lockState());
                        status = st->shouldRetry(&txn, *it);
                    }

                    if (status) {
                        // retry
                        if (!st->syncApply(&txn, *it)) {
                            fassertFailedNoTrace(15915);
                        }
                    }
                    // If shouldRetry() returns false, fall through.
                    // This can happen if the document that was moved and missed by Cloner
                    // subsequently got deleted and no longer exists on the Sync Target at all
                }
            }
            catch (const DBException& e) {
                error() << "exception: " << causedBy(e) << " on: " << it->toString() << endl;
                fassertFailedNoTrace(16361);
            }
        }
    }
Example #24
    CappedInitialSync() :
        _cappedNs("unittests.foo.bar"), _lk(_txn.lockState(), _cappedNs) {
        dropCapped();
        create();
    }
Example #25
static void repairDatabasesAndCheckVersion() {
    LOG(1) << "enter repairDatabases (to check pdfile version #)" << endl;

    OperationContextImpl txn;
    ScopedTransaction transaction(&txn, MODE_X);
    Lock::GlobalWrite lk(txn.lockState());

    vector<string> dbNames;

    StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
    storageEngine->listDatabases(&dbNames);

    // Repair all databases first, so that we do not try to open them if they are in bad shape
    if (storageGlobalParams.repair) {
        for (vector<string>::const_iterator i = dbNames.begin(); i != dbNames.end(); ++i) {
            const string dbName = *i;
            LOG(1) << "    Repairing database: " << dbName << endl;

            fassert(18506, repairDatabase(&txn, storageEngine, dbName));
        }
    }

    const repl::ReplSettings& replSettings = repl::getGlobalReplicationCoordinator()->getSettings();

    // On replica set members we only clear temp collections on DBs other than "local" during
    // promotion to primary. On pure slaves, they are only cleared when the oplog tells them
    // to. The local DB is special because it is not replicated.  See SERVER-10927 for more
    // details.
    const bool shouldClearNonLocalTmpCollections =
        !(checkIfReplMissingFromCommandLine(&txn) || replSettings.usingReplSets() ||
          replSettings.slave == repl::SimpleSlave);

    for (vector<string>::const_iterator i = dbNames.begin(); i != dbNames.end(); ++i) {
        const string dbName = *i;
        LOG(1) << "    Recovering database: " << dbName << endl;

        Database* db = dbHolder().openDb(&txn, dbName);
        invariant(db);

        // First thing after opening the database is to check for file compatibility,
        // otherwise we might crash if this is a deprecated format.
        if (!db->getDatabaseCatalogEntry()->currentFilesCompatible(&txn)) {
            log() << "****";
            log() << "cannot do this upgrade without an upgrade in the middle";
            log() << "please do a --repair with 2.6 and then start this version";
            dbexit(EXIT_NEED_UPGRADE);
            return;
        }

        // Major versions match, check indexes
        const string systemIndexes = db->name() + ".system.indexes";

        Collection* coll = db->getCollection(systemIndexes);
        unique_ptr<PlanExecutor> exec(InternalPlanner::collectionScan(&txn, systemIndexes, coll));

        BSONObj index;
        PlanExecutor::ExecState state;
        while (PlanExecutor::ADVANCED == (state = exec->getNext(&index, NULL))) {
            const BSONObj key = index.getObjectField("key");
            const string plugin = IndexNames::findPluginName(key);

            if (db->getDatabaseCatalogEntry()->isOlderThan24(&txn)) {
                if (IndexNames::existedBefore24(plugin)) {
                    continue;
                }

                log() << "Index " << index << " claims to be of type '" << plugin << "', "
                      << "which is either invalid or did not exist before v2.4. "
                      << "See the upgrade section: "
                      << "http://dochub.mongodb.org/core/upgrade-2.4" << startupWarningsLog;
            }

            const Status keyStatus = validateKeyPattern(key);
            if (!keyStatus.isOK()) {
                log() << "Problem with index " << index << ": " << keyStatus.reason()
                      << " This index can still be used however it cannot be rebuilt."
                      << " For more info see"
                      << " http://dochub.mongodb.org/core/index-validation" << startupWarningsLog;
            }
        }

        if (PlanExecutor::IS_EOF != state) {
            warning() << "Internal error while reading collection " << systemIndexes;
        }

        if (replSettings.usingReplSets()) {
            // We only care about the _id index if we are in a replset
            checkForIdIndexes(&txn, db);
        }

        if (shouldClearNonLocalTmpCollections || dbName == "local") {
            db->clearTmpCollections(&txn);
        }
    }

    LOG(1) << "done repairDatabases" << endl;
}
Example #26
    // Run at startup.
    static void repairDatabasesAndCheckVersion(bool shouldClearNonLocalTmpCollections) {
        LOG(1) << "enter repairDatabases (to check pdfile version #)" << endl;

        OperationContextImpl txn;

        Lock::GlobalWrite lk(txn.lockState());

        vector< string > dbNames;
        getDatabaseNames( dbNames );
        for ( vector< string >::iterator i = dbNames.begin(); i != dbNames.end(); ++i ) {
            string dbName = *i;
            LOG(1) << "\t" << dbName << endl;

            Client::Context ctx( dbName );
            DataFile *p = ctx.db()->getExtentManager()->getFile(&txn, 0);
            DataFileHeader *h = p->getHeader();

            if (repl::replSettings.usingReplSets()) {
                // we only care about the _id index if we are in a replset
                checkForIdIndexes(&txn, ctx.db());
            }

            if (shouldClearNonLocalTmpCollections || dbName == "local")
                ctx.db()->clearTmpCollections(&txn);

            if (!h->isCurrentVersion() || mongodGlobalParams.repair) {

                if( h->version <= 0 ) {
                    uasserted(14026,
                      str::stream() << "db " << dbName << " appears corrupt pdfile version: " << h->version
                                    << " info: " << h->versionMinor << ' ' << h->fileLength);
                }

                if ( !h->isCurrentVersion() ) {
                    log() << "****" << endl;
                    log() << "****" << endl;
                    log() << "need to upgrade database " << dbName << " "
                          << "with pdfile version " << h->version << "." << h->versionMinor << ", "
                          << "new version: "
                          << PDFILE_VERSION << "." << PDFILE_VERSION_MINOR_22_AND_OLDER
                          << endl;
                }

                if (mongodGlobalParams.upgrade) {
                    // QUESTION: Repair even if file format is higher version than code?
                    doDBUpgrade( dbName, h );
                }
                else {
                    log() << "\t Not upgrading, exiting" << endl;
                    log() << "\t run --upgrade to upgrade dbs, then start again" << endl;
                    log() << "****" << endl;
                    dbexit( EXIT_NEED_UPGRADE );
                    mongodGlobalParams.upgrade = 1;
                    return;
                }
            }
            else {
                const string systemIndexes = ctx.db()->name() + ".system.indexes";
                Collection* coll = ctx.db()->getCollection( &txn, systemIndexes );
                auto_ptr<Runner> runner(InternalPlanner::collectionScan(systemIndexes,coll));
                BSONObj index;
                Runner::RunnerState state;
                while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&index, NULL))) {
                    const BSONObj key = index.getObjectField("key");
                    const string plugin = IndexNames::findPluginName(key);

                    if (h->versionMinor == PDFILE_VERSION_MINOR_22_AND_OLDER) {
                        if (IndexNames::existedBefore24(plugin))
                            continue;

                        log() << "Index " << index << " claims to be of type '" << plugin << "', "
                              << "which is either invalid or did not exist before v2.4. "
                              << "See the upgrade section: "
                              << "http://dochub.mongodb.org/core/upgrade-2.4"
                              << startupWarningsLog;
                    }

                    const Status keyStatus = validateKeyPattern(key);
                    if (!keyStatus.isOK()) {
                        log() << "Problem with index " << index << ": " << keyStatus.reason()
                              << " This index can still be used however it cannot be rebuilt."
                              << " For more info see"
                              << " http://dochub.mongodb.org/core/index-validation"
                              << startupWarningsLog;
                    }
                }

                if (Runner::RUNNER_EOF != state) {
                    warning() << "Internal error while reading collection " << systemIndexes;
                }

                Database::closeDatabase(&txn, dbName.c_str(), storageGlobalParams.dbpath);
            }
        }

        LOG(1) << "done repairDatabases" << endl;

        if (mongodGlobalParams.upgrade) {
            log() << "finished checking dbs" << endl;
            cc().shutdown();
            dbexit( EXIT_CLEAN );
        }
    }
Example #27
/**
 * The main durability thread loop. There is a single instance of this function running.
 */
static void durThread() {
    Client::initThread("durability");

    log() << "Durability thread started";

    bool samePartition = true;
    try {
        const std::string dbpathDir = boost::filesystem::path(storageGlobalParams.dbpath).string();
        samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
    } catch (...) {
    }

    // Spawn the journal writer thread
    JournalWriter journalWriter(&commitNotify, &applyToDataFilesNotify, NumAsyncJournalWrites);
    journalWriter.start();

    // Used as an estimate of how much / how fast to remap
    uint64_t commitCounter(0);
    uint64_t estimatedPrivateMapSize(0);
    uint64_t remapLastTimestamp(0);

    while (shutdownRequested.loadRelaxed() == 0) {
        unsigned ms = storageGlobalParams.journalCommitIntervalMs;
        if (ms == 0) {
            ms = samePartition ? 100 : 30;
        }

        // +1 so it never goes down to zero
        const unsigned oneThird = (ms / 3) + 1;

        // Reset the stats based on the reset interval
        if (stats.curr()->getCurrentDurationMillis() > DurStatsResetIntervalMillis) {
            stats.reset();
        }

        try {
            stdx::unique_lock<stdx::mutex> lock(flushMutex);

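            // Wait in one-third slices of the commit interval, committing early if a
            // flush was forced, j:true requests are pending, or writes are accumulating.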
            for (unsigned i = 0; i <= 2; i++) {
                if (stdx::cv_status::no_timeout ==
                    flushRequested.wait_for(lock, Milliseconds(oneThird))) {
                    // Someone forced a flush
                    break;
                }

                if (commitNotify.nWaiting()) {
                    // One or more getLastError j:true is pending
                    break;
                }

                if (commitJob.bytes() > UncommittedBytesLimit / 2) {
                    // The number of written bytes is growing
                    break;
                }
            }

            // The commit logic itself
            LOG(4) << "groupCommit begin";

            Timer t;

            OperationContextImpl txn;
            AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(txn.lockState());

            // We need to snapshot the commitNumber after the flush lock has been obtained,
            // because at this point we know that we have a stable snapshot of the data.
            const NotifyAll::When commitNumber(commitNotify.now());

            LOG(4) << "Processing commit number " << commitNumber;

            if (!commitJob.hasWritten()) {
                // We do not need the journal lock anymore. Free it here, for the really
                // unlikely possibility that the writeBuffer command below blocks.
                autoFlushLock.release();

                // A getlasterror request could have come after the data was already committed.
                // No need to call committingReset though, because we have not done any
                // writes (hasWritten == false).
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                buffer->setNoop();

                journalWriter.writeBuffer(buffer, commitNumber);
            } else {
                // This copies all the in-memory changes into the journal writer's buffer.
                JournalWriter::Buffer* const buffer = journalWriter.newBuffer();
                PREPLOGBUFFER(buffer->getHeader(), buffer->getBuilder());

                estimatedPrivateMapSize += commitJob.bytes();
                commitCounter++;

                // Now that the write intents have been copied to the buffer, the commit job is
                // free to be reused. We need to reset the commit job's contents while under
                // the S flush lock, because otherwise someone might have done a write and this
                // would wipe out their changes without ever being committed.
                commitJob.committingReset();

                double systemMemoryPressurePercentage =
                    ProcessInfo::getSystemMemoryPressurePercentage();

                // Now that the in-memory modifications have been collected, we can potentially
                // release the flush lock if remap is not necessary.
                // When we remap due to memory pressure, we look at two criteria
                // 1. If the amount of 4k pages touched exceeds 512 MB,
                //    a reasonable estimate of memory pressure on Linux.
                // 2. Check if the amount of free memory on the machine is running low,
                //    since #1 underestimates the memory pressure on Windows, where
                //    commits happen in 64MB chunks.
                const bool shouldRemap = (estimatedPrivateMapSize >= UncommittedBytesLimit) ||
                    (systemMemoryPressurePercentage > 0.0) ||
                    (commitCounter % NumCommitsBeforeRemap == 0) ||
                    (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap);

                double remapFraction = 0.0;

                if (shouldRemap) {
                    // We want to remap all private views about every 2 seconds. There could be
                    // ~1000 views so we do a little each pass. There will be copy on write
                    // faults after remapping, so doing a little bit at a time will avoid big
                    // load spikes when the pages are touched.
                    //
                    // TODO: Instead of the time-based logic above, consider using ProcessInfo
                    //       and watching for getResidentSize to drop, which is more precise.
                    remapFraction = (curTimeMicros64() - remapLastTimestamp) / 2000000.0;

                    if (mmapv1GlobalOptions.journalOptions & MMAPV1Options::JournalAlwaysRemap) {
                        remapFraction = 1;
                    } else {
                        // We don't want to get close to the UncommittedBytesLimit
                        const double remapMemFraction =
                            estimatedPrivateMapSize / ((double)UncommittedBytesLimit);

                        remapFraction = std::max(remapMemFraction, remapFraction);

                        remapFraction = std::max(systemMemoryPressurePercentage, remapFraction);
                    }
                } else {
                    LOG(4) << "Early release flush lock";

                    // We will not be doing a remap so drop the flush lock. That way we will be
                    // doing the journal I/O outside of lock, so other threads can proceed.
                    invariant(!shouldRemap);
                    autoFlushLock.release();
                }

                // Request async I/O to the journal. This may block.
                journalWriter.writeBuffer(buffer, commitNumber);

                // Data has now been written to the shared view. If remap was requested, we
                // would still be holding the S flush lock here, so just upgrade it and
                // perform the remap.
                if (shouldRemap) {
                    // Need to wait for the previously scheduled journal writes to complete
                    // before any remap is attempted.
                    journalWriter.flush();
                    journalWriter.assertIdle();

                    // Upgrading the journal lock to flush stops all activity on the system,
                    // because we will be remapping memory and we don't want readers to be
                    // accessing it. Technically this step could be avoided on systems, which
                    // support atomic remap.
                    autoFlushLock.upgradeFlushLockToExclusive();
                    remapPrivateView(remapFraction);

                    autoFlushLock.release();

                    // Reset the private map estimate outside of the lock
                    estimatedPrivateMapSize = 0;
                    remapLastTimestamp = curTimeMicros64();

                    stats.curr()->_commitsInWriteLock++;
                    stats.curr()->_commitsInWriteLockMicros += t.micros();
                }
            }

            stats.curr()->_commits++;
            stats.curr()->_commitsMicros += t.micros();

            LOG(4) << "groupCommit end";
        } catch (DBException& e) {
            severe() << "dbexception in durThread causing immediate shutdown: " << e.toString();
            invariant(false);
        } catch (std::ios_base::failure& e) {
            severe() << "ios_base exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::bad_alloc& e) {
            severe() << "bad_alloc exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (std::exception& e) {
            severe() << "exception in durThread causing immediate shutdown: " << e.what();
            invariant(false);
        } catch (...) {
            severe() << "unhandled exception in durThread causing immediate shutdown";
            invariant(false);
        }
    }

    // Stops the journal thread and ensures everything was written
    invariant(!commitJob.hasWritten());

    journalWriter.flush();
    journalWriter.shutdown();

    log() << "Durability thread stopped";
}
Example #28
        static void durThread() {
            Client::initThread("journal");

            bool samePartition = true;
            try {
                const std::string dbpathDir =
                    boost::filesystem::path(storageGlobalParams.dbpath).string();
                samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
            }
            catch(...) {

            }

            while (shutdownRequested.loadRelaxed() == 0) {
                unsigned ms = storageGlobalParams.journalCommitInterval;
                if( ms == 0 ) { 
                    ms = samePartition ? 100 : 30;
                }

                unsigned oneThird = (ms / 3) + 1; // +1 so never zero

                try {
                    stats.rotate();

                    boost::mutex::scoped_lock lock(flushMutex);

                    // commit sooner if one or more getLastError j:true is pending
                    for (unsigned i = 0; i <= 2; i++) {
                        if (flushRequested.timed_wait(lock,
                                                      Milliseconds(oneThird))) {
                            // Someone forced a flush
                            break;
                        }

                        if (commitJob._notify.nWaiting())
                            break;
                        if (commitJob.bytes() > UncommittedBytesLimit / 2)
                            break;
                    }

                    OperationContextImpl txn;

                    // Waits for all active operations to drain and won't let new ones start. This
                    // should be optimized to allow readers in (see SERVER-15262).
                    AutoAcquireFlushLockForMMAPV1Commit flushLock(txn.lockState());

                    groupCommit();
                    remapPrivateView();
                }
                catch(std::exception& e) {
                    log() << "exception in durThread causing immediate shutdown: " << e.what() << endl;
                    mongoAbort("exception in durThread");
                }
                catch (...) {
                    log() << "unhandled exception in durThread causing immediate shutdown" << endl;
                    mongoAbort("unhandled exception in durThread");
                }
            }

            cc().shutdown();
        }
Example #29
    void createOplog() {
        OperationContextImpl txn;
        Lock::GlobalWrite lk(txn.lockState());

        const char * ns = "local.oplog.$main";

        bool rs = !replSettings.replSet.empty();
        if( rs )
            ns = rsoplog;

        Client::Context ctx(ns);
        Collection* collection = ctx.db()->getCollection( &txn, ns );

        if ( collection ) {

            if (replSettings.oplogSize != 0) {
                int o = (int)(collection->getRecordStore()->storageSize() / ( 1024 * 1024 ) );
                int n = (int)(replSettings.oplogSize / (1024 * 1024));
                if ( n != o ) {
                    stringstream ss;
                    ss << "cmdline oplogsize (" << n << ") different than existing (" << o << ") see: http://dochub.mongodb.org/core/increase-oplog";
                    log() << ss.str() << endl;
                    throw UserException( 13257 , ss.str() );
                }
            }

            if( rs ) return;

            initOpTimeFromOplog(&txn, ns);
            return;
        }

        /* create an oplog collection, if it doesn't yet exist. */
        long long sz = 0;
        if ( replSettings.oplogSize != 0 ) {
            sz = replSettings.oplogSize;
        }
        else {
            /* not specified. pick a default size */
            sz = 50LL * 1024LL * 1024LL;
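            // On 64-bit builds, pick a larger platform-dependent default.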
            if ( sizeof(int *) >= 8 ) {
#if defined(__APPLE__)
                // typically these are desktops (dev machines), so keep it smallish
                sz = (256-64) * 1024 * 1024;
#else
                sz = 990LL * 1024 * 1024;
                double free =
                    File::freeSpace(storageGlobalParams.dbpath); //-1 if call not supported.
                long long fivePct = static_cast<long long>( free * 0.05 );
                if ( fivePct > sz )
                    sz = fivePct;
                // we use 5% of free space up to 50GB (1TB free)
                static long long upperBound = 50LL * 1024 * 1024 * 1024;
                if (fivePct > upperBound)
                    sz = upperBound;
#endif
            }
        }

        log() << "******" << endl;
        log() << "creating replication oplog of size: " << (int)( sz / ( 1024 * 1024 ) ) << "MB..." << endl;

        CollectionOptions options;
        options.capped = true;
        options.cappedSize = sz;
        options.autoIndexId = CollectionOptions::NO;

        invariant( ctx.db()->createCollection( &txn, ns, options ) );
        if( !rs )
            logOp( &txn, "n", "", BSONObj() );

        /* sync here so we don't get any surprising lag later when we try to sync */
        MemoryMappedFile::flushAll(true);
        log() << "******" << endl;
    }
Example #30
    /* Tail the oplog. It is OK to return; this will be re-called. */
    void SyncTail::oplogApplication() {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        while(!inShutdown()) {
            OpQueue ops;
            OperationContextImpl txn;

            Timer batchTimer;
            int lastTimeChecked = 0;

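            // Accumulate a batch of ops, bounded by time, operation count, and total size.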
            do {
                int now = batchTimer.seconds();

                // apply replication batch limits
                if (!ops.empty()) {
                    if (now > replBatchLimitSeconds)
                        break;
                    if (ops.getDeque().size() > replBatchLimitOperations)
                        break;
                }
                // occasionally check some things
                // (always checked in the first iteration of this do-while loop, because
                // ops is empty)
                if (ops.empty() || now > lastTimeChecked) {
                    BackgroundSync* bgsync = BackgroundSync::get();
                    if (bgsync->getInitialSyncRequestedFlag()) {
                        // got a resync command
                        Lock::DBLock lk(txn.lockState(), "local", MODE_X);
                        WriteUnitOfWork wunit(&txn);
                        Client::Context ctx(&txn, "local");

                        ctx.db()->dropCollection(&txn, "local.oplog.rs");

                        // Note: the following order is important.
                        // The bgsync thread uses an empty optime as a sentinel to know to wait
                        // for initial sync (done in this thread after we return); thus, we must
                        // ensure the lastAppliedOptime is empty before pausing the bgsync thread
                        // via stop().
                        // We must clear the sync source blacklist after calling stop()
                        // because the bgsync thread, while running, may update the blacklist.
                        replCoord->setMyLastOptime(&txn, OpTime());
                        bgsync->stop();
                        replCoord->clearSyncSourceBlacklist();

                        wunit.commit();

                        return;
                    }
                    lastTimeChecked = now;
                    // can we become secondary?
                    // we have to check this before calling mgr, as we must be a secondary to
                    // become primary
                    tryToGoLiveAsASecondary(&txn, replCoord);

                    // TODO(emilkie): This can be removed once we switch over from legacy;
                    // this code is what moves 1-node sets to PRIMARY state.
                    // normally msgCheckNewState gets called periodically, but in a single node
                    // replset there are no heartbeat threads, so we do it here to be sure.  this is
                    // relevant if the singleton member has done a stepDown() and needs to come back
                    // up.
                    if (theReplSet &&
                            theReplSet->config().members.size() == 1 &&
                            theReplSet->myConfig().potentiallyHot()) {
                        Manager* mgr = theReplSet->mgr;
                        // When would mgr be null?  During replsettest'ing, in which case we should
                        // fall through and actually apply ops as if we were a real secondary.
                        if (mgr) { 
                            mgr->send(stdx::bind(&Manager::msgCheckNewState, theReplSet->mgr));
                            sleepsecs(1);
                            // There should never be ops to sync in a 1-member set, anyway
                            return;
                        }
                    }
                }

                const int slaveDelaySecs = replCoord->getSlaveDelaySecs().total_seconds();
                if (!ops.empty() && slaveDelaySecs > 0) {
                    const BSONObj& lastOp = ops.getDeque().back();
                    const unsigned int opTimestampSecs = lastOp["ts"]._opTime().getSecs();

                    // Stop the batch as the lastOp is too new to be applied. If we continue
                    // on, we can get ops that are way ahead of the delay and this will
                    // make this thread sleep longer when handleSlaveDelay is called
                    // and apply ops much sooner than we like.
                    if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
                        break;
                    }
                }
                // keep fetching more ops as long as we haven't filled up a full batch yet
            } while (!tryPopAndWaitForMore(&ops, replCoord) && // tryPopAndWaitForMore returns true 
                                                               // when we need to end a batch early
                   (ops.getSize() < replBatchLimitBytes) &&
                   !inShutdown());

            // For pausing replication in tests
            while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
                sleepmillis(0);
            }

            if (ops.empty()) {
                continue;
            }

            const BSONObj& lastOp = ops.getDeque().back();
            handleSlaveDelay(lastOp);

            if (replCoord->getCurrentMemberState().primary() && 
                !replCoord->isWaitingForApplierToDrain()) {
                severe() << "attempting to replicate ops while primary";
                fassertFailed(28527);
            }

            // Set minValid to the last op to be applied in this next batch.
            // This will cause this node to go into RECOVERING state
            // if we should crash and restart before updating the oplog
            OpTime minValid = lastOp["ts"]._opTime();
            setMinValid(&txn, minValid);

            multiApply(ops.getDeque());

            applyOpsToOplog(&ops.getDeque());

            // If we're just testing (no manager), don't keep looping if we exhausted the bgqueue
            // TODO(spencer): Remove repltest.cpp dbtest or make this work with the new replication
            // coordinator
            if (theReplSet && !theReplSet->mgr) {
                BSONObj op;
                if (!peek(&op)) {
                    return;
                }
            }
        }
    }