bool SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getCurrentMemberState().primary()) {
            // primary has no one to update to
            return true;
        }
        BSONObjBuilder cmd;
        {
            boost::unique_lock<boost::mutex> lock(_mtx);
            if (_handshakeNeeded) {
                // Don't send updates if there are nodes that haven't yet been handshaked
                return false;
            }
            replCoord->prepareReplSetUpdatePositionCommand(txn, &cmd);
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        bool ok;
        try {
            ok = _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            _resetConnection();
            return false;
        }
        if (!ok) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            _resetConnection();
            return false;
        }
        return true;
    }
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getCurrentMemberState().primary()) {
            // primary has no one to update to
            return Status::OK();
        }
        BSONObjBuilder cmd;
        {
            boost::unique_lock<boost::mutex> lock(_mtx);
            if (_handshakeNeeded) {
                // Don't send updates if there are nodes that haven't yet been handshaked
                return Status(ErrorCodes::NodeNotFound,
                              "Need to send handshake before updating position upstream");
            }
            replCoord->prepareReplSetUpdatePositionCommand(txn, &cmd);
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        try {
            _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            _resetConnection();
            return e.toStatus();
        }

        Status status = Command::getStatusFromCommandResult(res);
        if (!status.isOK()) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            _resetConnection();
        }
        return status;
    }
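
The two definitions above are the same function at different points in its history: the return type migrated from bool to Status so the caller can see why a position update failed, not just that it did. A minimal, self-contained sketch of what that buys the caller (the Status struct here is a hypothetical stand-in, not mongo::Status):

#include <iostream>
#include <string>

// Hypothetical stand-in for mongo::Status, for illustration only.
struct Status {
    bool ok;
    std::string reason;
    bool isOK() const { return ok; }
};

// Stub playing the role of the Status-returning updateUpstream() above.
Status updateUpstream() {
    return Status{false, "Need to send handshake before updating position upstream"};
}

int main() {
    // With bool, a caller could only retry blindly; with Status,
    // the failure reason travels with the result.
    Status s = updateUpstream();
    if (!s.isOK()) {
        std::cerr << "updateUpstream failed: " << s.reason << "\n";
    }
    return 0;
}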
    void SyncSourceFeedback::run() {
        Client::initThread("SyncSourceFeedbackThread");
        OperationContextImpl txn;

        bool positionChanged = false;
        bool handshakeNeeded = false;
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
            {
                boost::unique_lock<boost::mutex> lock(_mtx);
                while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
                    _cond.wait(lock);
                }

                if (_shutdownSignaled) {
                    break;
                }

                positionChanged = _positionChanged;
                handshakeNeeded = _handshakeNeeded;
                _positionChanged = false;
                _handshakeNeeded = false;
            }

            MemberState state = replCoord->getCurrentMemberState();
            if (state.primary() || state.fatal() || state.startup()) {
                continue;
            }
            const Member* target = BackgroundSync::get()->getSyncTarget();
            if (_syncTarget != target) {
                _resetConnection();
                _syncTarget = target;
            }
            if (!hasConnection()) {
                // fix connection if need be
                if (!target) {
                    sleepmillis(500);
                    continue;
                }
                if (!_connect(&txn, target->fullName())) {
                    sleepmillis(500);
                    continue;
                }
            }
            if (handshakeNeeded) {
                if (!replHandshake(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _handshakeNeeded = true;
                    continue;
                }
            }
            if (positionChanged) {
                if (!updateUpstream(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _positionChanged = true;
                }
            }
        }
        cc().shutdown();
    }
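
The run() loop above is a classic condition-variable consumer: producers set _positionChanged/_handshakeNeeded under _mtx and signal _cond; the loop snapshots and clears the flags under the lock, then does the slow network work outside it. A self-contained sketch of the idiom using only the standard library (names are illustrative):

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex mtx;
std::condition_variable cond;
bool positionChanged = false;
bool shutdownSignaled = false;

void feedbackLoop() {
    while (true) {
        bool doUpdate = false;
        {
            std::unique_lock<std::mutex> lock(mtx);
            // The predicate guards against spurious wakeups, like the
            // explicit while loop around _cond.wait(lock) above.
            cond.wait(lock, [] { return positionChanged || shutdownSignaled; });
            if (shutdownSignaled)
                break;
            doUpdate = positionChanged;
            positionChanged = false;  // snapshot and clear under the lock
        }
        if (doUpdate)
            std::cout << "sending position update (outside the lock)\n";
    }
}

int main() {
    std::thread t(feedbackLoop);
    { std::lock_guard<std::mutex> lk(mtx); positionChanged = true; }
    cond.notify_one();
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
    { std::lock_guard<std::mutex> lk(mtx); shutdownSignaled = true; }
    cond.notify_one();
    t.join();
    return 0;
}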
Example #4
void SyncTail::handleSlaveDelay(const BSONObj& lastOp) {
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    int slaveDelaySecs = durationCount<Seconds>(replCoord->getSlaveDelaySecs());

    // ignore slaveDelay if the box is still initializing. once
    // it becomes secondary we can worry about it.
    if (slaveDelaySecs > 0 && replCoord->getMemberState().secondary()) {
        const Timestamp ts = lastOp["ts"].timestamp();
        long long a = ts.getSecs();
        long long b = time(0);
        long long lag = b - a;
        long long sleeptime = slaveDelaySecs - lag;
        if (sleeptime > 0) {
            uassert(12000,
                    "rs slaveDelay differential too big check clocks and systems",
                    sleeptime < 0x40000000);
            if (sleeptime < 60) {
                sleepsecs((int)sleeptime);
            } else {
                warning() << "slavedelay causing a long sleep of " << sleeptime << " seconds";
                // sleep(hours) would prevent reconfigs from taking effect & such!
                long long waitUntil = b + sleeptime;
                while (time(0) < waitUntil) {
                    sleepsecs(6);

                    // Handle reconfigs that changed the slave delay
                    if (durationCount<Seconds>(replCoord->getSlaveDelaySecs()) != slaveDelaySecs)
                        break;
                }
            }
        }
    }  // endif slaveDelay
}
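
The arithmetic here is worth spelling out: with a the op's timestamp and b the current wall clock, lag = b - a and the remaining delay is slaveDelaySecs - lag; sleeps longer than a minute are taken in 6-second slices so a reconfig can cut them short. The computation in isolation:

#include <ctime>
#include <iostream>

// The sleep computation from handleSlaveDelay above, in isolation.
long long computeSleepSecs(long long slaveDelaySecs, long long opTimeSecs) {
    long long now = std::time(nullptr);
    long long lag = now - opTimeSecs;            // how far behind the op already is
    long long sleeptime = slaveDelaySecs - lag;  // remaining delay still owed
    return sleeptime > 0 ? sleeptime : 0;
}

int main() {
    // e.g. slaveDelay = 3600s and the op was written 600s ago:
    // we still owe 3000s of delay, taken in interruptible slices.
    std::cout << computeSleepSecs(3600, std::time(nullptr) - 600) << "\n";  // prints 3000
    return 0;
}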
Example #5
void SyncSourceFeedback::run() {
    Client::initThread("SyncSourceFeedback");

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    while (true) {  // breaks once _shutdownSignaled is true
        {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, _keepAliveInterval) == stdx::cv_status::timeout) {
                    break;
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        auto txn = cc().makeOperationContext();
        MemberState state = replCoord->getMemberState();
        if (state.primary() || state.startup()) {
            _resetConnection();
            continue;
        }
        const HostAndPort target = BackgroundSync::get()->getSyncTarget();
        if (_syncTarget != target) {
            _resetConnection();
            _syncTarget = target;
        }
        if (!hasConnection()) {
            // fix connection if need be
            if (target.empty()) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
            if (!_connect(txn.get(), target)) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
        }
        Status status = updateUpstream(txn.get());
        if (!status.isOK()) {
            sleepmillis(500);
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            _positionChanged = true;
        }
    }
}
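
Compared with the earlier run(), this revision waits with a timeout: wait_for returning cv_status::timeout means the keepalive interval elapsed with no position change, and the loop falls through to send an update anyway. The timeout idiom in isolation (interval value is illustrative):

#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>

int main() {
    std::mutex mtx;
    std::condition_variable cond;
    const auto keepAliveInterval = std::chrono::milliseconds(200);

    std::unique_lock<std::mutex> lock(mtx);
    // Nobody notifies here, so (barring a spurious wakeup) wait_for returns
    // after the interval, mirroring the cv_status::timeout break above.
    if (cond.wait_for(lock, keepAliveInterval) == std::cv_status::timeout) {
        std::cout << "keepalive: send an update even though nothing changed\n";
    }
    return 0;
}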
    bool SyncSourceFeedback::replHandshake(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getCurrentMemberState().primary()) {
            // primary has no one to handshake to
            return true;
        }
        // construct a vector of handshake obj for us as well as all chained members
        std::vector<BSONObj> handshakeObjs;
        replCoord->prepareReplSetUpdatePositionCommandHandshakes(txn, &handshakeObjs);
        LOG(1) << "handshaking upstream updater";
        for (std::vector<BSONObj>::iterator it = handshakeObjs.begin();
                it != handshakeObjs.end();
                ++it) {
            BSONObj res;
            try {
                LOG(2) << "Sending to " << _connection.get()->toString() << " the replication "
                        "handshake: " << *it;
                if (!_connection->runCommand("admin", *it, res)) {
                    std::string errMsg = res["errmsg"].valuestrsafe();
                    massert(17447, "upstream updater is not supported by the member from which we"
                            " are syncing, please update all nodes to 2.6 or later.",
                            errMsg.find("no such cmd") == std::string::npos);

                    log() << "replSet error while handshaking the upstream updater: "
                        << errMsg;

                    // sleep half a second if we are not in our sync source's config
                    // TODO(dannenberg) after 2.8, remove the string comparison 
                    if (res["code"].numberInt() == ErrorCodes::NodeNotFound ||
                            errMsg.find("could not be found in replica set config while attempting "
                                        "to associate it with") != std::string::npos) {

                        // black list sync target for 10 seconds and find a new one
                        replCoord->blacklistSyncSource(_syncTarget,
                                                       Date_t(curTimeMillis64() + 10*1000));
                        BackgroundSync::get()->clearSyncTarget();
                    }

                    _resetConnection();
                    return false;
                }
            }
            catch (const DBException& e) {
                log() << "SyncSourceFeedback error sending handshake: " << e.what() << endl;
                _resetConnection();
                return false;
            }
        }
        return true;
    }
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getMemberState().primary()) {
            // primary has no one to update to
            return Status::OK();
        }
        BSONObjBuilder cmd;
        {
            boost::unique_lock<boost::mutex> lock(_mtx);
            if (_handshakeNeeded) {
                // Don't send updates if there are nodes that haven't yet been handshaked
                return Status(ErrorCodes::NodeNotFound,
                              "Need to send handshake before updating position upstream");
            }
            // the command could not be created, likely because the node was removed from the set
            if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
                return Status::OK();
            }
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        try {
            _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            // blacklist sync target for .5 seconds and find a new one
            replCoord->blacklistSyncSource(_syncTarget,
                                           Date_t(curTimeMillis64() + 500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
            return e.toStatus();
        }

        Status status = Command::getStatusFromCommandResult(res);
        if (!status.isOK()) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            // blacklist sync target for .5 seconds and find a new one
            replCoord->blacklistSyncSource(_syncTarget,
                                           Date_t(curTimeMillis64() + 500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
        }
        return status;
    }
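
New in this version: any failure blacklists the current sync source for half a second and clears it, so BackgroundSync must pick a source again rather than hammering a bad one. A toy, standard-library-only model of that blacklist (names illustrative, not the real implementation):

#include <chrono>
#include <iostream>
#include <map>
#include <string>

using Clock = std::chrono::steady_clock;

std::map<std::string, Clock::time_point> blacklist;

// Toy analogue of replCoord->blacklistSyncSource(target, now + 500ms) above.
void blacklistSyncSource(const std::string& host, Clock::duration d) {
    blacklist[host] = Clock::now() + d;
}

bool isBlacklisted(const std::string& host) {
    auto it = blacklist.find(host);
    return it != blacklist.end() && Clock::now() < it->second;
}

int main() {
    blacklistSyncSource("node2:27017", std::chrono::milliseconds(500));
    std::cout << std::boolalpha << isBlacklisted("node2:27017") << "\n";  // true
    return 0;
}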
Example #8
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {

            ScopedTransaction transaction(txn, MODE_X);
            Lock::GlobalWrite globalWriteLock(txn->lockState());

            ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
            if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
                const MemberState memberState = replCoord->getMemberState();
                if (memberState.startup()) {
                    return appendCommandStatus(result, Status(ErrorCodes::NotYetInitialized,
                                                              "no replication yet active"));
                }
                if (memberState.primary() ||
                        !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) {
                    return appendCommandStatus(result, Status(ErrorCodes::NotSecondary,
                                                              "primaries cannot resync"));
                }
                BackgroundSync::get()->setInitialSyncRequestedFlag(true);
                return true;
            }

            // below this comment pertains only to master/slave replication
            if ( cmdObj.getBoolField( "force" ) ) {
                if ( !waitForSyncToFinish(txn, errmsg ) )
                    return false;
                replAllDead = "resync forced";
            }
            // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
            if (!replAllDead) {
                errmsg = "not dead, no need to resync";
                return false;
            }
            if ( !waitForSyncToFinish(txn, errmsg ) )
                return false;

            ReplSource::forceResyncDead( txn, "client" );
            result.append( "info", "triggered resync for all sources" );

            return true;
        }
    BSONObj generateSection(OperationContext* opCtx, const BSONElement& configElement) const {
        ReplicationCoordinator* replCoord = ReplicationCoordinator::get(opCtx);
        if (!replCoord->isReplEnabled()) {
            return BSONObj();
        }

        BSONObjBuilder result;
        // TODO(siyuan) Output term of OpTime
        result.append("latestOptime", replCoord->getMyLastAppliedOpTime().getTimestamp());

        BSONObj o;
        uassert(17347,
                "Problem reading earliest entry from oplog",
                Helpers::getSingleton(opCtx, NamespaceString::kRsOplogNamespace.ns().c_str(), o));
        result.append("earliestOptime", o["ts"].timestamp());
        return result.obj();
    }
Example #10
    /** write an op to the oplog that is already built.
        todo : make _logOpRS() call this so we don't repeat ourself?
        */
    OpTime _logOpObjRS(OperationContext* txn, const BSONObj& op) {
        Lock::DBLock lk(txn->lockState(), "local", newlm::MODE_X);
        // XXX soon this needs to be part of an outer WUOW not its own.
        // We can't do this yet due to locking limitations.
        WriteUnitOfWork wunit(txn);

        const OpTime ts = op["ts"]._opTime();
        long long hash = op["h"].numberLong();

        {
            if ( localOplogRSCollection == 0 ) {
                Client::Context ctx(txn, rsoplog);

                localDB = ctx.db();
                verify( localDB );
                localOplogRSCollection = localDB->getCollection(txn, rsoplog);
                massert(13389,
                        "local.oplog.rs missing. did you drop it? if so restart server",
                        localOplogRSCollection);
            }
            Client::Context ctx(txn, rsoplog, localDB);
            checkOplogInsert(localOplogRSCollection->insertDocument(txn, op, false));

            ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
            OpTime myLastOptime = replCoord->getMyLastOptime();
            if (!(myLastOptime < ts)) {
                severe() << "replication oplog stream went back in time. previous timestamp: "
                         << myLastOptime << " newest timestamp: " << ts;
                fassertFailedNoTrace(18905);
            }
            
            BackgroundSync* bgsync = BackgroundSync::get();
            // Keep this up-to-date, in case we step up to primary.
            bgsync->setLastAppliedHash(hash);

            ctx.getClient()->setLastOp( ts );
            
            replCoord->setMyLastOptime(txn, ts);
            bgsync->notify();
        }

        setNewOptime(ts);
        wunit.commit();
        return ts;
    }
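
The guard in the middle of _logOpObjRS is the oplog's monotonicity invariant: the incoming ts must be strictly greater than myLastOptime, or the node fasserts with "oplog stream went back in time". The comparison in isolation (note that an equal timestamp also fails):

#include <iostream>

// The monotonicity check from _logOpObjRS above: !(myLastOptime < ts)
// rejects anything that is not strictly newer than what we last applied.
bool oplogWentBackInTime(long long myLastOptime, long long ts) {
    return !(myLastOptime < ts);
}

int main() {
    std::cout << std::boolalpha
              << oplogWentBackInTime(100, 101) << " "   // false: stream advancing
              << oplogWentBackInTime(100, 100) << "\n"; // true: duplicate optime
    return 0;
}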
// Applies a batch of oplog entries, by using a set of threads to apply the operations and then
// writes the oplog entries to the local oplog.
OpTime SyncTail::multiApply(OperationContext* txn, const OpQueue& ops) {
    invariant(_applyFunc);

    if (getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1()) {
        // Use a ThreadPool to prefetch all the operations in a batch.
        prefetchOps(ops.getDeque(), &_prefetcherPool);
    }

    std::vector<std::vector<BSONObj>> writerVectors(replWriterThreadCount);

    fillWriterVectors(txn, ops.getDeque(), &writerVectors);
    LOG(2) << "replication batch size is " << ops.getDeque().size() << endl;
    // We must grab this because we're going to grab write locks later.
    // We hold this mutex the entire time we're writing; it doesn't matter
    // because all readers are blocked anyway.
    stdx::lock_guard<SimpleMutex> fsynclk(filesLockedFsync);

    // stop all readers until we're done
    Lock::ParallelBatchWriterMode pbwm(txn->lockState());

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    if (replCoord->getMemberState().primary() && !replCoord->isWaitingForApplierToDrain()) {
        severe() << "attempting to replicate ops while primary";
        fassertFailed(28527);
    }

    applyOps(writerVectors, &_writerPool, _applyFunc, this);

    OpTime lastOpTime;
    {
        ON_BLOCK_EXIT([&] { _writerPool.join(); });
        std::vector<BSONObj> raws;
        raws.reserve(ops.getDeque().size());
        for (auto&& op : ops.getDeque()) {
            raws.emplace_back(op.raw);
        }
        lastOpTime = writeOpsToOplog(txn, raws);
        if (inShutdown()) {
            return OpTime();
        }
    }
    // We have now written all database writes and updated the oplog to match.
    return lastOpTime;
}
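
fillWriterVectors is not shown here, but the shape of the fan-out is visible from the call: each op is hashed into one of replWriterThreadCount vectors, so ops that must stay ordered relative to each other land on the same writer thread. A hedged sketch of that shape (hashing by namespace is an assumption for illustration, not a quote of the real function):

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Hedged sketch: hash each op's namespace so all ops on one namespace
// go to the same writer vector, preserving their relative order.
void fillWriterVectors(const std::vector<std::string>& opNamespaces,
                       std::vector<std::vector<std::string>>* writerVectors) {
    std::hash<std::string> hasher;
    for (const auto& ns : opNamespaces) {
        size_t idx = hasher(ns) % writerVectors->size();
        (*writerVectors)[idx].push_back(ns);
    }
}

int main() {
    std::vector<std::vector<std::string>> writerVectors(4);  // replWriterThreadCount
    fillWriterVectors({"db.a", "db.b", "db.a", "db.c"}, &writerVectors);
    for (size_t i = 0; i < writerVectors.size(); ++i)
        std::cout << "writer " << i << ": " << writerVectors[i].size() << " ops\n";
    return 0;
}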
Example #12
    BSONObj generateSection(OperationContext* txn, const BSONElement& configElement) const {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (!replCoord->isReplEnabled()) {
            return BSONObj();
        }

        BSONObjBuilder result;
        // TODO(siyuan) Output term of OpTime
        result.append("latestOptime", replCoord->getMyLastOptime().getTimestamp());

        const std::string& oplogNS =
            replCoord->getReplicationMode() == ReplicationCoordinator::modeReplSet
            ? rsOplogName
            : masterSlaveOplogName;
        BSONObj o;
        uassert(17347,
                "Problem reading earliest entry from oplog",
                Helpers::getSingleton(txn, oplogNS.c_str(), o));
        result.append("earliestOptime", o["ts"].timestamp());
        return result.obj();
    }
Example #13
    bool SyncSourceFeedback::replHandshake(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getCurrentMemberState().primary()) {
            // primary has no one to handshake to
            return true;
        }
        // construct a vector of handshake obj for us as well as all chained members
        std::vector<BSONObj> handshakeObjs;
        replCoord->prepareReplSetUpdatePositionCommandHandshakes(txn, &handshakeObjs);
        LOG(1) << "handshaking upstream updater";
        for (std::vector<BSONObj>::iterator it = handshakeObjs.begin();
                it != handshakeObjs.end();
                ++it) {
            BSONObj res;
            try {
                LOG(2) << "Sending to " << _connection.get()->toString() << " the replication "
                        "handshake: " << *it;
                if (!_connection->runCommand("admin", *it, res)) {
                    massert(17447, "upstream updater is not supported by the member from which we"
                            " are syncing, please update all nodes to 2.6 or later.",
                            res["errmsg"].str().find("no such cmd") == std::string::npos);
                    log() << "replSet error while handshaking the upstream updater: "
                        << res["errmsg"].valuestrsafe();

                    _resetConnection();
                    return false;
                }
            }
            catch (const DBException& e) {
                log() << "SyncSourceFeedback error sending handshake: " << e.what() << endl;
                _resetConnection();
                return false;
            }
        }
        return true;
    }
Example #14
    Status SyncSourceFeedback::updateUpstream(OperationContext* txn) {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        if (replCoord->getMemberState().primary()) {
            // primary has no one to update to
            return Status::OK();
        }
        BSONObjBuilder cmd;
        {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            // the command could not be created, likely because the node was removed from the set
            if (!replCoord->prepareReplSetUpdatePositionCommand(&cmd)) {
                return Status::OK();
            }
        }
        BSONObj res;

        LOG(2) << "Sending slave oplog progress to upstream updater: " << cmd.done();
        try {
            _connection->runCommand("admin", cmd.obj(), res);
        }
        catch (const DBException& e) {
            log() << "SyncSourceFeedback error sending update: " << e.what() << endl;
            // blacklist sync target for .5 seconds and find a new one
            replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
            BackgroundSync::get()->clearSyncTarget();
            _resetConnection();
            return e.toStatus();
        }

        Status status = Command::getStatusFromCommandResult(res);
        if (!status.isOK()) {
            log() << "SyncSourceFeedback error sending update, response: " << res.toString() <<endl;
            // blacklist sync target for .5 seconds and find a new one, unless we were rejected due
            // to the syncsource having a newer config
            if (status != ErrorCodes::InvalidReplicaSetConfig || res["cfgver"].eoo() ||
                    res["cfgver"].numberLong() < replCoord->getConfig().getConfigVersion()) {
                replCoord->blacklistSyncSource(_syncTarget, Date_t::now() + Milliseconds(500));
                BackgroundSync::get()->clearSyncTarget();
                _resetConnection();
            }
        }

        return status;
    }
Example #15
/* tail an oplog.  ok to return, will be re-called. */
void SyncTail::oplogApplication() {
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    ApplyBatchFinalizer finalizer(replCoord);

    OperationContextImpl txn;
    OpTime originalEndOpTime(getMinValid(&txn).end);

    while (!inShutdown()) {
        OpQueue ops;

        Timer batchTimer;
        int lastTimeChecked = 0;

        do {
            int now = batchTimer.seconds();

            // apply replication batch limits
            if (!ops.empty()) {
                if (now > replBatchLimitSeconds)
                    break;
                if (ops.getDeque().size() > replBatchLimitOperations)
                    break;
            }
            // occasionally check some things
            // (always checked in the first iteration of this do-while loop, because
            // ops is empty)
            if (ops.empty() || now > lastTimeChecked) {
                BackgroundSync* bgsync = BackgroundSync::get();
                if (bgsync->getInitialSyncRequestedFlag()) {
                    // got a resync command
                    return;
                }
                lastTimeChecked = now;
                // can we become secondary?
                // we have to check this before calling mgr, as we must be a secondary to
                // become primary
                tryToGoLiveAsASecondary(&txn, replCoord);
            }

            const int slaveDelaySecs = durationCount<Seconds>(replCoord->getSlaveDelaySecs());
            if (!ops.empty() && slaveDelaySecs > 0) {
                const BSONObj lastOp = ops.back();
                const unsigned int opTimestampSecs = lastOp["ts"].timestamp().getSecs();

                // Stop the batch as the lastOp is too new to be applied. If we continue
                // on, we can get ops that are way ahead of the delay and this will
                // make this thread sleep longer when handleSlaveDelay is called
                // and apply ops much sooner than we like.
                if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
                    break;
                }
            }

            if (MONGO_FAIL_POINT(rsSyncApplyStop)) {
                break;
            }

            // keep fetching more ops as long as we haven't filled up a full batch yet
        } while (!tryPopAndWaitForMore(&txn, &ops, replCoord) &&  // tryPopAndWaitForMore returns
                                                                  // true when we need to end a
                                                                  // batch early
                 (ops.getSize() < replBatchLimitBytes) &&
                 !inShutdown());

        // For pausing replication in tests
        while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
            sleepmillis(0);
            if (inShutdown())
                return;
        }

        if (ops.empty()) {
            continue;
        }

        const BSONObj lastOp = ops.back();
        handleSlaveDelay(lastOp);

        // Set minValid to the last OpTime that needs to be applied, in this batch or from the
        // (last) failed batch, whichever is larger.
        // This will cause this node to go into RECOVERING state
        // if we should crash and restart before updating finishing.
        const OpTime start(getLastSetTimestamp(), OpTime::kUninitializedTerm);

        // Take the max of the first endOptime (if we recovered) and the end of our batch.
        const auto lastOpTime = fassertStatusOK(28773, OpTime::parseFromOplogEntry(lastOp));

        // Setting end to the max of originalEndOpTime and lastOpTime (the end of the batch)
        // ensures that we keep pushing out the point where we can become consistent
        // and allow reads. If we recover and end up doing smaller batches we must pass the
        // originalEndOpTime before we are good.
        //
        // For example:
        // batch apply, 20-40, end = 40
        // batch failure,
        // restart
        // batch apply, 20-25, end = max(25, 40) = 40
        // batch apply, 25-45, end = 45
        const OpTime end(std::max(originalEndOpTime, lastOpTime));

        // This write will not journal/checkpoint.
        setMinValid(&txn, {start, end});

        OpTime finalOpTime = multiApply(&txn, ops);
        setNewTimestamp(finalOpTime.getTimestamp());

        setMinValid(&txn, end, DurableRequirement::None);
        finalizer.record(finalOpTime);
    }
}
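
The batch-building do-while above cuts a batch when any of three limits trips: elapsed seconds (replBatchLimitSeconds), op count (replBatchLimitOperations), or total bytes (replBatchLimitBytes); an empty batch is never cut. The cut-off predicate in isolation (limit values are illustrative):

#include <cstddef>
#include <iostream>

// Illustrative stand-ins for replBatchLimitSeconds et al.
const int kBatchLimitSeconds = 1;
const std::size_t kBatchLimitOperations = 5000;
const std::size_t kBatchLimitBytes = 100 * 1024 * 1024;

bool batchFull(int elapsedSecs, std::size_t numOps, std::size_t numBytes) {
    if (numOps == 0)
        return false;  // never cut an empty batch; keep waiting for ops
    return elapsedSecs > kBatchLimitSeconds || numOps > kBatchLimitOperations ||
        numBytes >= kBatchLimitBytes;
}

int main() {
    std::cout << std::boolalpha
              << batchFull(5, 0, 0) << " "          // false: empty batch
              << batchFull(2, 10, 1024) << " "      // true: over the time limit
              << batchFull(0, 6000, 1024) << "\n";  // true: over the op limit
    return 0;
}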
void appendReplicationInfo(OperationContext* opCtx, BSONObjBuilder& result, int level) {
    ReplicationCoordinator* replCoord = ReplicationCoordinator::get(opCtx);
    if (replCoord->getSettings().usingReplSets()) {
        IsMasterResponse isMasterResponse;
        replCoord->fillIsMasterForReplSet(&isMasterResponse);
        result.appendElements(isMasterResponse.toBSON());
        if (level) {
            replCoord->appendSlaveInfoData(&result);
        }
        return;
    }

    result.appendBool("ismaster",
                      ReplicationCoordinator::get(opCtx)->isMasterForReportingPurposes());

    if (level) {
        BSONObjBuilder sources(result.subarrayStart("sources"));

        int n = 0;
        list<BSONObj> src;
        {
            const NamespaceString localSources{"local.sources"};
            AutoGetCollectionForReadCommand ctx(opCtx, localSources);
            auto exec = InternalPlanner::collectionScan(
                opCtx, localSources.ns(), ctx.getCollection(), PlanExecutor::NO_YIELD);
            BSONObj obj;
            PlanExecutor::ExecState state;
            while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
                src.push_back(obj.getOwned());
            }

            // Non-yielding collection scans from InternalPlanner will never error.
            invariant(PlanExecutor::IS_EOF == state);
        }

        for (list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++) {
            BSONObj s = *i;
            BSONObjBuilder bb;
            bb.append(s["host"]);
            string sourcename = s["source"].valuestr();
            if (sourcename != "main")
                bb.append(s["source"]);
            {
                BSONElement e = s["syncedTo"];
                BSONObjBuilder t(bb.subobjStart("syncedTo"));
                t.appendDate("time", e.timestampTime());
                t.append("inc", e.timestampInc());
                t.done();
            }

            if (level > 1) {
                invariant(!opCtx->lockState()->isLocked());
                // note: there is no so-style timeout on this connection; perhaps we should have
                // one.
                ScopedDbConnection conn(s["host"].valuestr());

                DBClientConnection* cliConn = dynamic_cast<DBClientConnection*>(&conn.conn());
                if (cliConn && replAuthenticate(cliConn)) {
                    BSONObj first = conn->findOne((string) "local.oplog.$" + sourcename,
                                                  Query().sort(BSON("$natural" << 1)));
                    BSONObj last = conn->findOne((string) "local.oplog.$" + sourcename,
                                                 Query().sort(BSON("$natural" << -1)));
                    bb.appendDate("masterFirst", first["ts"].timestampTime());
                    bb.appendDate("masterLast", last["ts"].timestampTime());
                    const auto lag = (last["ts"].timestampTime() - s["syncedTo"].timestampTime());
                    bb.append("lagSeconds", durationCount<Milliseconds>(lag) / 1000.0);
                }
                conn.done();
            }

            sources.append(BSONObjBuilder::numStr(n++), bb.obj());
        }

        sources.done();

        replCoord->appendSlaveInfoData(&result);
    }
}
Example #17
    static void _logOpRS(OperationContext* txn,
                         const char *opstr,
                         const char *ns,
                         const char *logNS,
                         const BSONObj& obj,
                         BSONObj *o2,
                         bool *bb,
                         bool fromMigrate ) {
        Lock::DBLock lk1(txn->lockState(), "local", newlm::MODE_X);
        WriteUnitOfWork wunit(txn);

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 )
                resetSlaveCache();
            return;
        }
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        long long hashNew = BackgroundSync::get()->getLastAppliedHash();

        // Check to make sure logOp() is legal at this point.
        if (*opstr == 'n') {
            // 'n' operations are always logged
            invariant(*ns == '\0');

            // 'n' operations do not advance the hash, since they are not rolled back
        }
        else {
            if (!replCoord->canAcceptWritesForDatabase(nsToDatabaseSubstring(ns))) {
                severe() << "replSet error : logOp() but can't accept write to collection " << ns;
                fassertFailed(17405);
            }

            // Advance the hash
            hashNew = (hashNew * 131 + ts.asLL()) * 17 + replCoord->getMyId();
        }


        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        logopbufbuilder.reset();
        BSONObjBuilder b(logopbufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("h", hashNew);
        b.append("v", OPLOG_VERSION);
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done();

        DEV verify( logNS == 0 ); // check this was never a master/slave master

        if ( localOplogRSCollection == 0 ) {
            Client::Context ctx(txn, rsoplog);
            localDB = ctx.db();
            verify( localDB );
            localOplogRSCollection = localDB->getCollection( txn, rsoplog );
            massert(13347, "local.oplog.rs missing. did you drop it? if so restart server", localOplogRSCollection);
        }

        Client::Context ctx(txn, rsoplog, localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogRSCollection->insertDocument( txn, &writer, false ) );

        BackgroundSync::get()->setLastAppliedHash(hashNew);
        ctx.getClient()->setLastOp( ts );
        replCoord->setMyLastOptime(txn, ts);

        wunit.commit();

    }
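
The hash advance buried in _logOpRS is hashNew = (hashNew * 131 + ts) * 17 + myId: each replicated write folds the new optime and the member id into a rolling hash (note ops skip this, since they are never rolled back). Worked in isolation with illustrative numbers:

#include <iostream>

// The hash-advance rule from _logOpRS above, in isolation.
long long advanceHash(long long hashNew, long long tsAsLL, int myId) {
    return (hashNew * 131 + tsAsLL) * 17 + myId;
}

int main() {
    long long h = 0;
    h = advanceHash(h, 100, 1);  // (0*131 + 100)*17 + 1 = 1701
    std::cout << h << "\n";
    h = advanceHash(h, 101, 1);  // (1701*131 + 101)*17 + 1 = 3789845
    std::cout << h << "\n";
    return 0;
}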
Example #18
    static void _logOpOld(OperationContext* txn,
                          const char *opstr,
                          const char *ns,
                          const char *logNS,
                          const BSONObj& obj,
                          BSONObj *o2,
                          bool *bb,
                          bool fromMigrate ) {
        Lock::DBLock lk(txn->lockState(), "local", newlm::MODE_X);
        WriteUnitOfWork wunit(txn);
        static BufBuilder bufbuilder(8*1024); // todo there is likely a mutex on this constructor

        if ( strncmp(ns, "local.", 6) == 0 ) {
            if ( strncmp(ns, "local.slaves", 12) == 0 ) {
                resetSlaveCache();
            }
            return;
        }

        mutex::scoped_lock lk2(newOpMutex);

        OpTime ts(getNextGlobalOptime());
        newOptimeNotifier.notify_all();

        /* we jump through a bunch of hoops here to avoid copying the obj buffer twice --
           instead we do a single copy to the destination position in the memory mapped file.
        */

        bufbuilder.reset();
        BSONObjBuilder b(bufbuilder);
        b.appendTimestamp("ts", ts.asDate());
        b.append("op", opstr);
        b.append("ns", ns);
        if (fromMigrate) 
            b.appendBool("fromMigrate", true);
        if ( bb )
            b.appendBool("b", *bb);
        if ( o2 )
            b.append("o2", *o2);
        BSONObj partial = b.done(); // partial is everything except the o:... part.

        if( logNS == 0 ) {
            logNS = "local.oplog.$main";
        }

        if ( localOplogMainCollection == 0 ) {
            Client::Context ctx(txn, logNS);
            localDB = ctx.db();
            verify( localDB );
            localOplogMainCollection = localDB->getCollection(txn, logNS);
            verify( localOplogMainCollection );
        }

        Client::Context ctx(txn, logNS , localDB);
        OplogDocWriter writer( partial, obj );
        checkOplogInsert( localOplogMainCollection->insertDocument( txn, &writer, false ) );

        ctx.getClient()->setLastOp( ts );

        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        replCoord->setMyLastOptime(txn, ts);
        wunit.commit();
    }
Example #19
void runSyncThread() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Overwrite prefetch index mode in BackgroundSync if ReplSettings has a mode set.
    ReplSettings replSettings = replCoord->getSettings();
    if (replSettings.isPrefetchIndexModeSet())
        BackgroundSync::get()->setIndexPrefetchConfig(replSettings.getPrefetchIndexMode());

    while (!inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
            // Check criteria for doing an initial sync:
            // 1. If the oplog is empty, do an initial sync
            // 2. If minValid has _initialSyncFlag set, do an initial sync
            // 3. If initialSyncRequested is true
            if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
                getInitialSyncFlag() || initialSyncRequested) {
                syncDoInitialSync();
                continue;  // start from top again in case sync failed.
            }
            if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            /* we have some data.  continue tailing. */
            SyncTail tail(BackgroundSync::get(), multiSyncApply);
            tail.oplogApplication();
        } catch (...) {
            std::terminate();
        }
    }
}
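
The initial-sync decision in runSyncThread is the disjunction spelled out in its comment: sync if the oplog is empty, if minValid carries _initialSyncFlag, or if a resync was explicitly requested. Restated as a predicate:

#include <iostream>

// The three initial-sync triggers from runSyncThread above, in isolation.
bool needsInitialSync(bool lastOptimeIsNull,       // 1. the oplog is empty
                      bool initialSyncFlagSet,     // 2. minValid has _initialSyncFlag
                      bool initialSyncRequested) { // 3. a resync was requested
    return lastOptimeIsNull || initialSyncFlagSet || initialSyncRequested;
}

int main() {
    std::cout << std::boolalpha
              << needsInitialSync(false, false, false) << " "  // false: tail the oplog
              << needsInitialSync(false, false, true) << "\n"; // true: do initial sync
    return 0;
}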
Example #20
    void runSyncThread() {
        Client::initThread("rsSync");
        replLocalAuth();
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        // Set initial indexPrefetch setting
        std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
        if (!prefetch.empty()) {
            BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
            if (prefetch == "none")
                prefetchConfig = BackgroundSync::PREFETCH_NONE;
            else if (prefetch == "_id_only")
                prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
            else if (prefetch == "all")
                prefetchConfig = BackgroundSync::PREFETCH_ALL;
            else {
                warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
                          << "to \"all\"";
            }
            BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
        }

        while (!inShutdown()) {
            // After a reconfig, we may not be in the replica set anymore, so
            // check that we are in the set (and not an arbiter) before
            // trying to sync with other replicas.
            // TODO(spencer): Use a condition variable to await loading a config
            if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet) {
                log() << "replSet warning did not receive a valid config yet, sleeping 5 seconds "
                      << rsLog;
                sleepsecs(5);
                continue;
            }

            const MemberState memberState = replCoord->getCurrentMemberState();
            if (replCoord->getCurrentMemberState().arbiter()) {
                break;
            }

            try {

                if (memberState.primary()) {
                    sleepsecs(1);
                    continue;
                }

                bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
                // Check criteria for doing an initial sync:
                // 1. If the oplog is empty, do an initial sync
                // 2. If minValid has _initialSyncFlag set, do an initial sync
                // 3. If initialSyncRequested is true
                if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
                        getInitialSyncFlag() ||
                        initialSyncRequested) {
                    syncDoInitialSync();
                    continue; // start from top again in case sync failed.
                }
                replCoord->setFollowerMode(MemberState::RS_RECOVERING);

                /* we have some data.  continue tailing. */
                SyncTail tail(BackgroundSync::get(), multiSyncApply);
                tail.oplogApplication();
            }
            catch(const DBException& e) {
                log() << "Received exception while syncing: " << e.toString();
                sleepsecs(10);
            }
            catch(...) {
                sethbmsg("unexpected exception in syncThread()");
                // TODO : SET NOT SECONDARY here?
                sleepsecs(60);
            }
        }
        cc().shutdown();
    }
Example #21
    /* tail an oplog.  ok to return, will be re-called. */
    void SyncTail::oplogApplication() {
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        while(!inShutdown()) {
            OpQueue ops;
            OperationContextImpl txn;

            Timer batchTimer;
            int lastTimeChecked = 0;

            do {
                int now = batchTimer.seconds();

                // apply replication batch limits
                if (!ops.empty()) {
                    if (now > replBatchLimitSeconds)
                        break;
                    if (ops.getDeque().size() > replBatchLimitOperations)
                        break;
                }
                // occasionally check some things
                // (always checked in the first iteration of this do-while loop, because
                // ops is empty)
                if (ops.empty() || now > lastTimeChecked) {
                    BackgroundSync* bgsync = BackgroundSync::get();
                    if (bgsync->getInitialSyncRequestedFlag()) {
                        // got a resync command
                        Lock::DBLock lk(txn.lockState(), "local", MODE_X);
                        WriteUnitOfWork wunit(&txn);
                        Client::Context ctx(&txn, "local");

                        ctx.db()->dropCollection(&txn, "local.oplog.rs");

                        // Note: the following order is important.
                        // The bgsync thread uses an empty optime as a sentinel to know to wait
                        // for initial sync (done in this thread after we return); thus, we must
                        // ensure the lastAppliedOptime is empty before pausing the bgsync thread
                        // via stop().
                        // We must clear the sync source blacklist after calling stop()
                        // because the bgsync thread, while running, may update the blacklist.
                        replCoord->setMyLastOptime(&txn, OpTime());
                        bgsync->stop();
                        replCoord->clearSyncSourceBlacklist();

                        wunit.commit();

                        return;
                    }
                    lastTimeChecked = now;
                    // can we become secondary?
                    // we have to check this before calling mgr, as we must be a secondary to
                    // become primary
                    tryToGoLiveAsASecondary(&txn, replCoord);

                    // TODO(emilkie): This can be removed once we switch over from legacy;
                    // this code is what moves 1-node sets to PRIMARY state.
                    // normally msgCheckNewState gets called periodically, but in a single node
                    // replset there are no heartbeat threads, so we do it here to be sure.  this is
                    // relevant if the singleton member has done a stepDown() and needs to come back
                    // up.
                    if (theReplSet &&
                            theReplSet->config().members.size() == 1 &&
                            theReplSet->myConfig().potentiallyHot()) {
                        Manager* mgr = theReplSet->mgr;
                        // When would mgr be null?  During replsettest'ing, in which case we should
                        // fall through and actually apply ops as if we were a real secondary.
                        if (mgr) { 
                            mgr->send(stdx::bind(&Manager::msgCheckNewState, theReplSet->mgr));
                            sleepsecs(1);
                            // There should never be ops to sync in a 1-member set, anyway
                            return;
                        }
                    }
                }

                const int slaveDelaySecs = replCoord->getSlaveDelaySecs().total_seconds();
                if (!ops.empty() && slaveDelaySecs > 0) {
                    const BSONObj& lastOp = ops.getDeque().back();
                    const unsigned int opTimestampSecs = lastOp["ts"]._opTime().getSecs();

                    // Stop the batch as the lastOp is too new to be applied. If we continue
                    // on, we can get ops that are way ahead of the delay and this will
                    // make this thread sleep longer when handleSlaveDelay is called
                    // and apply ops much sooner than we like.
                    if (opTimestampSecs > static_cast<unsigned int>(time(0) - slaveDelaySecs)) {
                        break;
                    }
                }
                // keep fetching more ops as long as we haven't filled up a full batch yet
            } while (!tryPopAndWaitForMore(&ops, replCoord) && // tryPopAndWaitForMore returns true 
                                                               // when we need to end a batch early
                   (ops.getSize() < replBatchLimitBytes) &&
                   !inShutdown());

            // For pausing replication in tests
            while (MONGO_FAIL_POINT(rsSyncApplyStop)) {
                sleepmillis(0);
            }

            if (ops.empty()) {
                continue;
            }

            const BSONObj& lastOp = ops.getDeque().back();
            handleSlaveDelay(lastOp);

            if (replCoord->getCurrentMemberState().primary() && 
                !replCoord->isWaitingForApplierToDrain()) {
                severe() << "attempting to replicate ops while primary";
                fassertFailed(28527);
            }

            // Set minValid to the last op to be applied in this next batch.
            // This will cause this node to go into RECOVERING state
            // if we should crash and restart before updating the oplog
            OpTime minValid = lastOp["ts"]._opTime();
            setMinValid(&txn, minValid);

            multiApply(ops.getDeque());

            applyOpsToOplog(&ops.getDeque());

            // If we're just testing (no manager), don't keep looping if we exhausted the bgqueue
            // TODO(spencer): Remove repltest.cpp dbtest or make this work with the new replication
            // coordinator
            if (theReplSet && !theReplSet->mgr) {
                BSONObj op;
                if (!peek(&op)) {
                    return;
                }
            }
        }
    }
Example #22
 void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) {
     ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
     if (replCoord->getSettings().usingReplSets()) {
         if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet
                 || replCoord->getCurrentMemberState().shunned()) {
             result.append("ismaster", false);
             result.append("secondary", false);
             result.append("info", ReplSet::startupStatusMsg.get());
             result.append( "isreplicaset" , true );
         }
         else {
             theReplSet->fillIsMaster(result);
         }
         return;
     }
     
     if ( replAllDead ) {
         result.append("ismaster", 0);
         string s = string("dead: ") + replAllDead;
         result.append("info", s);
     }
     else {
         result.appendBool("ismaster",
                           getGlobalReplicationCoordinator()->isMasterForReportingPurposes());
     }
     
     if (level && replCoord->getSettings().usingReplSets()) {
         result.append( "info" , "is replica set" );
     }
     else if ( level ) {
         BSONObjBuilder sources( result.subarrayStart( "sources" ) );
         
         int n = 0;
         list<BSONObj> src;
         {
             const char* localSources = "local.sources";
             Client::ReadContext ctx(txn, localSources);
             auto_ptr<PlanExecutor> exec(
                 InternalPlanner::collectionScan(txn,
                                                 localSources,
                                                 ctx.ctx().db()->getCollection(txn,
                                                                               localSources)));
             BSONObj obj;
             Runner::RunnerState state;
             while (Runner::RUNNER_ADVANCED == (state = exec->getNext(&obj, NULL))) {
                 src.push_back(obj);
             }
         }
         
         for( list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++ ) {
             BSONObj s = *i;
             BSONObjBuilder bb;
             bb.append( s["host"] );
             string sourcename = s["source"].valuestr();
             if ( sourcename != "main" )
                 bb.append( s["source"] );
             {
                 BSONElement e = s["syncedTo"];
                 BSONObjBuilder t( bb.subobjStart( "syncedTo" ) );
                 t.appendDate( "time" , e.timestampTime() );
                 t.append( "inc" , e.timestampInc() );
                 t.done();
             }
             
             if ( level > 1 ) {
                 wassert(txn->lockState()->threadState() == 0);
                 // note: there is no so-style timeout on this connection; perhaps we should have one.
                 ScopedDbConnection conn(s["host"].valuestr());
                 
                 DBClientConnection *cliConn = dynamic_cast< DBClientConnection* >( &conn.conn() );
                 if ( cliConn && replAuthenticate(cliConn) ) {
                     BSONObj first = conn->findOne( (string)"local.oplog.$" + sourcename,
                                                           Query().sort( BSON( "$natural" << 1 ) ) );
                     BSONObj last = conn->findOne( (string)"local.oplog.$" + sourcename,
                                                          Query().sort( BSON( "$natural" << -1 ) ) );
                     bb.appendDate( "masterFirst" , first["ts"].timestampTime() );
                     bb.appendDate( "masterLast" , last["ts"].timestampTime() );
                     double lag = (double) (last["ts"].timestampTime() - s["syncedTo"].timestampTime());
                     bb.append( "lagSeconds" , lag / 1000 );
                 }
                 conn.done();
             }
             
             sources.append( BSONObjBuilder::numStr( n++ ) , bb.obj() );
         }
         
         sources.done();
     }
 }
Example #23
void appendReplicationInfo(OperationContext* txn, BSONObjBuilder& result, int level) {
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    if (replCoord->getSettings().usingReplSets()) {
        IsMasterResponse isMasterResponse;
        replCoord->fillIsMasterForReplSet(&isMasterResponse);
        result.appendElements(isMasterResponse.toBSON());
        if (level) {
            replCoord->appendSlaveInfoData(&result);
        }
        return;
    }

    // TODO(dannenberg) replAllDead is bad and should be removed when master slave is removed
    if (replAllDead) {
        result.append("ismaster", 0);
        string s = string("dead: ") + replAllDead;
        result.append("info", s);
    } else {
        result.appendBool("ismaster",
                          getGlobalReplicationCoordinator()->isMasterForReportingPurposes());
    }

    if (level) {
        BSONObjBuilder sources(result.subarrayStart("sources"));

        int n = 0;
        list<BSONObj> src;
        {
            const char* localSources = "local.sources";
            AutoGetCollectionForRead ctx(txn, localSources);
            unique_ptr<PlanExecutor> exec(
                InternalPlanner::collectionScan(txn, localSources, ctx.getCollection()));
            BSONObj obj;
            PlanExecutor::ExecState state;
            while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
                src.push_back(obj);
            }
        }

        for (list<BSONObj>::const_iterator i = src.begin(); i != src.end(); i++) {
            BSONObj s = *i;
            BSONObjBuilder bb;
            bb.append(s["host"]);
            string sourcename = s["source"].valuestr();
            if (sourcename != "main")
                bb.append(s["source"]);
            {
                BSONElement e = s["syncedTo"];
                BSONObjBuilder t(bb.subobjStart("syncedTo"));
                t.appendDate("time", e.timestampTime());
                t.append("inc", e.timestampInc());
                t.done();
            }

            if (level > 1) {
                wassert(!txn->lockState()->isLocked());
                // note: there is no so-style timeout on this connection; perhaps we should have
                // one.
                ScopedDbConnection conn(s["host"].valuestr());

                DBClientConnection* cliConn = dynamic_cast<DBClientConnection*>(&conn.conn());
                if (cliConn && replAuthenticate(cliConn)) {
                    BSONObj first = conn->findOne((string) "local.oplog.$" + sourcename,
                                                  Query().sort(BSON("$natural" << 1)));
                    BSONObj last = conn->findOne((string) "local.oplog.$" + sourcename,
                                                 Query().sort(BSON("$natural" << -1)));
                    bb.appendDate("masterFirst", first["ts"].timestampTime());
                    bb.appendDate("masterLast", last["ts"].timestampTime());
                    const auto lag = (last["ts"].timestampTime() - s["syncedTo"].timestampTime());
                    bb.append("lagSeconds", durationCount<Milliseconds>(lag) / 1000.0);
                }
                conn.done();
            }

            sources.append(BSONObjBuilder::numStr(n++), bb.obj());
        }

        sources.done();

        replCoord->appendSlaveInfoData(&result);
    }
}
Example #24
void runSyncThread() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Set initial indexPrefetch setting
    const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
    if (!prefetch.empty()) {
        BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
        if (prefetch == "none")
            prefetchConfig = BackgroundSync::PREFETCH_NONE;
        else if (prefetch == "_id_only")
            prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
        else if (prefetch == "all")
            prefetchConfig = BackgroundSync::PREFETCH_ALL;
        else {
            warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
                      << "to \"all\"";
        }
        BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
    }

    while (!inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
            // Check criteria for doing an initial sync:
            // 1. If the oplog is empty, do an initial sync
            // 2. If minValid has _initialSyncFlag set, do an initial sync
            // 3. If initialSyncRequested is true
            if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
                getInitialSyncFlag() || initialSyncRequested) {
                syncDoInitialSync();
                continue;  // start from top again in case sync failed.
            }
            if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            /* we have some data.  continue tailing. */
            SyncTail tail(BackgroundSync::get(), multiSyncApply);
            tail.oplogApplication();
        } catch (const DBException& e) {
            log() << "Received exception while syncing: " << e.toString();
            sleepsecs(10);
        } catch (const std::exception& e) {
            log() << "Received exception while syncing: " << e.what();
            sleepsecs(10);
        }
    }
}
void ReplClientInfo::setLastOpToSystemLastOpTime(OperationContext* txn) {
    ReplicationCoordinator* replCoord = repl::ReplicationCoordinator::get(txn->getServiceContext());
    if (replCoord->isReplEnabled() && txn->writesAreReplicated()) {
        setLastOp(replCoord->getMyLastOptime());
    }
}