void SyncSourceFeedback::run() {
        Client::initThread("SyncSourceFeedbackThread");
        OperationContextImpl txn;

        bool positionChanged = false;
        bool handshakeNeeded = false;
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
            {
                boost::unique_lock<boost::mutex> lock(_mtx);
                while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
                    _cond.wait(lock);
                }

                if (_shutdownSignaled) {
                    break;
                }

                positionChanged = _positionChanged;
                handshakeNeeded = _handshakeNeeded;
                _positionChanged = false;
                _handshakeNeeded = false;
            }

            MemberState state = replCoord->getCurrentMemberState();
            if (state.primary() || state.fatal() || state.startup()) {
                continue;
            }
            const Member* target = BackgroundSync::get()->getSyncTarget();
            if (_syncTarget != target) {
                _resetConnection();
                _syncTarget = target;
            }
            if (!hasConnection()) {
                // fix connection if need be
                if (!target) {
                    sleepmillis(500);
                    continue;
                }
                if (!_connect(&txn, target->fullName())) {
                    sleepmillis(500);
                    continue;
                }
            }
            if (handshakeNeeded) {
                if (!replHandshake(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _handshakeNeeded = true;
                    continue;
                }
            }
            if (positionChanged) {
                if (!updateUpstream(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _positionChanged = true;
                }
            }
        }
        cc().shutdown();
    }
Exemple #2
0
    void BackgroundSync::_producerThread() {
        MemberState state = theReplSet->state();

        // we want to pause when the state changes to primary
        if (isAssumingPrimary() || state.primary()) {
            if (!_pause) {
                stop();
            }
            sleepsecs(1);
            return;
        }

        if (state.fatal() || state.startup()) {
            sleepsecs(5);
            return;
        }

        // if this member has an empty oplog, we cannot start syncing
        if (theReplSet->lastOpTimeWritten.isNull()) {
            sleepsecs(1);
            return;
        }
        // we want to unpause when we're no longer primary
        // start() also loads _lastOpTimeFetched, which we know is set from the "if"
        else if (_pause) {
            start();
        }

        produce();
    }
Exemple #3
0
void BackgroundSync::_producerThread(executor::TaskExecutor* taskExecutor) {
    const MemberState state = _replCoord->getMemberState();
    // we want to pause when the state changes to primary
    if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
        if (!isPaused()) {
            stop();
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (state.startup()) {
        // Wait for a config to be loaded
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    if (_replCoord->getMyLastOptime().isNull()) {
        sleepsecs(1);
        return;
    }
    // we want to unpause when we're no longer primary
    // start() also loads _lastOpTimeFetched, which we know is set from the "if"
    OperationContextImpl txn;
    if (isPaused()) {
        start(&txn);
    }

    _produce(&txn, taskExecutor);
}
Exemple #4
0
    void BackgroundSync::_producerThread() {
        MemberState state = theReplSet->state();

        // we want to pause when the state changes to primary
        if (isAssumingPrimary() || state.primary()) {
            if (!_pause) {
                stop();
            }
            sleepsecs(1);
            return;
        }

        if (state.startup()) {
            sleepsecs(1);
            return;
        }

        OperationContextImpl txn;

        // We need to wait until initial sync has started.
        if (_replCoord->getMyLastOptime().isNull()) {
            sleepsecs(1);
            return;
        }
        // we want to unpause when we're no longer primary
        // start() also loads _lastOpTimeFetched, which we know is set from the "if"
        else if (_pause) {
            start(&txn);
        }

        produce(&txn);
    }
Exemple #5
0
    void BackgroundSync::notifierThread() {
        Client::initThread("rsSyncNotifier");
        replLocalAuth();

        // This makes the initial connection to our sync source for oplog position notification.
        // It also sets the supportsUpdater flag so we know which method to use.
        // If this function fails, we ignore that situation because it will be taken care of
        // the first time markOplog() is called in the loop below.
        {
            boost::unique_lock<boost::mutex> oplogLockSSF(theReplSet->syncSourceFeedback.oplock);
            connectOplogNotifier();
        }
        theReplSet->syncSourceFeedback.go();

        while (!inShutdown()) {
            bool clearTarget = false;

            if (!theReplSet) {
                sleepsecs(5);
                continue;
            }

            MemberState state = theReplSet->state();
            if (state.primary() || state.fatal() || state.startup()) {
                sleepsecs(5);
                continue;
            }

            try {
                {
                    boost::unique_lock<boost::mutex> lock(_lastOpMutex);
                    while (_consumedOpTime == theReplSet->lastOpTimeWritten) {
                        _lastOpCond.wait(lock);
                    }
                }

                markOplog();
            }
            catch (DBException &e) {
                clearTarget = true;
                log() << "replset tracking exception: " << e.getInfo() << rsLog;
                sleepsecs(1);
            }
            catch (std::exception &e2) {
                clearTarget = true;
                log() << "replset tracking error" << e2.what() << rsLog;
                sleepsecs(1);
            }

            if (clearTarget) {
                boost::unique_lock<boost::mutex> lock(_mutex);
                _oplogMarkerTarget = NULL;
            }
        }

        cc().shutdown();
    }
Exemple #6
0
void SyncSourceFeedback::run() {
    Client::initThread("SyncSourceFeedback");

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    while (true) {  // breaks once _shutdownSignaled is true
        {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, _keepAliveInterval) == stdx::cv_status::timeout) {
                    break;
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        auto txn = cc().makeOperationContext();
        MemberState state = replCoord->getMemberState();
        if (state.primary() || state.startup()) {
            _resetConnection();
            continue;
        }
        const HostAndPort target = BackgroundSync::get()->getSyncTarget();
        if (_syncTarget != target) {
            _resetConnection();
            _syncTarget = target;
        }
        if (!hasConnection()) {
            // fix connection if need be
            if (target.empty()) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
            if (!_connect(txn.get(), target)) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
        }
        Status status = updateUpstream(txn.get());
        if (!status.isOK()) {
            sleepmillis(500);
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            _positionChanged = true;
        }
    }
}
Exemple #7
0
    void BackgroundSync::notifierThread() {
        Client::initThread("rsSyncNotifier");
        replLocalAuth();
        theReplSet->syncSourceFeedback.go();

        while (!inShutdown()) {
            bool clearTarget = false;

            if (!theReplSet) {
                sleepsecs(5);
                continue;
            }

            MemberState state = theReplSet->state();
            if (state.primary() || state.fatal() || state.startup()) {
                sleepsecs(5);
                continue;
            }

            try {
                {
                    boost::unique_lock<boost::mutex> lock(_lastOpMutex);
                    while (_consumedOpTime == theReplSet->lastOpTimeWritten) {
                        _lastOpCond.wait(lock);
                    }
                }

                markOplog();
            }
            catch (DBException &e) {
                clearTarget = true;
                log() << "replset tracking exception: " << e.getInfo() << rsLog;
                sleepsecs(1);
            }
            catch (std::exception &e2) {
                clearTarget = true;
                log() << "replset tracking error" << e2.what() << rsLog;
                sleepsecs(1);
            }

            if (clearTarget) {
                boost::unique_lock<boost::mutex> lock(_mutex);
                _oplogMarkerTarget = NULL;
            }
        }

        cc().shutdown();
    }
Exemple #8
0
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {

            ScopedTransaction transaction(txn, MODE_X);
            Lock::GlobalWrite globalWriteLock(txn->lockState());

            ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
            if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
                const MemberState memberState = replCoord->getMemberState();
                if (memberState.startup()) {
                    return appendCommandStatus(result, Status(ErrorCodes::NotYetInitialized,
                                                              "no replication yet active"));
                }
                if (memberState.primary() ||
                        !replCoord->setFollowerMode(MemberState::RS_STARTUP2)) {
                    return appendCommandStatus(result, Status(ErrorCodes::NotSecondary,
                                                              "primaries cannot resync"));
                }
                BackgroundSync::get()->setInitialSyncRequestedFlag(true);
                return true;
            }

            // below this comment pertains only to master/slave replication
            if ( cmdObj.getBoolField( "force" ) ) {
                if ( !waitForSyncToFinish(txn, errmsg ) )
                    return false;
                replAllDead = "resync forced";
            }
            // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
            if (!replAllDead) {
                errmsg = "not dead, no need to resync";
                return false;
            }
            if ( !waitForSyncToFinish(txn, errmsg ) )
                return false;

            ReplSource::forceResyncDead( txn, "client" );
            result.append( "info", "triggered resync for all sources" );

            return true;
        }
void BackgroundSync::_producerThread() {
    const MemberState state = _replCoord->getMemberState();
    // we want to pause when the state changes to primary
    if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
        if (!isPaused()) {
            stop();
        }
        if (_replCoord->isWaitingForApplierToDrain()) {
            // Signal to consumers that we have entered the paused state if the signal isn't already
            // in the queue.
            const boost::optional<BSONObj> lastObjectPushed = _buffer.lastObjectPushed();
            if (!lastObjectPushed || !lastObjectPushed->isEmpty()) {
                const BSONObj sentinelDoc;
                _buffer.pushEvenIfFull(sentinelDoc);
                bufferCountGauge.increment();
                bufferSizeGauge.increment(sentinelDoc.objsize());
            }
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (state.startup()) {
        // Wait for a config to be loaded
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    if (_replCoord->getMyLastOptime().isNull()) {
        sleepsecs(1);
        return;
    }
    // we want to unpause when we're no longer primary
    // start() also loads _lastOpTimeFetched, which we know is set from the "if"
    OperationContextImpl txn;
    if (isPaused()) {
        start(&txn);
    }

    _produce(&txn);
}
Exemple #10
0
void BackgroundSync::_runProducer() {
    const MemberState state = _replCoord->getMemberState();
    // Stop when the state changes to primary.
    //
    // TODO(siyuan) Drain mode should imply we're the primary. Fix this condition and the one below
    // after fixing step-down during drain mode.
    if (!_replCoord->isCatchingUp() &&
        (_replCoord->isWaitingForApplierToDrain() || state.primary())) {
        if (!isStopped()) {
            stop();
        }
        if (_replCoord->isWaitingForApplierToDrain()) {
            auto txn = cc().makeOperationContext();
            _signalNoNewDataForApplier(txn.get());
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (state.startup()) {
        // Wait for a config to be loaded
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    if (_replCoord->getMyLastAppliedOpTime().isNull()) {
        sleepsecs(1);
        return;
    }
    // we want to start when we're no longer primary
    // start() also loads _lastOpTimeFetched, which we know is set from the "if"
    auto txn = cc().makeOperationContext();
    if (isStopped()) {
        start(txn.get());
    }

    _produce(txn.get());
}
Exemple #11
0
void BackgroundSync::_producerThread(
    ReplicationCoordinatorExternalState* replicationCoordinatorExternalState) {
    const MemberState state = _replCoord->getMemberState();
    // Stop when the state changes to primary.
    if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
        if (!isStopped()) {
            stop();
        }
        if (_replCoord->isWaitingForApplierToDrain()) {
            _signalNoNewDataForApplier();
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (state.startup()) {
        // Wait for a config to be loaded
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    if (_replCoord->getMyLastAppliedOpTime().isNull()) {
        sleepsecs(1);
        return;
    }
    // we want to start when we're no longer primary
    // start() also loads _lastOpTimeFetched, which we know is set from the "if"
    const ServiceContext::UniqueOperationContext txnPtr = cc().makeOperationContext();
    OperationContext& txn = *txnPtr;
    if (isStopped()) {
        start(&txn);
    }

    _produce(&txn, replicationCoordinatorExternalState);
}
Exemple #12
0
    void BackgroundSync::producerThread() {
        {
            boost::unique_lock<boost::mutex> lock(_mutex);
            _opSyncInProgress = true;
        }
        Client::initThread("rsBackgroundSync");
        replLocalAuth();
        uint32_t timeToSleep = 0;

        while (!_opSyncShouldExit) {
            try {
                if (timeToSleep) {
                    {
                        boost::unique_lock<boost::mutex> lck(_mutex);
                        _opSyncRunning = false;
                        // notify other threads that we are not running
                        _opSyncRunningCondVar.notify_all();
                    }
                    for (uint32_t i = 0; i < timeToSleep; i++) {
                        sleepsecs(1);
                        // get out if we need to
                        if (_opSyncShouldExit) { break; }
                    }
                    timeToSleep = 0;
                }
                // get out if we need to
                if (_opSyncShouldExit) { break; }

                {
                    boost::unique_lock<boost::mutex> lck(_mutex);
                    _opSyncRunning = false;

                    while (!_opSyncShouldRun && !_opSyncShouldExit) {
                        // notify other threads that we are not running
                        _opSyncRunningCondVar.notify_all();
                        // wait for permission that we can run
                        _opSyncCanRunCondVar.wait(lck);
                    }

                    // notify other threads that we are running
                    _opSyncRunningCondVar.notify_all();
                    _opSyncRunning = true;
                }
                // get out if we need to
                if (_opSyncShouldExit) { break; }

                MemberState state = theReplSet->state();
                if (state.fatal() || state.startup()) {
                    timeToSleep = 5;
                    continue;
                }
                // this does the work of reading a remote oplog
                // and writing it to our oplog
                timeToSleep = produce();
            }
            catch (DBException& e) {
                sethbmsg(str::stream() << "db exception in producer: " << e.toString());
                timeToSleep = 10;
            }
            catch (std::exception& e2) {
                sethbmsg(str::stream() << "exception in producer: " << e2.what());
                timeToSleep = 10;
            }
        }

        cc().shutdown();
        {
            boost::unique_lock<boost::mutex> lock(_mutex);
            _opSyncRunning = false;
            _opSyncInProgress = false;
        }
    }
void SyncSourceFeedback::run(executor::TaskExecutor* executor, BackgroundSync* bgsync) {
    Client::initThread("SyncSourceFeedback");

    HostAndPort syncTarget;

    // keepAliveInterval indicates how frequently to forward progress in the absence of updates.
    Milliseconds keepAliveInterval(0);

    while (true) {  // breaks once _shutdownSignaled is true
        auto txn = cc().makeOperationContext();

        if (keepAliveInterval == Milliseconds(0)) {
            keepAliveInterval = calculateKeepAliveInterval(txn.get(), _mtx);
        }

        {
            // Take SyncSourceFeedback lock before calling into ReplicationCoordinator
            // to avoid deadlock because ReplicationCoordinator could conceivably calling back into
            // this class.
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, keepAliveInterval.toSystemDuration()) ==
                    stdx::cv_status::timeout) {
                    MemberState state = ReplicationCoordinator::get(txn.get())->getMemberState();
                    if (!(state.primary() || state.startup())) {
                        break;
                    }
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            MemberState state = ReplicationCoordinator::get(txn.get())->getMemberState();
            if (state.primary() || state.startup()) {
                continue;
            }
        }

        const HostAndPort target = bgsync->getSyncTarget();
        // Log sync source changes.
        if (target.empty()) {
            if (syncTarget != target) {
                syncTarget = target;
            }
            // Loop back around again; the keepalive functionality will cause us to retry
            continue;
        }

        if (syncTarget != target) {
            LOG(1) << "setting syncSourceFeedback to " << target;
            syncTarget = target;

            // Update keepalive value from config.
            auto oldKeepAliveInterval = keepAliveInterval;
            keepAliveInterval = calculateKeepAliveInterval(txn.get(), _mtx);
            if (oldKeepAliveInterval != keepAliveInterval) {
                LOG(1) << "new syncSourceFeedback keep alive duration = " << keepAliveInterval
                       << " (previously " << oldKeepAliveInterval << ")";
            }
        }

        Reporter reporter(
            executor,
            makePrepareReplSetUpdatePositionCommandFn(txn.get(), _mtx, syncTarget, bgsync),
            syncTarget,
            keepAliveInterval);
        {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            _reporter = &reporter;
        }
        ON_BLOCK_EXIT([this]() {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            _reporter = nullptr;
        });

        auto status = _updateUpstream(txn.get(), bgsync);
        if (!status.isOK()) {
            LOG(1) << "The replication progress command (replSetUpdatePosition) failed and will be "
                      "retried: "
                   << status;
        }
    }
}