Exemplo n.º 1
0
    void BackgroundSync::_producerThread() {
        MemberState state = theReplSet->state();

        // we want to pause when the state changes to primary
        if (isAssumingPrimary() || state.primary()) {
            if (!_pause) {
                stop();
            }
            sleepsecs(1);
            return;
        }

        if (state.startup()) {
            sleepsecs(1);
            return;
        }

        OperationContextImpl txn;

        // We need to wait until initial sync has started.
        if (_replCoord->getMyLastOptime().isNull()) {
            sleepsecs(1);
            return;
        }
        // we want to unpause when we're no longer primary
        // start() also loads _lastOpTimeFetched, which we know is set from the "if"
        else if (_pause) {
            start(&txn);
        }

        produce(&txn);
    }
Exemplo n.º 2
0
    void BackgroundSync::_producerThread() {
        MemberState state = theReplSet->state();

        // we want to pause when the state changes to primary
        if (isAssumingPrimary() || state.primary()) {
            if (!_pause) {
                stop();
            }
            sleepsecs(1);
            return;
        }

        if (state.fatal() || state.startup()) {
            sleepsecs(5);
            return;
        }

        // if this member has an empty oplog, we cannot start syncing
        if (theReplSet->lastOpTimeWritten.isNull()) {
            sleepsecs(1);
            return;
        }
        // we want to unpause when we're no longer primary
        // start() also loads _lastOpTimeFetched, which we know is set from the "if"
        else if (_pause) {
            start();
        }

        produce();
    }
Exemplo n.º 3
0
/**
 * Runs a single pass of the oplog producer.
 *
 * Returns early, after a one second sleep, when this node is primary or waiting for the
 * applier to drain, when it is still in the startup state, or when its last optime is null.
 * Otherwise it calls start() if the producer is paused and hands off to _produce().
 *
 * @param taskExecutor forwarded unchanged to _produce().
 */
void BackgroundSync::_producerThread(executor::TaskExecutor* taskExecutor) {
    const MemberState memberState = _replCoord->getMemberState();

    // We want to pause when the state changes to primary (or while draining).
    const bool shouldPause = _replCoord->isWaitingForApplierToDrain() || memberState.primary();
    if (shouldPause) {
        if (!isPaused()) {
            stop();
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (memberState.startup()) {
        // Still waiting for a config to be loaded.
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    const bool initialSyncNotStarted = _replCoord->getMyLastOptime().isNull();
    if (initialSyncNotStarted) {
        sleepsecs(1);
        return;
    }

    // We are no longer primary, so unpause. start() also loads _lastOpTimeFetched, which we
    // know is set because the null-optime check above did not return.
    OperationContextImpl txn;
    if (isPaused()) {
        start(&txn);
    }

    _produce(&txn, taskExecutor);
}
Exemplo n.º 4
0
    void SyncSourceFeedback::run() {
        Client::initThread("SyncSourceFeedbackThread");
        OperationContextImpl txn;

        bool positionChanged = false;
        bool handshakeNeeded = false;
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
        while (!inShutdown()) { // TODO(spencer): Remove once legacy repl coordinator is gone.
            {
                boost::unique_lock<boost::mutex> lock(_mtx);
                while (!_positionChanged && !_handshakeNeeded && !_shutdownSignaled) {
                    _cond.wait(lock);
                }

                if (_shutdownSignaled) {
                    break;
                }

                positionChanged = _positionChanged;
                handshakeNeeded = _handshakeNeeded;
                _positionChanged = false;
                _handshakeNeeded = false;
            }

            MemberState state = replCoord->getCurrentMemberState();
            if (state.primary() || state.fatal() || state.startup()) {
                continue;
            }
            const Member* target = BackgroundSync::get()->getSyncTarget();
            if (_syncTarget != target) {
                _resetConnection();
                _syncTarget = target;
            }
            if (!hasConnection()) {
                // fix connection if need be
                if (!target) {
                    sleepmillis(500);
                    continue;
                }
                if (!_connect(&txn, target->fullName())) {
                    sleepmillis(500);
                    continue;
                }
            }
            if (handshakeNeeded) {
                if (!replHandshake(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _handshakeNeeded = true;
                    continue;
                }
            }
            if (positionChanged) {
                if (!updateUpstream(&txn)) {
                    boost::unique_lock<boost::mutex> lock(_mtx);
                    _positionChanged = true;
                }
            }
        }
        cc().shutdown();
    }
Exemplo n.º 5
0
void RSDataSync::_run() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();

    // Overwrite prefetch index mode in BackgroundSync if ReplSettings has a mode set.
    auto&& replSettings = _replCoord->getSettings();
    if (replSettings.isPrefetchIndexModeSet())
        _replCoord->setIndexPrefetchConfig(replSettings.getPrefetchIndexMode());

    while (!_bgsync->inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (_replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = _replCoord->getMemberState();

        // TODO(siyuan) Control the behavior using applier state.
        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (_replCoord->getApplierState() == ReplicationCoordinator::ApplierState::Stopped) {
                sleepsecs(1);
                continue;
            }

            auto status = _replCoord->setFollowerMode(MemberState::RS_RECOVERING);
            if (!status.isOK()) {
                LOG(2) << "Failed to transition to RECOVERING to start data replication"
                       << causedBy(status);
                continue;
            }

            // Once we call into SyncTail::oplogApplication we never return, so this code only runs
            // at startup.  It is not valid to transition from PRIMARY to RECOVERING ever, or from
            // SECONDARY to RECOVERING without holding a global X lock, so we invariant to make
            // sure this never happens.
            invariant(!memberState.primary() && !memberState.secondary());
            SyncTail(_bgsync, multiSyncApply).oplogApplication(_replCoord);
        } catch (...) {
            auto status = exceptionToStatus();
            severe() << "Exception thrown in RSDataSync: " << redact(status);
            std::terminate();
        }
    }
}
Exemplo n.º 6
0
    void BackgroundSync::notifierThread() {
        Client::initThread("rsSyncNotifier");
        replLocalAuth();

        // This makes the initial connection to our sync source for oplog position notification.
        // It also sets the supportsUpdater flag so we know which method to use.
        // If this function fails, we ignore that situation because it will be taken care of
        // the first time markOplog() is called in the loop below.
        {
            boost::unique_lock<boost::mutex> oplogLockSSF(theReplSet->syncSourceFeedback.oplock);
            connectOplogNotifier();
        }
        theReplSet->syncSourceFeedback.go();

        while (!inShutdown()) {
            bool clearTarget = false;

            if (!theReplSet) {
                sleepsecs(5);
                continue;
            }

            MemberState state = theReplSet->state();
            if (state.primary() || state.fatal() || state.startup()) {
                sleepsecs(5);
                continue;
            }

            try {
                {
                    boost::unique_lock<boost::mutex> lock(_lastOpMutex);
                    while (_consumedOpTime == theReplSet->lastOpTimeWritten) {
                        _lastOpCond.wait(lock);
                    }
                }

                markOplog();
            }
            catch (DBException &e) {
                clearTarget = true;
                log() << "replset tracking exception: " << e.getInfo() << rsLog;
                sleepsecs(1);
            }
            catch (std::exception &e2) {
                clearTarget = true;
                log() << "replset tracking error" << e2.what() << rsLog;
                sleepsecs(1);
            }

            if (clearTarget) {
                boost::unique_lock<boost::mutex> lock(_mutex);
                _oplogMarkerTarget = NULL;
            }
        }

        cc().shutdown();
    }
Exemplo n.º 7
0
void RSDataSync::_run() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();

    // Overwrite prefetch index mode in BackgroundSync if ReplSettings has a mode set.
    auto&& replSettings = _replCoord->getSettings();
    if (replSettings.isPrefetchIndexModeSet())
        _replCoord->setIndexPrefetchConfig(replSettings.getPrefetchIndexMode());

    while (!_isInShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (_replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = _replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !_replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            if (!_replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            SyncTail tail(_bgsync, multiSyncApply);
            tail.oplogApplication(_replCoord, [this]() { return _isInShutdown(); });
        } catch (...) {
            std::terminate();
        }
    }

    LockGuard lk(_mutex);
    _inShutdown = false;
    _stopped = true;
}
Exemplo n.º 8
0
void SyncSourceFeedback::run() {
    Client::initThread("SyncSourceFeedback");

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    while (true) {  // breaks once _shutdownSignaled is true
        {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, _keepAliveInterval) == stdx::cv_status::timeout) {
                    break;
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        auto txn = cc().makeOperationContext();
        MemberState state = replCoord->getMemberState();
        if (state.primary() || state.startup()) {
            _resetConnection();
            continue;
        }
        const HostAndPort target = BackgroundSync::get()->getSyncTarget();
        if (_syncTarget != target) {
            _resetConnection();
            _syncTarget = target;
        }
        if (!hasConnection()) {
            // fix connection if need be
            if (target.empty()) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
            if (!_connect(txn.get(), target)) {
                sleepmillis(500);
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
                continue;
            }
        }
        Status status = updateUpstream(txn.get());
        if (!status.isOK()) {
            sleepmillis(500);
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            _positionChanged = true;
        }
    }
}
Exemplo n.º 9
0
    void BackgroundSync::notifierThread() {
        Client::initThread("rsSyncNotifier");
        replLocalAuth();
        theReplSet->syncSourceFeedback.go();

        while (!inShutdown()) {
            bool clearTarget = false;

            if (!theReplSet) {
                sleepsecs(5);
                continue;
            }

            MemberState state = theReplSet->state();
            if (state.primary() || state.fatal() || state.startup()) {
                sleepsecs(5);
                continue;
            }

            try {
                {
                    boost::unique_lock<boost::mutex> lock(_lastOpMutex);
                    while (_consumedOpTime == theReplSet->lastOpTimeWritten) {
                        _lastOpCond.wait(lock);
                    }
                }

                markOplog();
            }
            catch (DBException &e) {
                clearTarget = true;
                log() << "replset tracking exception: " << e.getInfo() << rsLog;
                sleepsecs(1);
            }
            catch (std::exception &e2) {
                clearTarget = true;
                log() << "replset tracking error" << e2.what() << rsLog;
                sleepsecs(1);
            }

            if (clearTarget) {
                boost::unique_lock<boost::mutex> lock(_mutex);
                _oplogMarkerTarget = NULL;
            }
        }

        cc().shutdown();
    }
Exemplo n.º 10
0
        /**
         * Handles a resync request.
         *
         * With replica sets enabled: fails with NotYetInitialized while the member is in
         * startup, fails with NotSecondary if the member is primary or cannot be moved to
         * STARTUP2, and otherwise raises the initial-sync-requested flag on BackgroundSync.
         *
         * With master/slave replication: optionally forces the "dead" state when the command
         * has "force" set, refuses when replication is not dead, and otherwise waits for any
         * running sync to finish before triggering a resync of all sources.
         *
         * @param txn     operation context; used for the global write lock and passed on.
         * @param cmdObj  command document; only its boolean "force" field is read here.
         * @param errmsg  receives the failure reason on the master/slave paths.
         * @param result  receives the command status or the "info" field.
         * @return true on success, false (or the appendCommandStatus() result) on failure.
         */
        virtual bool run(OperationContext* txn,
                         const string& dbname,
                         BSONObj& cmdObj,
                         int,
                         string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {

            ScopedTransaction scopedXact(txn, MODE_X);
            Lock::GlobalWrite globalLock(txn->lockState());

            ReplicationCoordinator* coordinator = getGlobalReplicationCoordinator();
            if (getGlobalReplicationCoordinator()->getSettings().usingReplSets()) {
                const MemberState currentState = coordinator->getMemberState();
                if (currentState.startup()) {
                    const Status notInitialized(ErrorCodes::NotYetInitialized,
                                                "no replication yet active");
                    return appendCommandStatus(result, notInitialized);
                }

                const bool cannotResync = currentState.primary() ||
                    !coordinator->setFollowerMode(MemberState::RS_STARTUP2);
                if (cannotResync) {
                    const Status notSecondary(ErrorCodes::NotSecondary,
                                              "primaries cannot resync");
                    return appendCommandStatus(result, notSecondary);
                }

                BackgroundSync::get()->setInitialSyncRequestedFlag(true);
                return true;
            }

            // Everything below pertains only to master/slave replication.
            const bool forceRequested = cmdObj.getBoolField("force");
            if (forceRequested) {
                if (!waitForSyncToFinish(txn, errmsg)) {
                    return false;
                }
                replAllDead = "resync forced";
            }

            // TODO(dannenberg) replAllDead is bad and should be removed when masterslave is removed
            if (!replAllDead) {
                errmsg = "not dead, no need to resync";
                return false;
            }

            if (!waitForSyncToFinish(txn, errmsg)) {
                return false;
            }

            ReplSource::forceResyncDead(txn, "client");
            result.append("info", "triggered resync for all sources");

            return true;
        }
Exemplo n.º 11
0
void BackgroundSync::_producerThread() {
    const MemberState state = _replCoord->getMemberState();
    // we want to pause when the state changes to primary
    if (_replCoord->isWaitingForApplierToDrain() || state.primary()) {
        if (!isPaused()) {
            stop();
        }
        if (_replCoord->isWaitingForApplierToDrain()) {
            // Signal to consumers that we have entered the paused state if the signal isn't already
            // in the queue.
            const boost::optional<BSONObj> lastObjectPushed = _buffer.lastObjectPushed();
            if (!lastObjectPushed || !lastObjectPushed->isEmpty()) {
                const BSONObj sentinelDoc;
                _buffer.pushEvenIfFull(sentinelDoc);
                bufferCountGauge.increment();
                bufferSizeGauge.increment(sentinelDoc.objsize());
            }
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (state.startup()) {
        // Wait for a config to be loaded
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    if (_replCoord->getMyLastOptime().isNull()) {
        sleepsecs(1);
        return;
    }
    // we want to unpause when we're no longer primary
    // start() also loads _lastOpTimeFetched, which we know is set from the "if"
    OperationContextImpl txn;
    if (isPaused()) {
        start(&txn);
    }

    _produce(&txn);
}
Exemplo n.º 12
0
void BackgroundSync::_runProducer() {
    const MemberState state = _replCoord->getMemberState();
    // Stop when the state changes to primary.
    //
    // TODO(siyuan) Drain mode should imply we're the primary. Fix this condition and the one below
    // after fixing step-down during drain mode.
    if (!_replCoord->isCatchingUp() &&
        (_replCoord->isWaitingForApplierToDrain() || state.primary())) {
        if (!isStopped()) {
            stop();
        }
        if (_replCoord->isWaitingForApplierToDrain()) {
            auto txn = cc().makeOperationContext();
            _signalNoNewDataForApplier(txn.get());
        }
        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (state.startup()) {
        // Wait for a config to be loaded
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    if (_replCoord->getMyLastAppliedOpTime().isNull()) {
        sleepsecs(1);
        return;
    }
    // we want to start when we're no longer primary
    // start() also loads _lastOpTimeFetched, which we know is set from the "if"
    auto txn = cc().makeOperationContext();
    if (isStopped()) {
        start(txn.get());
    }

    _produce(txn.get());
}
Exemplo n.º 13
0
/**
 * Runs a single pass of the oplog producer.
 *
 * A primary or draining node stops the producer, notifies the applier that no new data is
 * coming (draining case only), sleeps and returns. The pass also returns early while the node
 * is in startup or has a null last applied optime. Otherwise it calls start() if the producer
 * is stopped and then _produce().
 *
 * @param replicationCoordinatorExternalState forwarded unchanged to _produce().
 */
void BackgroundSync::_producerThread(
    ReplicationCoordinatorExternalState* replicationCoordinatorExternalState) {
    const MemberState memberState = _replCoord->getMemberState();

    // Stop when the state changes to primary.
    const bool shouldStop = _replCoord->isWaitingForApplierToDrain() || memberState.primary();
    if (shouldStop) {
        if (!isStopped()) {
            stop();
        }

        if (_replCoord->isWaitingForApplierToDrain()) {
            _signalNoNewDataForApplier();
        }

        sleepsecs(1);
        return;
    }

    // TODO(spencer): Use a condition variable to await loading a config.
    if (memberState.startup()) {
        // Still waiting for a config to be loaded.
        sleepsecs(1);
        return;
    }

    // We need to wait until initial sync has started.
    const bool initialSyncNotStarted = _replCoord->getMyLastAppliedOpTime().isNull();
    if (initialSyncNotStarted) {
        sleepsecs(1);
        return;
    }

    // We are no longer primary, so start up again. start() also loads _lastOpTimeFetched,
    // which we know is set because the null-optime check above did not return.
    const ServiceContext::UniqueOperationContext opCtxHolder = cc().makeOperationContext();
    OperationContext* const opCtx = opCtxHolder.get();
    if (isStopped()) {
        start(opCtx);
    }

    _produce(opCtx, replicationCoordinatorExternalState);
}
Exemplo n.º 14
0
/**
 * Thread body that reports replication progress to the current sync source.
 *
 * Each iteration waits on _cond until the position changes, shutdown is signaled, or a wait
 * times out while this node is neither primary nor in startup. It then skips the rest of the
 * iteration if the node is primary/in startup or if there is no sync target; otherwise it
 * builds a Reporter for the target, publishes it through _reporter for the duration of the
 * iteration, and calls _updateUpstream(). The loop exits once _shutdownSignaled is set.
 *
 * @param executor  task executor handed to the Reporter.
 * @param bgsync    source of the current sync target; also passed to the command-preparation
 *                  function and to _updateUpstream().
 */
void SyncSourceFeedback::run(executor::TaskExecutor* executor, BackgroundSync* bgsync) {
    Client::initThread("SyncSourceFeedback");

    // Last sync target seen by this loop; used to detect target changes.
    HostAndPort syncTarget;

    // keepAliveInterval indicates how frequently to forward progress in the absence of updates.
    // Zero means "not computed yet"; see the check at the top of the loop.
    Milliseconds keepAliveInterval(0);

    while (true) {  // breaks once _shutdownSignaled is true
        auto txn = cc().makeOperationContext();

        // Compute the interval lazily (and again on later iterations if it is still zero).
        if (keepAliveInterval == Milliseconds(0)) {
            keepAliveInterval = calculateKeepAliveInterval(txn.get(), _mtx);
        }

        {
            // Take SyncSourceFeedback lock before calling into ReplicationCoordinator
            // to avoid deadlock because ReplicationCoordinator could conceivably call back
            // into this class.
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, keepAliveInterval.toSystemDuration()) ==
                    stdx::cv_status::timeout) {
                    // A timeout only ends the wait when this node is in a state that reports
                    // progress; a primary or starting-up node keeps waiting.
                    MemberState state = ReplicationCoordinator::get(txn.get())->getMemberState();
                    if (!(state.primary() || state.startup())) {
                        break;
                    }
                }
            }

            if (_shutdownSignaled) {
                break;
            }

            _positionChanged = false;
        }

        {
            // Re-check the member state under the lock: the wait above can also end because
            // _positionChanged was set, in which case the state was not examined.
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            MemberState state = ReplicationCoordinator::get(txn.get())->getMemberState();
            if (state.primary() || state.startup()) {
                continue;
            }
        }

        const HostAndPort target = bgsync->getSyncTarget();
        // With no sync target there is nothing to report to; just remember the (empty) target.
        if (target.empty()) {
            if (syncTarget != target) {
                syncTarget = target;
            }
            // Loop back around again; the keepalive functionality will cause us to retry
            continue;
        }

        // Log sync source changes.
        if (syncTarget != target) {
            LOG(1) << "setting syncSourceFeedback to " << target;
            syncTarget = target;

            // Update keepalive value from config.
            auto oldKeepAliveInterval = keepAliveInterval;
            keepAliveInterval = calculateKeepAliveInterval(txn.get(), _mtx);
            if (oldKeepAliveInterval != keepAliveInterval) {
                LOG(1) << "new syncSourceFeedback keep alive duration = " << keepAliveInterval
                       << " (previously " << oldKeepAliveInterval << ")";
            }
        }

        // The Reporter lives on this iteration's stack; _reporter points at it only until the
        // ON_BLOCK_EXIT handler below resets the pointer.
        Reporter reporter(
            executor,
            makePrepareReplSetUpdatePositionCommandFn(txn.get(), _mtx, syncTarget, bgsync),
            syncTarget,
            keepAliveInterval);
        {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            _reporter = &reporter;
        }
        // Clear the published pointer under the lock when this block is left.
        ON_BLOCK_EXIT([this]() {
            stdx::lock_guard<stdx::mutex> lock(_mtx);
            _reporter = nullptr;
        });

        // A failure is only logged here; the message states that the command will be retried.
        auto status = _updateUpstream(txn.get(), bgsync);
        if (!status.isOK()) {
            LOG(1) << "The replication progress command (replSetUpdatePosition) failed and will be "
                      "retried: "
                   << status;
        }
    }
}
Exemplo n.º 15
0
    /**
     * Appends a status summary of the replica set to `b`.
     *
     * Builds one sub-document for this node and one for every member in _members, sorts them,
     * and appends them as "members" together with the top-level fields "set", "date",
     * "myState" and (when applicable) "syncingTo".
     *
     * @param b builder that receives the summary fields.
     */
    void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const {
        // Per-member documents, collected here and appended as "members" at the end.
        vector<BSONObj> v;

        // Local const copy of the member pointer; it shadows the data member in this function.
        const Member *_self = this->_self;
        verify( _self );

        MemberState myState = box.getState();
        const HostAndPort syncTarget = BackgroundSync::get()->getSyncTarget();

        // add self
        {
            BSONObjBuilder bb;
            bb.append("_id", (int) _self->id());
            bb.append("name", _self->fullName());
            // This node always reports itself with health 1.0.
            bb.append("health", 1.0);
            bb.append("state", (int)myState.s);
            bb.append("stateStr", myState.toString());
            bb.append("uptime", (unsigned)(time(0) - serverGlobalParams.started));
            // Optime fields are omitted for arbiter-only members.
            if (!_self->config().arbiterOnly) {
                bb.appendTimestamp("optime", lastOpTimeWritten.asDate());
                bb.appendDate("optimeDate", lastOpTimeWritten.getSecs() * 1000LL);
            }

            // "maintenanceMode" is only reported when the value is non-zero.
            int maintenance = _maintenanceMode;
            if (maintenance) {
                bb.append("maintenanceMode", maintenance);
            }

            // Report the sync target only when there is one and this node is neither primary
            // nor removed.
            if ( !syncTarget.empty() &&
                (myState != MemberState::RS_PRIMARY) &&
                (myState != MemberState::RS_REMOVED) ) {
                bb.append("syncingTo", syncTarget.toString());
            }

            if (theReplSet) {
                string s = theReplSet->hbmsg();
                if( !s.empty() )
                    bb.append("infoMessage", s);

                // Election details are only reported while this node is primary.
                if (myState == MemberState::RS_PRIMARY) {
                    bb.appendTimestamp("electionTime", theReplSet->getElectionTime().asDate());
                    bb.appendDate("electionDate", theReplSet->getElectionTime().getSecs() * 1000LL);
                }
            }
            bb.append("self", true);
            v.push_back(bb.obj());
        }

        // Add every member from the _members list, using its heartbeat info.
        Member *m =_members.head();
        while( m ) {
            BSONObjBuilder bb;
            bb.append("_id", (int) m->id());
            bb.append("name", m->fullName());
            double h = m->hbinfo().health;
            bb.append("health", h);
            bb.append("state", (int) m->state().s);
            if( h == 0 ) {
                // if we can't connect the state info is from the past and could be confusing to show
                bb.append("stateStr", "(not reachable/healthy)");
            }
            else {
                bb.append("stateStr", m->state().toString());
            }
            // Uptime is reported as 0 when upSince is unset.
            bb.append("uptime", (unsigned) (m->hbinfo().upSince ? (time(0)-m->hbinfo().upSince) : 0));
            // Optime fields are omitted for arbiter-only members.
            if (!m->config().arbiterOnly) {
                bb.appendTimestamp("optime", m->hbinfo().opTime.asDate());
                bb.appendDate("optimeDate", m->hbinfo().opTime.getSecs() * 1000LL);
            }
            bb.appendTimeT("lastHeartbeat", m->hbinfo().lastHeartbeat);
            bb.appendTimeT("lastHeartbeatRecv", m->hbinfo().lastHeartbeatRecv);
            bb.append("pingMs", m->hbinfo().ping);
            string s = m->lhb();
            if( !s.empty() )
                bb.append("lastHeartbeatMessage", s);

            // "authenticated" is only emitted (as false) when the heartbeat flagged an auth issue.
            if (m->hbinfo().authIssue) {
                bb.append("authenticated", false);
            }

            string syncingTo = m->hbinfo().syncingTo;
            if (!syncingTo.empty()) {
                bb.append("syncingTo", syncingTo);
            }

            // Election details are only reported for a member whose state is primary.
            if (m->state() == MemberState::RS_PRIMARY) {
                bb.appendTimestamp("electionTime", m->hbinfo().electionTime.asDate());
                bb.appendDate("electionDate", m->hbinfo().electionTime.getSecs() * 1000LL);
            }

            v.push_back(bb.obj());
            m = m->next();
        }
        // Orders the member documents with BSONObj's comparison; presumably this sorts by
        // "_id" since it is the first field of every document -- TODO confirm.
        sort(v.begin(), v.end());
        b.append("set", name());
        b.appendTimeT("date", time(0));
        b.append("myState", myState.s);
        // Same condition as for the self document above, repeated at the top level.
        if ( !syncTarget.empty() &&
            (myState != MemberState::RS_PRIMARY) &&
            (myState != MemberState::RS_REMOVED) ) {
            b.append("syncingTo", syncTarget.toString());
        }
        b.append("members", v);
    }
Exemplo n.º 16
0
/**
 * Fills `b` with a status summary of the replica set as seen by this node.
 *
 * One sub-document is built for this node and one for each entry of
 * `_members`; the sub-documents are sorted and appended under "members",
 * next to the set-level fields "set", "date", "myState" and, when
 * applicable, "syncingTo" and "blind".
 */
void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const {
    vector<BSONObj> memberDocs;

    const Member *self = this->_self;
    verify( self );

    MemberState myState = box.getState();

    // Describe this node first.
    {
        BSONObjBuilder selfDoc;
        selfDoc.append("_id", (int) self->id());
        selfDoc.append("name", self->fullName());
        selfDoc.append("health", 1.0);
        selfDoc.append("state", (int)myState.s);
        selfDoc.append("stateStr", myState.toString());
        selfDoc.append("uptime", (unsigned)(time(0) - cmdLine.started));

        // Replication positions are only reported for non-arbiter members.
        if (!self->config().arbiterOnly) {
            GTID lastLiveGTID;
            GTID lastUnappliedGTID;
            GTID minLiveGTID;
            GTID minUnappliedGTID;
            gtidManager->getGTIDs(&lastLiveGTID,
                                  &lastUnappliedGTID,
                                  &minLiveGTID,
                                  &minUnappliedGTID);
            selfDoc.appendDate("optimeDate", gtidManager->getCurrTimestamp());
            selfDoc.append("lastGTID", lastLiveGTID.toString());
            selfDoc.append("lastUnappliedGTID", lastUnappliedGTID.toString());
            selfDoc.append("minLiveGTID", minLiveGTID.toString());
            selfDoc.append("minUnappliedGTID", minUnappliedGTID.toString());
            selfDoc.append("oplogVersion", ReplSetConfig::OPLOG_VERSION);
        }

        // Only emitted when the maintenance counter is non-zero.
        int maintenanceLevel = _maintenanceMode;
        if (maintenanceLevel != 0) {
            selfDoc.append("maintenanceMode", maintenanceLevel);
        }

        if (theReplSet) {
            string selfMessage = theReplSet->hbmsg();
            if (!selfMessage.empty()) {
                selfDoc.append("errmsg", selfMessage);
            }
        }
        selfDoc.append("self", true);
        memberDocs.push_back(selfDoc.obj());
    }

    // Then one document per other member, built from its heartbeat info.
    for (Member *member = _members.head(); member; member = member->next()) {
        BSONObjBuilder memberDoc;
        memberDoc.append("_id", (int) member->id());
        memberDoc.append("name", member->fullName());

        double health = member->hbinfo().health;
        memberDoc.append("health", health);
        memberDoc.append("state", (int) member->state().s);
        if (health != 0) {
            memberDoc.append("stateStr", member->state().toString());
        }
        else {
            // With no connection the recorded state is stale and would be
            // confusing to display, so a placeholder is shown instead.
            memberDoc.append("stateStr", "(not reachable/healthy)");
        }

        // Uptime is reported as 0 when no upSince value has been recorded.
        unsigned uptimeSecs = 0;
        if (member->hbinfo().upSince) {
            uptimeSecs = (unsigned) (time(0) - member->hbinfo().upSince);
        }
        memberDoc.append("uptime", uptimeSecs);

        if (!member->config().arbiterOnly) {
            memberDoc.appendDate("optimeDate", member->hbinfo().opTime);
            memberDoc.append("lastGTID", member->hbinfo().gtid.toString());
            memberDoc.append("lastUnappliedGTID", member->hbinfo().lastUnappliedGTID.toString());
            memberDoc.append("minLiveGTID", member->hbinfo().minLiveGTID.toString());
            memberDoc.append("minUnappliedGTID", member->hbinfo().minUnappliedGTID.toString());
            memberDoc.append("oplogVersion", member->hbinfo().oplogVersion);
        }
        memberDoc.appendTimeT("lastHeartbeat", member->hbinfo().lastHeartbeat);
        memberDoc.appendTimeT("lastHeartbeatRecv", member->getLastRecvHeartbeat());
        memberDoc.append("pingMs", member->hbinfo().ping);

        string heartbeatMessage = member->lhb();
        if (!heartbeatMessage.empty()) {
            memberDoc.append("lastHeartbeatMessage", heartbeatMessage);
        }

        if (member->hbinfo().authIssue) {
            memberDoc.append("authenticated", false);
        }

        string memberSyncSource = member->hbinfo().syncingTo;
        if (!memberSyncSource.empty()) {
            memberDoc.append("syncingTo", memberSyncSource);
        }

        memberDocs.push_back(memberDoc.obj());
    }

    sort(memberDocs.begin(), memberDocs.end());

    // Set-level fields.
    b.append("set", name());
    b.appendTimeT("date", time(0));
    b.append("myState", myState.s);

    // The sync target is omitted while this node is PRIMARY or SHUNNED.
    const Member *syncTarget = BackgroundSync::get()->getSyncTarget();
    if (syncTarget) {
        if ((myState != MemberState::RS_PRIMARY) &&
            (myState != MemberState::RS_SHUNNED)) {
            b.append("syncingTo", syncTarget->fullName());
        }
    }

    b.append("members", memberDocs);
    if (replSetBlind) {
        // Flagged to avoid confusion if set; normally only set for testing.
        b.append("blind",true);
    }
}
Exemplo n.º 17
0
    // Body of the "rsBackgroundSync" producer thread.
    //
    // Marks the sync as in progress, then loops until _opSyncShouldExit is
    // set. Each iteration optionally sleeps for the delay chosen by the
    // previous iteration, waits until _opSyncShouldRun permits running, and
    // then calls produce(), whose return value becomes the next delay in
    // seconds. Exceptions raised inside the loop are reported through
    // sethbmsg() and followed by a 10 second delay. On exit the client is
    // shut down and both _opSyncRunning and _opSyncInProgress are cleared.
    void BackgroundSync::producerThread() {
        {
            boost::unique_lock<boost::mutex> lock(_mutex);
            _opSyncInProgress = true;
        }
        Client::initThread("rsBackgroundSync");
        replLocalAuth();
        // Seconds to sleep at the top of the next iteration; 0 means no sleep.
        uint32_t timeToSleep = 0;

        while (!_opSyncShouldExit) {
            try {
                if (timeToSleep) {
                    {
                        boost::unique_lock<boost::mutex> lck(_mutex);
                        _opSyncRunning = false;
                        // notify other threads that we are not running
                        _opSyncRunningCondVar.notify_all();
                    }
                    // Sleep one second at a time so an exit request is
                    // noticed within roughly a second.
                    for (uint32_t i = 0; i < timeToSleep; i++) {
                        sleepsecs(1);
                        // get out if we need to
                        if (_opSyncShouldExit) { break; }
                    }
                    timeToSleep = 0;
                }
                // get out if we need to
                if (_opSyncShouldExit) { break; }

                {
                    boost::unique_lock<boost::mutex> lck(_mutex);
                    _opSyncRunning = false;

                    // Block here until either running is permitted or an
                    // exit has been requested.
                    while (!_opSyncShouldRun && !_opSyncShouldExit) {
                        // notify other threads that we are not running
                        _opSyncRunningCondVar.notify_all();
                        // wait for permission that we can run
                        _opSyncCanRunCondVar.wait(lck);
                    }

                    // notify other threads that we are running
                    // (the flag is set below while _mutex is still held)
                    _opSyncRunningCondVar.notify_all();
                    _opSyncRunning = true;
                }
                // get out if we need to
                if (_opSyncShouldExit) { break; }

                // In the fatal or startup states, skip producing and retry
                // after a 5 second delay.
                MemberState state = theReplSet->state();
                if (state.fatal() || state.startup()) {
                    timeToSleep = 5;
                    continue;
                }
                // this does the work of reading a remote oplog
                // and writing it to our oplog
                // (its return value is used as the next delay, in seconds)
                timeToSleep = produce();
            }
            catch (DBException& e) {
                // Report the failure via the heartbeat message and back off.
                sethbmsg(str::stream() << "db exception in producer: " << e.toString());
                timeToSleep = 10;
            }
            catch (std::exception& e2) {
                // Same handling for any other standard exception.
                sethbmsg(str::stream() << "exception in producer: " << e2.what());
                timeToSleep = 10;
            }
        }

        cc().shutdown();
        {
            // Clear both flags under the mutex now that the loop has ended.
            boost::unique_lock<boost::mutex> lock(_mutex);
            _opSyncRunning = false;
            _opSyncInProgress = false;
        }
    }
Exemplo n.º 18
0
    /**
     * Fills `b` with a status summary of the replica set as seen by this node.
     *
     * One sub-document is built for this node and one for each entry of
     * `_members`; the sub-documents are sorted and appended under "members",
     * next to the set-level fields "set", "date", "myState" and, when
     * applicable, "syncingTo" and "blind".
     */
    void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const {
        vector<BSONObj> memberDocs;

        const Member *self = this->_self;
        verify( self );

        MemberState myState = box.getState();

        // Describe this node first.
        {
            BSONObjBuilder selfDoc;
            selfDoc.append("_id", (int) self->id());
            selfDoc.append("name", self->fullName());
            selfDoc.append("health", 1.0);
            selfDoc.append("state", (int)myState.s);
            selfDoc.append("stateStr", myState.toString());
            selfDoc.append("uptime", (unsigned)(time(0) - cmdLine.started));

            // The optime is only reported for non-arbiter members.
            if (!self->config().arbiterOnly) {
                selfDoc.appendTimestamp("optime", lastOpTimeWritten.asDate());
                selfDoc.appendDate("optimeDate", lastOpTimeWritten.getSecs() * 1000LL);
            }

            // Only emitted when the maintenance counter is non-zero.
            int maintenanceLevel = _maintenanceMode;
            if (maintenanceLevel != 0) {
                selfDoc.append("maintenanceMode", maintenanceLevel);
            }

            if (theReplSet) {
                string selfMessage = theReplSet->hbmsg();
                if (!selfMessage.empty()) {
                    selfDoc.append("errmsg", selfMessage);
                }
            }
            selfDoc.append("self", true);
            memberDocs.push_back(selfDoc.obj());
        }

        // Then one document per other member, built from its heartbeat info.
        for (Member *member = _members.head(); member; member = member->next()) {
            BSONObjBuilder memberDoc;
            memberDoc.append("_id", (int) member->id());
            memberDoc.append("name", member->fullName());

            double health = member->hbinfo().health;
            memberDoc.append("health", health);
            memberDoc.append("state", (int) member->state().s);
            if (health != 0) {
                memberDoc.append("stateStr", member->state().toString());
            }
            else {
                // With no connection the recorded state is stale and would be
                // confusing to display, so a placeholder is shown instead.
                memberDoc.append("stateStr", "(not reachable/healthy)");
            }

            // Uptime is reported as 0 when no upSince value has been recorded.
            unsigned uptimeSecs = 0;
            if (member->hbinfo().upSince) {
                uptimeSecs = (unsigned) (time(0) - member->hbinfo().upSince);
            }
            memberDoc.append("uptime", uptimeSecs);

            if (!member->config().arbiterOnly) {
                memberDoc.appendTimestamp("optime", member->hbinfo().opTime.asDate());
                memberDoc.appendDate("optimeDate", member->hbinfo().opTime.getSecs() * 1000LL);
            }
            memberDoc.appendTimeT("lastHeartbeat", member->hbinfo().lastHeartbeat);
            memberDoc.append("pingMs", member->hbinfo().ping);

            string heartbeatMessage = member->lhb();
            if (!heartbeatMessage.empty()) {
                memberDoc.append("errmsg", heartbeatMessage);
            }

            if (member->hbinfo().authIssue) {
                memberDoc.append("authenticated", false);
            }

            memberDocs.push_back(memberDoc.obj());
        }

        sort(memberDocs.begin(), memberDocs.end());

        // Set-level fields.
        b.append("set", name());
        b.appendTimeT("date", time(0));
        b.append("myState", myState.s);

        // The sync target is omitted while this node is PRIMARY or SHUNNED.
        const Member *syncTarget = _currentSyncTarget;
        if (syncTarget) {
            if ((myState != MemberState::RS_PRIMARY) &&
                (myState != MemberState::RS_SHUNNED)) {
                b.append("syncingTo", syncTarget->fullName());
            }
        }

        b.append("members", memberDocs);
        if (replSetBlind) {
            // Flagged to avoid confusion if set; normally only set for testing.
            b.append("blind",true);
        }
    }
Exemplo n.º 19
0
void runSyncThread() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Set initial indexPrefetch setting
    const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
    if (!prefetch.empty()) {
        BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
        if (prefetch == "none")
            prefetchConfig = BackgroundSync::PREFETCH_NONE;
        else if (prefetch == "_id_only")
            prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
        else if (prefetch == "all")
            prefetchConfig = BackgroundSync::PREFETCH_ALL;
        else {
            warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
                      << "to \"all\"";
        }
        BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
    }

    while (!inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
            // Check criteria for doing an initial sync:
            // 1. If the oplog is empty, do an initial sync
            // 2. If minValid has _initialSyncFlag set, do an initial sync
            // 3. If initialSyncRequested is true
            if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
                getInitialSyncFlag() || initialSyncRequested) {
                syncDoInitialSync();
                continue;  // start from top again in case sync failed.
            }
            if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            /* we have some data.  continue tailing. */
            SyncTail tail(BackgroundSync::get(), multiSyncApply);
            tail.oplogApplication();
        } catch (const DBException& e) {
            log() << "Received exception while syncing: " << e.toString();
            sleepsecs(10);
        } catch (const std::exception& e) {
            log() << "Received exception while syncing: " << e.what();
            sleepsecs(10);
        }
    }
}
Exemplo n.º 20
0
    /**
     * Body of the "rsSync" thread.
     *
     * Applies the configured index-prefetch setting once, then loops until
     * shutdown. Each pass waits while the coordinator is not in replica-set
     * mode, leaves the loop for arbiters, idles while primary, runs an
     * initial sync when one is required, and otherwise switches to
     * RS_RECOVERING and tails the oplog through SyncTail. A DBException is
     * logged and followed by a 10 second pause; any other exception is
     * recorded in the heartbeat message and followed by a 60 second pause.
     * The client is shut down once the loop ends.
     */
    void runSyncThread() {
        Client::initThread("rsSync");
        replLocalAuth();
        ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

        // Set initial indexPrefetch setting.
        // The setting is only read here, so bind it as a const reference.
        const std::string& prefetch = replCoord->getSettings().rsIndexPrefetch;
        if (!prefetch.empty()) {
            BackgroundSync::IndexPrefetchConfig prefetchConfig = BackgroundSync::PREFETCH_ALL;
            if (prefetch == "none")
                prefetchConfig = BackgroundSync::PREFETCH_NONE;
            else if (prefetch == "_id_only")
                prefetchConfig = BackgroundSync::PREFETCH_ID_ONLY;
            else if (prefetch == "all")
                prefetchConfig = BackgroundSync::PREFETCH_ALL;
            else {
                warning() << "unrecognized indexPrefetch setting " << prefetch << ", defaulting "
                          << "to \"all\"";
            }
            BackgroundSync::get()->setIndexPrefetchConfig(prefetchConfig);
        }

        while (!inShutdown()) {
            // After a reconfig, we may not be in the replica set anymore, so
            // check that we are in the set (and not an arbiter) before
            // trying to sync with other replicas.
            // TODO(spencer): Use a condition variable to await loading a config
            if (replCoord->getReplicationMode() != ReplicationCoordinator::modeReplSet) {
                log() << "replSet warning did not receive a valid config yet, sleeping 5 seconds "
                      << rsLog;
                sleepsecs(5);
                continue;
            }

            // Take one snapshot of the member state and base every decision
            // in this iteration on it, so the arbiter and primary checks
            // cannot disagree about which state they observed.
            const MemberState memberState = replCoord->getCurrentMemberState();
            if (memberState.arbiter()) {
                break;
            }

            try {

                if (memberState.primary()) {
                    sleepsecs(1);
                    continue;
                }

                bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
                // Check criteria for doing an initial sync:
                // 1. If the oplog is empty, do an initial sync
                // 2. If minValid has _initialSyncFlag set, do an initial sync
                // 3. If initialSyncRequested is true
                if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
                        getInitialSyncFlag() ||
                        initialSyncRequested) {
                    syncDoInitialSync();
                    continue; // start from top again in case sync failed.
                }
                replCoord->setFollowerMode(MemberState::RS_RECOVERING);

                /* we have some data.  continue tailing. */
                SyncTail tail(BackgroundSync::get(), multiSyncApply);
                tail.oplogApplication();
            }
            catch(const DBException& e) {
                log() << "Received exception while syncing: " << e.toString();
                sleepsecs(10);
            }
            catch(...) {
                // Unknown failure: surface it in the heartbeat message and
                // back off for longer than for a DBException.
                sethbmsg("unexpected exception in syncThread()");
                // TODO : SET NOT SECONDARY here?
                sleepsecs(60);
            }
        }
        cc().shutdown();
    }
Exemplo n.º 21
0
void runSyncThread() {
    Client::initThread("rsSync");
    AuthorizationSession::get(cc())->grantInternalAuthorization();
    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();

    // Overwrite prefetch index mode in BackgroundSync if ReplSettings has a mode set.
    ReplSettings replSettings = replCoord->getSettings();
    if (replSettings.isPrefetchIndexModeSet())
        BackgroundSync::get()->setIndexPrefetchConfig(replSettings.getPrefetchIndexMode());

    while (!inShutdown()) {
        // After a reconfig, we may not be in the replica set anymore, so
        // check that we are in the set (and not an arbiter) before
        // trying to sync with other replicas.
        // TODO(spencer): Use a condition variable to await loading a config
        if (replCoord->getMemberState().startup()) {
            warning() << "did not receive a valid config yet";
            sleepsecs(1);
            continue;
        }

        const MemberState memberState = replCoord->getMemberState();

        // An arbiter can never transition to any other state, and doesn't replicate, ever
        if (memberState.arbiter()) {
            break;
        }

        // If we are removed then we don't belong to the set anymore
        if (memberState.removed()) {
            sleepsecs(5);
            continue;
        }

        try {
            if (memberState.primary() && !replCoord->isWaitingForApplierToDrain()) {
                sleepsecs(1);
                continue;
            }

            bool initialSyncRequested = BackgroundSync::get()->getInitialSyncRequestedFlag();
            // Check criteria for doing an initial sync:
            // 1. If the oplog is empty, do an initial sync
            // 2. If minValid has _initialSyncFlag set, do an initial sync
            // 3. If initialSyncRequested is true
            if (getGlobalReplicationCoordinator()->getMyLastOptime().isNull() ||
                getInitialSyncFlag() || initialSyncRequested) {
                syncDoInitialSync();
                continue;  // start from top again in case sync failed.
            }
            if (!replCoord->setFollowerMode(MemberState::RS_RECOVERING)) {
                continue;
            }

            /* we have some data.  continue tailing. */
            SyncTail tail(BackgroundSync::get(), multiSyncApply);
            tail.oplogApplication();
        } catch (...) {
            std::terminate();
        }
    }
}