Code Example #1
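ReplicationCoordinatorImpl::_onVoteRequestComplete for the V1 election protocol: once the term is confirmed unchanged, it inspects the VoteRequester result and, on a successful election, pins the first optime of the new term to a sentinel value (so the last committed optime cannot advance until draining finishes) before performing the win-election action.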
void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long originalTerm) {
    invariant(_voteRequester);
    invariant(!_electionWinnerDeclarer);
    LoseElectionGuardV1 lossGuard(this);

    if (_topCoord->getTerm() != originalTerm) {
        log() << "not becoming primary, we have been superceded already";
        return;
    }

    const VoteRequester::VoteRequestResult endResult = _voteRequester->getResult();

    if (endResult == VoteRequester::InsufficientVotes) {
        log() << "not becoming primary, we received insufficient votes";
        return;
    } else if (endResult == VoteRequester::StaleTerm) {
        log() << "not becoming primary, we have been superceded already";
        return;
    } else if (endResult != VoteRequester::SuccessfullyElected) {
        log() << "not becoming primary, we received an unexpected problem";
        return;
    }

    log() << "election succeeded, assuming primary role in term " << _topCoord->getTerm();
    // Prevent last committed optime from updating until we finish draining.
    _setFirstOpTimeOfMyTerm(
        OpTime(Timestamp(std::numeric_limits<int>::max(), 0), std::numeric_limits<int>::max()));
    _performPostMemberStateUpdateAction(kActionWinElection);

    _voteRequester.reset(nullptr);
    _replExecutor.signalEvent(_electionFinishedEvent);
    lossGuard.dismiss();
}
Code Example #2
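_heartbeatReconfigFinish applies a replica set configuration received via heartbeat. A primary that is not holding the global lock reschedules the work under the global exclusive lock so a possible stepdown is safe; otherwise the node validates its own index in the new configuration, falling back to index -1 (removed) on validation failure, and installs the config.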
    void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
            const ReplicationExecutor::CallbackData& cbData,
            const ReplicaSetConfig& newConfig,
            StatusWith<int> myIndex) {
        if (cbData.status == ErrorCodes::CallbackCanceled) {
            return;
        }

        boost::unique_lock<boost::mutex> lk(_mutex);
        invariant(_rsConfigState == kConfigHBReconfiguring);
        invariant(!_rsConfig.isInitialized() ||
                  _rsConfig.getConfigVersion() < newConfig.getConfigVersion());

        if (_getMemberState_inlock().primary() && !cbData.txn) {
            // Not having an OperationContext in the CallbackData means we definitely aren't holding
            // the global lock.  Since we're primary and this reconfig could cause us to stepdown,
            // reschedule this work with the global exclusive lock so the stepdown is safe.
            // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that
            // we are indeed holding the global lock.
            _replExecutor.scheduleWorkWithGlobalExclusiveLock(
                    stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                               this,
                               stdx::placeholders::_1,
                               newConfig,
                               myIndex));
            return;
        }

        if (!myIndex.isOK()) {
            switch (myIndex.getStatus().code()) {
            case ErrorCodes::NodeNotFound:
                log() << "Cannot find self in new replica set configuration; I must be removed; " <<
                    myIndex.getStatus();
                break;
            case ErrorCodes::DuplicateKey:
                error() << "Several entries in new config represent this node; "
                    "Removing self until an acceptable configuration arrives; " <<
                    myIndex.getStatus();
                break;
            default:
                error() << "Could not validate configuration received from remote node; "
                    "Removing self until an acceptable configuration arrives; " <<
                    myIndex.getStatus();
                break;
            }
            myIndex = StatusWith<int>(-1);
        }
        const PostMemberStateUpdateAction action =
            _setCurrentRSConfig_inlock(newConfig, myIndex.getValue());
        lk.unlock();
        _performPostMemberStateUpdateAction(action);
    }
Code Example #3
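_onElectCmdRunnerComplete finishes a protocol-version-0 election. Without a majority of votes it schedules a randomized retry to break the election tie; if the config version changed mid-election the result is discarded; otherwise the node assumes the primary role and signals the election-finished event.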
void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
    stdx::unique_lock<stdx::mutex> lk(_mutex);
    LoseElectionGuard lossGuard(_topCoord.get(),
                                _replExecutor.get(),
                                &_freshnessChecker,
                                &_electCmdRunner,
                                &_electionFinishedEvent);

    invariant(_freshnessChecker);
    invariant(_electCmdRunner);
    if (_electCmdRunner->isCanceled()) {
        LOG(2) << "Election canceled during elect self phase";
        return;
    }

    const int receivedVotes = _electCmdRunner->getReceivedVotes();

    if (receivedVotes < _rsConfig.getMajorityVoteCount()) {
        log() << "couldn't elect self, only received " << receivedVotes
              << " votes, but needed at least " << _rsConfig.getMajorityVoteCount();
        // Suppress ourselves from standing for election again, giving other nodes a chance
        // to win their elections.
        const auto ms = Milliseconds(_nextRandomInt64_inlock(1000) + 50);
        const Date_t now(_replExecutor->now());
        const Date_t nextCandidateTime = now + ms;
        log() << "waiting until " << nextCandidateTime << " before standing for election again";
        _topCoord->setElectionSleepUntil(nextCandidateTime);
        _scheduleWorkAt(nextCandidateTime,
                        stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
                                   this,
                                   stdx::placeholders::_1));
        return;
    }

    if (_rsConfig.getConfigVersion() != _freshnessChecker->getOriginalConfigVersion()) {
        log() << "config version changed during our election, ignoring result";
        return;
    }

    log() << "election succeeded, assuming primary role";

    lossGuard.dismiss();
    _freshnessChecker.reset(NULL);
    _electCmdRunner.reset(NULL);
    auto electionFinishedEvent = _electionFinishedEvent;
    lk.unlock();
    _performPostMemberStateUpdateAction(kActionWinElection);
    _replExecutor->signalEvent(electionFinishedEvent);
}
Code Example #4
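A variant of _onVoteRequestComplete that additionally marks every node that responded to the vote request as up, so the newly elected primary does not immediately step down because of stale liveness information.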
void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long originalTerm) {
    invariant(_voteRequester);
    LoseElectionGuardV1 lossGuard(this);

    LockGuard lk(_topoMutex);

    if (_topCoord->getTerm() != originalTerm) {
        log() << "not becoming primary, we have been superceded already";
        return;
    }

    const VoteRequester::Result endResult = _voteRequester->getResult();

    switch (endResult) {
        case VoteRequester::Result::kInsufficientVotes:
            log() << "not becoming primary, we received insufficient votes";
            return;
        case VoteRequester::Result::kStaleTerm:
            log() << "not becoming primary, we have been superceded already";
            return;
        case VoteRequester::Result::kSuccessfullyElected:
            log() << "election succeeded, assuming primary role in term " << _topCoord->getTerm();
            break;
    }

    {
        // Mark all nodes that responded to our vote request as up to avoid immediately
        // relinquishing primary.
        stdx::lock_guard<stdx::mutex> lk(_mutex);
        Date_t now = _replExecutor.now();
        const unordered_set<HostAndPort> liveNodes = _voteRequester->getResponders();
        for (auto& nodeInfo : _slaveInfo) {
            if (liveNodes.count(nodeInfo.hostAndPort)) {
                nodeInfo.down = false;
                nodeInfo.lastUpdate = now;
            }
        }
    }

    // Prevent last committed optime from updating until we finish draining.
    _setFirstOpTimeOfMyTerm(
        OpTime(Timestamp(std::numeric_limits<int>::max(), 0), std::numeric_limits<int>::max()));
    _performPostMemberStateUpdateAction(kActionWinElection);

    _voteRequester.reset(nullptr);
    _replExecutor.signalEvent(_electionFinishedEvent);
    lossGuard.dismiss();
}
Code Example #5
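_heartbeatStepDownFinish performs a stepdown that was requested during heartbeat processing, then refreshes the member state from the topology coordinator and runs the resulting post-update action.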
    void ReplicationCoordinatorImpl::_heartbeatStepDownFinish(
            const ReplicationExecutor::CallbackData& cbData) {

        if (cbData.status == ErrorCodes::CallbackCanceled) {
            return;
        }
        invariant(cbData.txn);
        // TODO Add invariant that we've got global shared or global exclusive lock, when supported
        // by lock manager.
        boost::unique_lock<boost::mutex> lk(_mutex);
        _topCoord->stepDownIfPending();
        const PostMemberStateUpdateAction action =
            _updateMemberStateFromTopologyCoordinator_inlock();
        lk.unlock();
        _performPostMemberStateUpdateAction(action);
    }
Code Example #6
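A variant of _onVoteRequestComplete that, after winning the vote, starts an ElectionWinnerDeclarer to announce the result to the other members before dismissing the loss guard.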
    void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long originalTerm) {
        invariant(_voteRequester);
        invariant(!_electionWinnerDeclarer);
        LoseElectionGuardV1 lossGuard(_topCoord.get(),
                                    &_replExecutor,
                                    &_voteRequester,
                                    &_electionWinnerDeclarer,
                                    &_electionFinishedEvent);

        if (_topCoord->getTerm() != originalTerm) {
            log() << "not becoming primary, we have been superceded already";
            return;
        }

        const VoteRequester::VoteRequestResult endResult = _voteRequester->getResult();

        if (endResult == VoteRequester::InsufficientVotes) {
            log() << "not becoming primary, we received insufficient votes";
            return;
        }
        else if (endResult == VoteRequester::StaleTerm) {
            log() << "not becoming primary, we have been superceded already";
            return;
        }
        else if (endResult != VoteRequester::SuccessfullyElected) {
            log() << "not becoming primary, we received an unexpected problem";
            return;
        }

        log() << "election succeeded, assuming primary role";
        _performPostMemberStateUpdateAction(kActionWinElection);

        _electionWinnerDeclarer.reset(new ElectionWinnerDeclarer);
        StatusWith<ReplicationExecutor::EventHandle> nextPhaseEvh = _electionWinnerDeclarer->start(
                &_replExecutor,
                _rsConfig.getReplSetName(),
                _rsConfig.getMemberAt(_selfIndex).getId(),
                _topCoord->getTerm(),
                _topCoord->getMaybeUpHostAndPorts(),
                stdx::bind(&ReplicationCoordinatorImpl::_onElectionWinnerDeclarerComplete, this));
        if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
            return;
        }
        fassert(28644, nextPhaseEvh.getStatus());
        lossGuard.dismiss();
    }
Code Example #7
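A variant of _onVoteRequestComplete that holds _mutex throughout the checks, explicitly rules out kPrimaryRespondedNo (only possible during a dry run), and resets member timeouts for all responders via the topology coordinator before unlocking and performing the win-election action.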
void ReplicationCoordinatorImpl::_onVoteRequestComplete(long long newTerm) {
    stdx::unique_lock<stdx::mutex> lk(_mutex);
    LoseElectionGuardV1 lossGuard(this);

    invariant(_voteRequester);

    if (_topCoord->getTerm() != newTerm) {
        log() << "not becoming primary, we have been superseded already during election. "
              << "election term: " << newTerm << ", current term: " << _topCoord->getTerm();
        return;
    }

    const VoteRequester::Result endResult = _voteRequester->getResult();
    invariant(endResult != VoteRequester::Result::kPrimaryRespondedNo);

    switch (endResult) {
        case VoteRequester::Result::kInsufficientVotes:
            log() << "not becoming primary, we received insufficient votes";
            return;
        case VoteRequester::Result::kStaleTerm:
            log() << "not becoming primary, we have been superseded already";
            return;
        case VoteRequester::Result::kSuccessfullyElected:
            log() << "election succeeded, assuming primary role in term " << _topCoord->getTerm();
            break;
        case VoteRequester::Result::kPrimaryRespondedNo:
            // This is impossible because we would only require the primary's
            // vote during a dry run.
            invariant(false);
    }

    // Mark all nodes that responded to our vote request as up to avoid immediately
    // relinquishing primary.
    Date_t now = _replExecutor->now();
    _topCoord->resetMemberTimeouts(now, _voteRequester->getResponders());

    _voteRequester.reset();
    auto electionFinishedEvent = _electionFinishedEvent;

    lk.unlock();
    _performPostMemberStateUpdateAction(kActionWinElection);

    _replExecutor->signalEvent(electionFinishedEvent);
    lossGuard.dismiss();
}
Code Example #8
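_handleHeartbeatResponseAction dispatches on the action derived from a heartbeat response: refreshing the cached member state, scheduling a heartbeat reconfig, starting an election (V1 or legacy protocol), stepping this node down, or asking a remote primary to step down.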
    void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
            const HeartbeatResponseAction& action,
            const StatusWith<ReplSetHeartbeatResponse>& responseStatus) {

        switch (action.getAction()) {
        case HeartbeatResponseAction::NoAction:
            // Update the cached member state if different than the current topology member state
            if (_memberState != _topCoord->getMemberState()) {
                boost::unique_lock<boost::mutex> lk(_mutex);
                const PostMemberStateUpdateAction postUpdateAction =
                    _updateMemberStateFromTopologyCoordinator_inlock();
                lk.unlock();
                _performPostMemberStateUpdateAction(postUpdateAction);
            }
            break;
        case HeartbeatResponseAction::Reconfig:
            invariant(responseStatus.isOK());
            _scheduleHeartbeatReconfig(responseStatus.getValue().getConfig());
            break;
        case HeartbeatResponseAction::StartElection:
            if (isV1ElectionProtocol()) {
                _startElectSelfV1();
            }
            else {
                _startElectSelf();
            }
            break;
        case HeartbeatResponseAction::StepDownSelf:
            invariant(action.getPrimaryConfigIndex() == _selfIndex);
            _heartbeatStepDownStart();
            break;
        case HeartbeatResponseAction::StepDownRemotePrimary: {
            invariant(action.getPrimaryConfigIndex() != _selfIndex);
            _requestRemotePrimaryStepdown(
                    _rsConfig.getMemberAt(action.getPrimaryConfigIndex()).getHostAndPort());
            break;
        }
        default:
            severe() << "Illegal heartbeat response action code " << int(action.getAction());
            invariant(false);
        }
    }