/**
 * Checks whether "newConfig" may be used to initiate a replica set.
 *
 * Returns the non-OK status from newConfig.validate() if validation fails, and
 * ErrorCodes::NewReplicaSetConfigurationIncompatible if the config version is not 1.
 * Otherwise returns the result of findSelfInConfigIfElectable() for this config.
 */
StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* externalState,
                                          const ReplicaSetConfig& newConfig) {
    const Status validity = newConfig.validate();
    if (!validity.isOK()) {
        return StatusWith<int>(validity);
    }

    // Only a version-1 config is accepted for initiation.
    if (newConfig.getConfigVersion() == 1) {
        return findSelfInConfigIfElectable(externalState, newConfig);
    }

    // NOTE(review): the two literals below join with a double space ("must  have").
    return StatusWith<int>(ErrorCodes::NewReplicaSetConfigurationIncompatible,
                           str::stream() << "Configuration used to initiate a replica set must "
                                         << " have version 1, but found "
                                         << newConfig.getConfigVersion());
}
void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) { boost::lock_guard<boost::mutex> lk(_mutex); if (_inShutdown) { return; } switch (_rsConfigState) { case kConfigStartingUp: LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() << " because still attempting to load local configuration information"; return; case kConfigUninitialized: case kConfigSteady: LOG(1) << "Received new config via heartbeat with version " << newConfig.getConfigVersion(); break; case kConfigInitiating: case kConfigReconfiguring: case kConfigHBReconfiguring: LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() << " because already in the midst of a configuration process"; return; default: severe() << "Reconfiguration request occurred while _rsConfigState == " << int(_rsConfigState) << "; aborting."; fassertFailed(18807); } _setConfigState_inlock(kConfigHBReconfiguring); invariant(!_rsConfig.isInitialized() || _rsConfig.getConfigVersion() < newConfig.getConfigVersion()); if (_freshnessChecker) { _freshnessChecker->cancel(&_replExecutor); if (_electCmdRunner) { _electCmdRunner->cancel(&_replExecutor); } _replExecutor.onEvent( _electionFinishedEvent, stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled, this, stdx::placeholders::_1, newConfig)); return; } _replExecutor.scheduleDBWork(stdx::bind( &ReplicationCoordinatorImpl::_heartbeatReconfigStore, this, stdx::placeholders::_1, newConfig)); }
void ReplCoordTest::simulateSuccessfulElection() { OperationContextReplMock txn; ReplicationCoordinatorImpl* replCoord = getReplCoord(); NetworkInterfaceMock* net = getNet(); ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest(); ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString(); while (!replCoord->getMemberState().primary()) { log() << "Waiting on network in state " << replCoord->getMemberState(); getNet()->enterNetwork(); const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); log() << request.target.toString() << " processing " << request.cmdObj; ReplSetHeartbeatArgs hbArgs; if (hbArgs.initialize(request.cmdObj).isOK()) { ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); BSONObjBuilder respObj; respObj << "ok" << 1; hbResp.addToBSON(&respObj, false); net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj())); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") { net->scheduleResponse( noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "fresher" << false << "opTime" << Date_t() << "veto" << false))); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") { net->scheduleResponse(noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "vote" << 1 << "round" << request.cmdObj["round"].OID()))); } else { error() << "Black holing unexpected request to " << request.target << ": " << request.cmdObj; net->blackHole(noi); } net->runReadyNetworkOperations(); getNet()->exitNetwork(); } ASSERT(replCoord->isWaitingForApplierToDrain()); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); IsMasterResponse imResponse; replCoord->fillIsMasterForReplSet(&imResponse); ASSERT_FALSE(imResponse.isMaster()) << 
imResponse.toBSON().toString(); ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString(); replCoord->signalDrainComplete(&txn); replCoord->fillIsMasterForReplSet(&imResponse); ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString(); ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString(); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); // Consume the notification of election win. for (int i = 0; i < rsConfig.getNumMembers() - 1; i++) { replyToReceivedHeartbeat(); } }
void ReplCoordTest::simulateEnoughHeartbeatsForAllNodesUp() { ReplicationCoordinatorImpl* replCoord = getReplCoord(); ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest(); NetworkInterfaceMock* net = getNet(); net->enterNetwork(); for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) { const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); log() << request.target.toString() << " processing " << request.cmdObj; ReplSetHeartbeatArgsV1 hbArgs; ReplSetHeartbeatArgs hbArgsPV0; if (hbArgs.initialize(request.cmdObj).isOK() || hbArgsPV0.initialize(request.cmdObj).isOK()) { ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); hbResp.setAppliedOpTime(OpTime(Timestamp(100, 2), 0)); BSONObjBuilder respObj; net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true))); } else { error() << "Black holing unexpected request to " << request.target << ": " << request.cmdObj; net->blackHole(noi); } net->runReadyNetworkOperations(); } net->exitNetwork(); }
/**
 * Last step of a heartbeat-triggered reconfig: installs "newConfig" as the current config.
 *
 * If "myIndex" holds an error, the reason is logged (NoSuchKey, DuplicateKey, or anything else)
 * and the config is installed with a self index of -1. Otherwise the index carried by
 * "myIndex" is used. Requires _rsConfigState == kConfigHBReconfiguring and a strictly newer
 * config version than any currently initialized config.
 */
void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
    const ReplicationExecutor::CallbackData& cbData,
    const ReplicaSetConfig& newConfig,
    StatusWith<int> myIndex) {
    boost::lock_guard<boost::mutex> lock(_mutex);
    invariant(_rsConfigState == kConfigHBReconfiguring);
    invariant(!_rsConfig.isInitialized() ||
              _rsConfig.getConfigVersion() < newConfig.getConfigVersion());

    // -1 is the index passed on whenever this node could not be located in the new config.
    int selfIndex = -1;
    if (myIndex.isOK()) {
        selfIndex = myIndex.getValue();
    } else {
        switch (myIndex.getStatus().code()) {
            case ErrorCodes::NoSuchKey:
                log() << "Cannot find self in new replica set configuration; I must be removed; "
                      << myIndex.getStatus();
                break;
            case ErrorCodes::DuplicateKey:
                error() << "Several entries in new config represent this node; "
                           "Removing self until an acceptable configuration arrives; "
                        << myIndex.getStatus();
                break;
            default:
                error() << "Could not validate configuration received from remote node; "
                           "Removing self until an acceptable configuration arrives; "
                        << myIndex.getStatus();
                break;
        }
    }

    _setCurrentRSConfig_inlock(newConfig, selfIndex);
}
/**
 * Runs a QuorumChecker for "rsConfig" on "executor" and returns its status.
 *
 * "myIndex" is handed to the QuorumChecker unchanged. The config version must be greater
 * than 1; this is enforced with an invariant.
 */
Status checkQuorumForReconfig(ReplicationExecutor* executor,
                              const ReplicaSetConfig& rsConfig,
                              const int myIndex) {
    invariant(rsConfig.getConfigVersion() > 1);
    return QuorumChecker(&rsConfig, myIndex).run(executor);
}
void ReplCoordTest::simulateSuccessfulV1Election() { OperationContextReplMock txn; ReplicationCoordinatorImpl* replCoord = getReplCoord(); NetworkInterfaceMock* net = getNet(); ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest(); ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString(); while (!replCoord->getMemberState().primary()) { log() << "Waiting on network in state " << replCoord->getMemberState(); getNet()->enterNetwork(); const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); log() << request.target.toString() << " processing " << request.cmdObj; ReplSetHeartbeatArgsV1 hbArgs; Status status = hbArgs.initialize(request.cmdObj); if (hbArgs.initialize(request.cmdObj).isOK()) { ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true))); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") { net->scheduleResponse( noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "reason" << "" << "term" << request.cmdObj["term"].Long() << "voteGranted" << true))); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetDeclareElectionWinner") { net->scheduleResponse( noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "term" << request.cmdObj["term"].Long()))); } else { error() << "Black holing unexpected request to " << request.target << ": " << request.cmdObj; net->blackHole(noi); } net->runReadyNetworkOperations(); getNet()->exitNetwork(); } ASSERT(replCoord->isWaitingForApplierToDrain()); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); IsMasterResponse imResponse; replCoord->fillIsMasterForReplSet(&imResponse); 
ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString(); ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString(); replCoord->signalDrainComplete(&txn); replCoord->fillIsMasterForReplSet(&imResponse); ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString(); ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString(); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); }
/**
 * Builds a new Algorithm and a ScatterGatherRunner bound to "executor", records the config
 * version of "currentConfig", and starts the runner.
 *
 * Returns whatever ScatterGatherRunner::start() returns.
 */
StatusWith<ReplicationExecutor::EventHandle> FreshnessChecker::start(
    ReplicationExecutor* executor,
    const Timestamp& lastOpTimeApplied,
    const ReplicaSetConfig& currentConfig,
    int selfIndex,
    const std::vector<HostAndPort>& targets) {
    // The runner holds a raw pointer to the algorithm, so the algorithm is created first.
    _algorithm.reset(new Algorithm(lastOpTimeApplied, currentConfig, selfIndex, targets));
    _runner.reset(new ScatterGatherRunner(_algorithm.get(), executor));

    // Record the config version this check was started against.
    _originalConfigVersion = currentConfig.getConfigVersion();

    return _runner->start();
}
void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( const ReplicationExecutor::CallbackArgs& cbData, const ReplicaSetConfig& newConfig, StatusWith<int> myIndex) { if (cbData.status == ErrorCodes::CallbackCanceled) { return; } stdx::unique_lock<stdx::mutex> lk(_mutex); invariant(_rsConfigState == kConfigHBReconfiguring); invariant(!_rsConfig.isInitialized() || _rsConfig.getConfigVersion() < newConfig.getConfigVersion()); if (_getMemberState_inlock().primary() && !cbData.txn) { // Not having an OperationContext in the CallbackData means we definitely aren't holding // the global lock. Since we're primary and this reconfig could cause us to stepdown, // reschedule this work with the global exclusive lock so the stepdown is safe. // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that // we are indeed holding the global lock. _replExecutor.scheduleWorkWithGlobalExclusiveLock( stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish, this, stdx::placeholders::_1, newConfig, myIndex)); return; } if (!myIndex.isOK()) { switch (myIndex.getStatus().code()) { case ErrorCodes::NodeNotFound: log() << "Cannot find self in new replica set configuration; I must be removed; " << myIndex.getStatus(); break; case ErrorCodes::DuplicateKey: error() << "Several entries in new config represent this node; " "Removing self until an acceptable configuration arrives; " << myIndex.getStatus(); break; default: error() << "Could not validate configuration received from remote node; " "Removing self until an acceptable configuration arrives; " << myIndex.getStatus(); break; } myIndex = StatusWith<int>(-1); } const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(cbData, newConfig, myIndex.getValue()); lk.unlock(); _resetElectionInfoOnProtocolVersionUpgrade(newConfig); _performPostMemberStateUpdateAction(action); }
void ReplCoordTest::replyToReceivedHeartbeatV1() { NetworkInterfaceMock* net = getNet(); net->enterNetwork(); const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); const ReplicaSetConfig rsConfig = getReplCoord()->getReplicaSetConfig_forTest(); repl::ReplSetHeartbeatArgsV1 hbArgs; ASSERT_OK(hbArgs.initialize(request.cmdObj)); repl::ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); BSONObjBuilder respObj; respObj << "ok" << 1; hbResp.addToBSON(&respObj, false); net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj())); net->runReadyNetworkOperations(); getNet()->exitNetwork(); }
/**
 * Delegates the quorum check for "rsConfig" to checkQuorumGeneral() and returns its status.
 *
 * The config version must be greater than 1; this is enforced with an invariant.
 */
Status checkQuorumForReconfig(ReplicationExecutor* executor,
                              const ReplicaSetConfig& rsConfig,
                              const int myIndex) {
    invariant(rsConfig.getConfigVersion() > 1);

    Status quorumStatus = checkQuorumGeneral(executor, rsConfig, myIndex);
    return quorumStatus;
}