/**
 * Constructs an OplogFetcher.
 *
 * The underlying _fetcher is created against 'source' for the database of 'oplogNSS', using
 * the find command built by makeFindCommandObject() from 'lastFetched.opTime', the metadata
 * object built by makeMetadataObject(), and the election timeout period of 'config'.
 * OplogFetcher::_callback is bound as the fetcher's callback.
 *
 * The constructor body checks, via uassert:
 *   - ErrorCodes::BadValue                if 'lastFetched.opTime' is null;
 *   - ErrorCodes::InvalidReplicaSetConfig if 'config' is not initialized;
 *   - ErrorCodes::BadValue                if 'enqueueDocumentsFn' is empty;
 *   - ErrorCodes::BadValue                if 'onShutdownCallbackFn' is empty.
 *
 * NOTE(review): these checks live in the constructor body, so they run only after every member
 * initializer below has already consumed the (still unvalidated) arguments.
 */
OplogFetcher::OplogFetcher(executor::TaskExecutor* exec,
                           OpTimeWithHash lastFetched,
                           HostAndPort source,
                           NamespaceString oplogNSS,
                           ReplicaSetConfig config,
                           DataReplicatorExternalState* dataReplicatorExternalState,
                           EnqueueDocumentsFn enqueueDocumentsFn,
                           OnShutdownCallbackFn onShutdownCallbackFn)
    : _dataReplicatorExternalState(dataReplicatorExternalState),
      _fetcher(exec,
               source,
               oplogNSS.db().toString(),
               makeFindCommandObject(dataReplicatorExternalState, oplogNSS, lastFetched.opTime),
               // Only the first and third arguments of the fetcher callback are forwarded to
               // _callback.
               stdx::bind(
                   &OplogFetcher::_callback, this, stdx::placeholders::_1, stdx::placeholders::_3),
               // makeMetadataObject() is told whether the config uses protocol version 1; its
               // result is passed through uassertStatusOK.
               uassertStatusOK(makeMetadataObject(config.getProtocolVersion() == 1LL)),
               config.getElectionTimeoutPeriod()),
      _enqueueDocumentsFn(enqueueDocumentsFn),
      _awaitDataTimeout(calculateAwaitDataTimeout(config)),
      _onShutdownCallbackFn(onShutdownCallbackFn),
      _lastFetched(lastFetched) {
    uassert(ErrorCodes::BadValue, "null last optime fetched", !lastFetched.opTime.isNull());
    uassert(ErrorCodes::InvalidReplicaSetConfig,
            "uninitialized replica set configuration",
            config.isInitialized());
    uassert(ErrorCodes::BadValue, "null enqueueDocuments function", enqueueDocumentsFn);
    uassert(ErrorCodes::BadValue, "null onShutdownCallback function", onShutdownCallbackFn);
}
void ReplCoordTest::simulateEnoughHeartbeatsForAllNodesUp() { ReplicationCoordinatorImpl* replCoord = getReplCoord(); ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest(); NetworkInterfaceMock* net = getNet(); net->enterNetwork(); for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) { const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); log() << request.target.toString() << " processing " << request.cmdObj; ReplSetHeartbeatArgsV1 hbArgs; ReplSetHeartbeatArgs hbArgsPV0; if (hbArgs.initialize(request.cmdObj).isOK() || hbArgsPV0.initialize(request.cmdObj).isOK()) { ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); hbResp.setAppliedOpTime(OpTime(Timestamp(100, 2), 0)); BSONObjBuilder respObj; net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true))); } else { error() << "Black holing unexpected request to " << request.target << ": " << request.cmdObj; net->blackHole(noi); } net->runReadyNetworkOperations(); } net->exitNetwork(); }
/**
 * Builds a mock replica set named 'setName' with 'nodes' mock servers.
 *
 * Each server gets the host name "$<setName><index>:27017", is recorded in _nodeMap and is
 * registered with the MockConnRegistry. The node with index 0 becomes _primaryHost. A version 1
 * replica set config listing every node is then initialized, validated (both steps are
 * fassert-checked) and installed through setConfig().
 */
MockReplicaSet::MockReplicaSet(const string& setName, size_t nodes) : _setName(setName) {
    BSONObjBuilder configDoc;
    configDoc.append("_id", setName);
    configDoc.append("version", 1);

    BSONArrayBuilder memberList(configDoc.subarrayStart("members"));
    for (size_t idx = 0; idx != nodes; ++idx) {
        std::stringstream nameStream;
        nameStream << "$" << setName << idx << ":27017";
        const string host(nameStream.str());

        // The first node created is the initial primary.
        if (idx == 0) {
            _primaryHost = host;
        }

        MockRemoteDBServer* server = new MockRemoteDBServer(host);
        _nodeMap[host] = server;
        MockConnRegistry::get()->addServer(server);

        memberList.append(BSON("_id" << static_cast<int>(idx) << "host" << host));
    }
    memberList.done();

    ReplicaSetConfig replConfig;
    fassert(28566, replConfig.initialize(configDoc.obj()));
    fassert(28573, replConfig.validate());
    setConfig(replConfig);
}
void ReplCoordTest::simulateSuccessfulElection() { OperationContextReplMock txn; ReplicationCoordinatorImpl* replCoord = getReplCoord(); NetworkInterfaceMock* net = getNet(); ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest(); ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString(); while (!replCoord->getMemberState().primary()) { log() << "Waiting on network in state " << replCoord->getMemberState(); getNet()->enterNetwork(); const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); log() << request.target.toString() << " processing " << request.cmdObj; ReplSetHeartbeatArgs hbArgs; if (hbArgs.initialize(request.cmdObj).isOK()) { ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); BSONObjBuilder respObj; respObj << "ok" << 1; hbResp.addToBSON(&respObj, false); net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj())); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") { net->scheduleResponse( noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "fresher" << false << "opTime" << Date_t() << "veto" << false))); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") { net->scheduleResponse(noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "vote" << 1 << "round" << request.cmdObj["round"].OID()))); } else { error() << "Black holing unexpected request to " << request.target << ": " << request.cmdObj; net->blackHole(noi); } net->runReadyNetworkOperations(); getNet()->exitNetwork(); } ASSERT(replCoord->isWaitingForApplierToDrain()); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); IsMasterResponse imResponse; replCoord->fillIsMasterForReplSet(&imResponse); ASSERT_FALSE(imResponse.isMaster()) << 
imResponse.toBSON().toString(); ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString(); replCoord->signalDrainComplete(&txn); replCoord->fillIsMasterForReplSet(&imResponse); ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString(); ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString(); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); // Consume the notification of election win. for (int i = 0; i < rsConfig.getNumMembers() - 1; i++) { replyToReceivedHeartbeat(); } }
void ReplCoordTest::simulateSuccessfulV1Election() { OperationContextReplMock txn; ReplicationCoordinatorImpl* replCoord = getReplCoord(); NetworkInterfaceMock* net = getNet(); ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest(); ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString(); while (!replCoord->getMemberState().primary()) { log() << "Waiting on network in state " << replCoord->getMemberState(); getNet()->enterNetwork(); const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); log() << request.target.toString() << " processing " << request.cmdObj; ReplSetHeartbeatArgsV1 hbArgs; Status status = hbArgs.initialize(request.cmdObj); if (hbArgs.initialize(request.cmdObj).isOK()) { ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true))); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") { net->scheduleResponse( noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "reason" << "" << "term" << request.cmdObj["term"].Long() << "voteGranted" << true))); } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetDeclareElectionWinner") { net->scheduleResponse( noi, net->now(), makeResponseStatus(BSON("ok" << 1 << "term" << request.cmdObj["term"].Long()))); } else { error() << "Black holing unexpected request to " << request.target << ": " << request.cmdObj; net->blackHole(noi); } net->runReadyNetworkOperations(); getNet()->exitNetwork(); } ASSERT(replCoord->isWaitingForApplierToDrain()); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); IsMasterResponse imResponse; replCoord->fillIsMasterForReplSet(&imResponse); 
ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString(); ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString(); replCoord->signalDrainComplete(&txn); replCoord->fillIsMasterForReplSet(&imResponse); ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString(); ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString(); ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString(); }
/**
 * Validates 'newConfig' for use in initiating a replica set.
 *
 * Returns the failing status if newConfig.validate() fails, and
 * NewReplicaSetConfigurationIncompatible if the config version is not 1. Otherwise returns the
 * result of findSelfInConfigIfElectable().
 */
StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* externalState,
                                          const ReplicaSetConfig& newConfig) {
    const Status validity = newConfig.validate();
    if (!validity.isOK()) {
        return StatusWith<int>(validity);
    }

    // An initiating config is acceptable only at version 1.
    if (newConfig.getConfigVersion() == 1) {
        return findSelfInConfigIfElectable(externalState, newConfig);
    }

    return StatusWith<int>(ErrorCodes::NewReplicaSetConfigurationIncompatible,
                           str::stream() << "Configuration used to initiate a replica set must "
                                         << " have version 1, but found "
                                         << newConfig.getConfigVersion());
}
/**
 * Starts installing 'newConfig', a replica set config received via heartbeat.
 *
 * Runs entirely under _mutex. Does nothing if the node is shutting down, is still starting up,
 * or is already in the middle of another configuration change. Otherwise moves the config state
 * to kConfigHBReconfiguring and schedules _heartbeatReconfigStore. If _freshnessChecker is set,
 * it (and _electCmdRunner, if set) is cancelled first and the reconfig is deferred to
 * _heartbeatReconfigAfterElectionCanceled, run on _electionFinishedEvent.
 */
void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) {
    boost::lock_guard<boost::mutex> lk(_mutex);
    if (_inShutdown) {
        return;
    }

    switch (_rsConfigState) {
        case kConfigStartingUp:
            LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion()
                   << " because still attempting to load local configuration information";
            return;
        // Only these two states allow a heartbeat reconfig to proceed.
        case kConfigUninitialized:
        case kConfigSteady:
            LOG(1) << "Received new config via heartbeat with version "
                   << newConfig.getConfigVersion();
            break;
        case kConfigInitiating:
        case kConfigReconfiguring:
        case kConfigHBReconfiguring:
            LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion()
                   << " because already in the midst of a configuration process";
            return;
        default:
            // Any other state value is treated as fatal.
            severe() << "Reconfiguration request occurred while _rsConfigState == "
                     << int(_rsConfigState) << "; aborting.";
            fassertFailed(18807);
    }
    _setConfigState_inlock(kConfigHBReconfiguring);

    // If a config is already installed, the new one must have a strictly higher version.
    invariant(!_rsConfig.isInitialized() ||
              _rsConfig.getConfigVersion() < newConfig.getConfigVersion());

    if (_freshnessChecker) {
        // NOTE(review): a non-null _freshnessChecker presumably means an election is in
        // progress - confirm. Cancel it and resume the reconfig once _electionFinishedEvent
        // is signalled.
        _freshnessChecker->cancel(&_replExecutor);
        if (_electCmdRunner) {
            _electCmdRunner->cancel(&_replExecutor);
        }
        _replExecutor.onEvent(
            _electionFinishedEvent,
            stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled,
                       this,
                       stdx::placeholders::_1,
                       newConfig));
        return;
    }

    _replExecutor.scheduleDBWork(stdx::bind(
        &ReplicationCoordinatorImpl::_heartbeatReconfigStore, this, stdx::placeholders::_1, newConfig));
}
/**
 * Validates 'newConfig' for use at start-up.
 *
 * Fails with the status from newConfig.validate() if that check fails. When 'oldConfig' is
 * initialized, also fails with the status from validateOldAndNewConfigsCompatible(). Otherwise
 * returns the result of findSelfInConfig().
 */
StatusWith<int> validateConfigForStartUp(ReplicationCoordinatorExternalState* externalState,
                                         const ReplicaSetConfig& oldConfig,
                                         const ReplicaSetConfig& newConfig) {
    Status outcome = newConfig.validate();

    // Compatibility with the previous config matters only if there was a previous config.
    if (outcome.isOK() && oldConfig.isInitialized()) {
        outcome = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
    }

    if (!outcome.isOK()) {
        return StatusWith<int>(outcome);
    }
    return findSelfInConfig(externalState, newConfig);
}
/**
 * Runs a QuorumChecker for 'rsConfig' and 'myIndex' on 'executor' and returns its status.
 *
 * The config version must be greater than 1 (enforced with an invariant).
 */
Status checkQuorumForReconfig(ReplicationExecutor* executor,
                              const ReplicaSetConfig& rsConfig,
                              const int myIndex) {
    invariant(rsConfig.getConfigVersion() > 1);
    return QuorumChecker(&rsConfig, myIndex).run(executor);
}
void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) { { boost::scoped_ptr<OperationContext> txn(_externalState->createOperationContext()); Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON()); if (!status.isOK()) { error() << "Ignoring new configuration in heartbeat response because we failed to" " write it to stable storage; " << status; boost::lock_guard<boost::mutex> lk(_mutex); invariant(_rsConfigState == kConfigHBReconfiguring); if (_rsConfig.isInitialized()) { _setConfigState_inlock(kConfigSteady); } else { // This is the _only_ case where we can return to kConfigUninitialized from // kConfigHBReconfiguring. _setConfigState_inlock(kConfigUninitialized); } return; } } const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig( _externalState.get(), newConfig); _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish, this, stdx::placeholders::_1, newConfig, myIndex)); }
/**
 * Installs 'newConfig' as the current config while holding _mutex.
 *
 * 'myIndex' is this node's position in the new config, or the error explaining why no
 * position could be determined. On error the reason is logged and the config is installed
 * with index -1.
 */
void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
    const ReplicationExecutor::CallbackData& cbData,
    const ReplicaSetConfig& newConfig,
    StatusWith<int> myIndex) {
    boost::lock_guard<boost::mutex> stateLock(_mutex);
    invariant(_rsConfigState == kConfigHBReconfiguring);
    invariant(!_rsConfig.isInitialized() ||
              _rsConfig.getConfigVersion() < newConfig.getConfigVersion());

    // Common case: our position in the new config is known.
    if (myIndex.isOK()) {
        _setCurrentRSConfig_inlock(newConfig, myIndex.getValue());
        return;
    }

    switch (myIndex.getStatus().code()) {
        case ErrorCodes::NoSuchKey:
            log() << "Cannot find self in new replica set configuration; I must be removed; "
                  << myIndex.getStatus();
            break;
        case ErrorCodes::DuplicateKey:
            error() << "Several entries in new config represent this node; "
                       "Removing self until an acceptable configuration arrives; "
                    << myIndex.getStatus();
            break;
        default:
            error() << "Could not validate configuration received from remote node; "
                       "Removing self until an acceptable configuration arrives; "
                    << myIndex.getStatus();
            break;
    }

    // Whatever the reason, the config is still installed, with -1 as this node's index.
    _setCurrentRSConfig_inlock(newConfig, -1);
}
/**
 * Test helper: answers dry-run "replSetRequestVotes" requests with granted votes until
 * getNumMembers() / 2 of them have been answered, then waits for the dry run to finish.
 *
 * 'onDryRunRequest' is invoked with each vote request before its response is scheduled.
 * Before each request is fetched, the mock clock is advanced to the election timeout if it has
 * not yet reached it. Requests for any other command are logged as errors and black-holed.
 */
void ReplCoordTest::simulateSuccessfulDryRun(
    stdx::function<void(const RemoteCommandRequest& request)> onDryRunRequest) {
    ReplicationCoordinatorImpl* replCoord = getReplCoord();
    ReplicaSetConfig currentConfig = replCoord->getReplicaSetConfig_forTest();
    NetworkInterfaceMock* mockNet = getNet();

    auto electionTimeoutWhen = replCoord->getElectionTimeout_forTest();
    ASSERT_NOT_EQUALS(Date_t(), electionTimeoutWhen);
    log() << "Election timeout scheduled at " << electionTimeoutWhen << " (simulator time)";

    int answeredVotes = 0;
    int requiredVotes = currentConfig.getNumMembers() / 2;
    log() << "Simulating dry run responses - expecting " << requiredVotes
          << " replSetRequestVotes requests";

    mockNet->enterNetwork();
    while (answeredVotes < requiredVotes) {
        // Jump the mock clock to the election timeout if it has not been reached yet.
        if (mockNet->now() < electionTimeoutWhen) {
            mockNet->runUntil(electionTimeoutWhen);
        }

        const NetworkInterfaceMock::NetworkOperationIterator opIter =
            mockNet->getNextReadyRequest();
        const RemoteCommandRequest& request = opIter->getRequest();
        log() << request.target.toString() << " processing " << request.cmdObj;

        if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
            ASSERT_TRUE(request.cmdObj.getBoolField("dryRun"));
            onDryRunRequest(request);
            mockNet->scheduleResponse(
                opIter,
                mockNet->now(),
                makeResponseStatus(BSON("ok" << 1 << "reason"
                                             << ""
                                             << "term" << request.cmdObj["term"].Long()
                                             << "voteGranted" << true)));
            ++answeredVotes;
        } else {
            error() << "Black holing unexpected request to " << request.target << ": "
                    << request.cmdObj;
            mockNet->blackHole(opIter);
        }

        mockNet->runReadyNetworkOperations();
    }
    mockNet->exitNetwork();

    log() << "Simulating dry run responses - scheduled " << answeredVotes
          << " replSetRequestVotes responses";
    getReplCoord()->waitForElectionDryRunFinish_forTest();
    log() << "Simulating dry run responses - dry run completed";
}
/**
 * Validates a config received via heartbeat.
 *
 * Returns the failing status if newConfig.validate() fails; otherwise returns the result of
 * findSelfInConfig().
 */
StatusWith<int> validateConfigForHeartbeatReconfig(
    ReplicationCoordinatorExternalState* externalState, const ReplicaSetConfig& newConfig) {
    const Status validity = newConfig.validate();
    if (validity.isOK()) {
        return findSelfInConfig(externalState, newConfig);
    }
    return StatusWith<int>(validity);
}
void ReplCoordTest::replyToReceivedHeartbeatV1() { NetworkInterfaceMock* net = getNet(); net->enterNetwork(); const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest(); const RemoteCommandRequest& request = noi->getRequest(); const ReplicaSetConfig rsConfig = getReplCoord()->getReplicaSetConfig_forTest(); repl::ReplSetHeartbeatArgsV1 hbArgs; ASSERT_OK(hbArgs.initialize(request.cmdObj)); repl::ReplSetHeartbeatResponse hbResp; hbResp.setSetName(rsConfig.getReplSetName()); hbResp.setState(MemberState::RS_SECONDARY); hbResp.setConfigVersion(rsConfig.getConfigVersion()); BSONObjBuilder respObj; respObj << "ok" << 1; hbResp.addToBSON(&respObj, false); net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj())); net->runReadyNetworkOperations(); getNet()->exitNetwork(); }
/**
 * Starts a freshness check.
 *
 * Records the version of 'currentConfig', replaces _algorithm with a new Algorithm built from
 * the arguments, replaces _runner with a ScatterGatherRunner over that algorithm and
 * 'executor', and returns the result of starting the runner.
 */
StatusWith<ReplicationExecutor::EventHandle> FreshnessChecker::start(
    ReplicationExecutor* executor,
    const Timestamp& lastOpTimeApplied,
    const ReplicaSetConfig& currentConfig,
    int selfIndex,
    const std::vector<HostAndPort>& targets) {
    _originalConfigVersion = currentConfig.getConfigVersion();

    Algorithm* freshAlgorithm = new Algorithm(lastOpTimeApplied, currentConfig, selfIndex, targets);
    _algorithm.reset(freshAlgorithm);

    // The runner drives the algorithm that _algorithm now owns.
    _runner.reset(new ScatterGatherRunner(freshAlgorithm, executor));
    return _runner->start();
}
void ReplicationCoordinatorImpl::_heartbeatReconfigFinish( const ReplicationExecutor::CallbackArgs& cbData, const ReplicaSetConfig& newConfig, StatusWith<int> myIndex) { if (cbData.status == ErrorCodes::CallbackCanceled) { return; } stdx::unique_lock<stdx::mutex> lk(_mutex); invariant(_rsConfigState == kConfigHBReconfiguring); invariant(!_rsConfig.isInitialized() || _rsConfig.getConfigVersion() < newConfig.getConfigVersion()); if (_getMemberState_inlock().primary() && !cbData.txn) { // Not having an OperationContext in the CallbackData means we definitely aren't holding // the global lock. Since we're primary and this reconfig could cause us to stepdown, // reschedule this work with the global exclusive lock so the stepdown is safe. // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that // we are indeed holding the global lock. _replExecutor.scheduleWorkWithGlobalExclusiveLock( stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish, this, stdx::placeholders::_1, newConfig, myIndex)); return; } if (!myIndex.isOK()) { switch (myIndex.getStatus().code()) { case ErrorCodes::NodeNotFound: log() << "Cannot find self in new replica set configuration; I must be removed; " << myIndex.getStatus(); break; case ErrorCodes::DuplicateKey: error() << "Several entries in new config represent this node; " "Removing self until an acceptable configuration arrives; " << myIndex.getStatus(); break; default: error() << "Could not validate configuration received from remote node; " "Removing self until an acceptable configuration arrives; " << myIndex.getStatus(); break; } myIndex = StatusWith<int>(-1); } const PostMemberStateUpdateAction action = _setCurrentRSConfig_inlock(cbData, newConfig, myIndex.getValue()); lk.unlock(); _resetElectionInfoOnProtocolVersionUpgrade(newConfig); _performPostMemberStateUpdateAction(action); }
/**
 * Validates 'newConfig' as a replacement for 'oldConfig'.
 *
 * Fails with the status from newConfig.validate() or, if that passes, from
 * validateOldAndNewConfigsCompatible(). Otherwise returns the result of findSelfInConfig()
 * when 'force' is set, and of findSelfInConfigIfElectable() when it is not.
 */
StatusWith<int> validateConfigForReconfig(ReplicationCoordinatorExternalState* externalState,
                                          const ReplicaSetConfig& oldConfig,
                                          const ReplicaSetConfig& newConfig,
                                          bool force) {
    Status outcome = newConfig.validate();
    if (outcome.isOK()) {
        outcome = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
    }
    if (!outcome.isOK()) {
        return StatusWith<int>(outcome);
    }

    // Only a non-forced reconfig uses the "if electable" lookup.
    if (!force) {
        return findSelfInConfigIfElectable(externalState, newConfig);
    }
    return findSelfInConfig(externalState, newConfig);
}
/**
 * Quorum check for a reconfig: delegates to checkQuorumGeneral() and returns its status.
 *
 * The config version must be greater than 1 (enforced with an invariant).
 */
Status checkQuorumForReconfig(ReplicationExecutor* executor,
                              const ReplicaSetConfig& rsConfig,
                              const int myIndex) {
    invariant(rsConfig.getConfigVersion() > 1);

    const Status quorumStatus = checkQuorumGeneral(executor, rsConfig, myIndex);
    return quorumStatus;
}
/**
 * Test helper: builds a ReplicaSetConfig from 'configBson', asserting (ASSERT_OK) that both
 * initialize() and validate() succeed, and returns the config by value.
 */
ReplicaSetConfig ReplCoordTest::assertMakeRSConfig(const BSONObj& configBson) {
    ReplicaSetConfig config;
    ASSERT_OK(config.initialize(configBson));
    ASSERT_OK(config.validate());
    return config;
}
void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) { class StoreThreadGuard { public: StoreThreadGuard(boost::unique_lock<boost::mutex>* lk, boost::scoped_ptr<boost::thread>* thread, bool* inShutdown) : _lk(lk), _thread(thread), _inShutdown(inShutdown) {} ~StoreThreadGuard() { if (!_lk->owns_lock()) { _lk->lock(); } if (*_inShutdown) { return; } _thread->get()->detach(); _thread->reset(NULL); } private: boost::unique_lock<boost::mutex>* const _lk; boost::scoped_ptr<boost::thread>* const _thread; bool* const _inShutdown; }; boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t()); StoreThreadGuard guard(&lk, &_heartbeatReconfigThread, &_inShutdown); const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig( _externalState.get(), newConfig); if (myIndex.getStatus() == ErrorCodes::NodeNotFound) { lk.lock(); // If this node absent in newConfig, and this node was not previously initialized, // return to kConfigUninitialized immediately, rather than storing the config and // transitioning into the RS_REMOVED state. See SERVER-15740. 
if (!_rsConfig.isInitialized()) { invariant(_rsConfigState == kConfigHBReconfiguring); LOG(1) << "Ignoring new configuration in heartbeat response because we are " "uninitialized and not a member of the new configuration"; _setConfigState_inlock(kConfigUninitialized); return; } lk.unlock(); } if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) { warning() << "Not persisting new configuration in heartbeat response to disk because " "it is invalid: "<< myIndex.getStatus(); } else { boost::scoped_ptr<OperationContext> txn( _externalState->createOperationContext("WriteReplSetConfig")); Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON()); lk.lock(); if (!status.isOK()) { error() << "Ignoring new configuration in heartbeat response because we failed to" " write it to stable storage; " << status; invariant(_rsConfigState == kConfigHBReconfiguring); if (_rsConfig.isInitialized()) { _setConfigState_inlock(kConfigSteady); } else { _setConfigState_inlock(kConfigUninitialized); } return; } lk.unlock(); _externalState->startThreads(); } const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn( stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish, this, stdx::placeholders::_1, newConfig, myIndex)); if (_currentState.primary()) { // If the primary is receiving a heartbeat reconfig, that strongly suggests // that there has been a force reconfiguration. In any event, it might lead // to this node stepping down as primary, so we'd better do it with the global // lock. _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn); } else { _replExecutor.scheduleWork(reconfigFinishFn); } }
/**
 * Handles an incoming heartbeat described by 'args' and fills in 'response'.
 *
 * Returns BadValue if the protocol version is not 1 or if the sender's set name does not match
 * ours (checked against both _settings and theReplSet). Returns OK with the message
 * "still initializing" if theReplSet is null or still loading its config; in that case a
 * non-empty sender host is recorded in _settings.discoveredSeeds. Otherwise the response is
 * populated from theReplSet's current state, our config is attached if its version is newer
 * than the sender's, and, if the sender can be resolved to a member, a msgUpdateHBRecv message
 * for it is sent to theReplSet's manager.
 */
Status LegacyReplicationCoordinator::processHeartbeat(const ReplSetHeartbeatArgs& args,
                                                      ReplSetHeartbeatResponse* response) {
    if (args.getProtocolVersion() != 1) {
        return Status(ErrorCodes::BadValue, "incompatible replset protocol version");
    }

    {
        // Compare against the set name from our settings.
        if (_settings.ourSetName() != args.getSetName()) {
            log() << "replSet set names do not match, our cmdline: " << _settings.replSet
                  << rsLog;
            log() << "replSet s: " << args.getSetName() << rsLog;
            response->noteMismatched();
            return Status(ErrorCodes::BadValue, "repl set names do not match");
        }
    }

    response->noteReplSet();
    if( (theReplSet == 0) || (theReplSet->startupStatus == ReplSetImpl::LOADINGCONFIG) ) {
        // Not ready to answer properly yet; remember the sender's host (if given) in the
        // discovered-seeds set, under its mutex.
        if (!args.getSenderHost().empty()) {
            scoped_lock lck( _settings.discoveredSeeds_mx );
            _settings.discoveredSeeds.insert(args.getSenderHost().toString());
        }
        response->setHbMsg("still initializing");
        return Status::OK();
    }

    // Second name check, this time against the loaded replica set object.
    if (theReplSet->name() != args.getSetName()) {
        response->noteMismatched();
        return Status(ErrorCodes::BadValue, "repl set names do not match (2)");
    }
    response->setSetName(theReplSet->name());

    MemberState currentState = theReplSet->state();
    response->setState(currentState.s);
    // The election time is reported only while we are primary.
    if (currentState == MemberState::RS_PRIMARY) {
        response->setElectionTime(theReplSet->getElectionTime().asDate());
    }

    response->setElectable(theReplSet->iAmElectable());
    response->setHbMsg(theReplSet->hbmsg());
    response->setTime((long long) time(0));
    response->setOpTime(theReplSet->lastOpTimeWritten.asDate());
    const Member *syncTarget = BackgroundSync::get()->getSyncTarget();
    if (syncTarget) {
        response->setSyncingTo(syncTarget->fullName());
    }

    int v = theReplSet->config().version;
    response->setVersion(v);
    // Attach our config when it is newer than the sender's.
    if (v > args.getConfigVersion()) {
        ReplicaSetConfig config;
        fassert(18635, config.initialize(theReplSet->config().asBson()));
        response->setConfig(config);
    }

    // Resolve the sender: by member id when the config versions agree and an id was supplied,
    // otherwise (or if that lookup yields nothing) by host name.
    Member* from = NULL;
    if (v == args.getConfigVersion() && args.getSenderId() != -1) {
        from = theReplSet->getMutableMember(args.getSenderId());
    }
    if (!from) {
        from = theReplSet->findByName(args.getSenderHost().toString());
        if (!from) {
            // Sender could not be resolved to a member; nothing more to record.
            return Status::OK();
        }
    }

    // if we thought that this node is down, let it know
    if (!from->hbinfo().up()) {
        response->noteStateDisagreement();
    }

    // note that we got a heartbeat from this node
    theReplSet->mgr->send(stdx::bind(&ReplSet::msgUpdateHBRecv,
                                     theReplSet,
                                     from->hbinfo().id(),
                                     time(0)));

    return Status::OK();
}
/**
 * Executor callback that validates and persists a config received via heartbeat, then
 * schedules _heartbeatReconfigFinish to install it.
 *
 * - If the callback was cancelled, logs that fact and returns.
 * - If this node is not in 'newConfig' and has no config installed yet, the state returns to
 *   kConfigUninitialized and nothing is stored or scheduled (see SERVER-15740).
 * - If validation fails for any reason other than NodeNotFound, the config is not written to
 *   disk, but the finish step is still scheduled with the failing status.
 * - If the write to local storage fails, the state returns to kConfigSteady or
 *   kConfigUninitialized (depending on whether a config is installed) and nothing is scheduled.
 */
void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
    const ReplicationExecutor::CallbackData& cbd, const ReplicaSetConfig& newConfig) {
    if (cbd.status.code() == ErrorCodes::CallbackCanceled) {
        log() << "The callback to persist the replica set configuration was canceled - "
              << "the configuration was not persisted but was used: " << newConfig.toBSON();
        return;
    }

    // The lock starts out unlocked and is taken only around accesses to coordinator state.
    boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());

    const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
        _externalState.get(), newConfig);

    if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
        lk.lock();
        // If this node absent in newConfig, and this node was not previously initialized,
        // return to kConfigUninitialized immediately, rather than storing the config and
        // transitioning into the RS_REMOVED state.  See SERVER-15740.
        if (!_rsConfig.isInitialized()) {
            invariant(_rsConfigState == kConfigHBReconfiguring);
            LOG(1) << "Ignoring new configuration in heartbeat response because we are "
                      "uninitialized and not a member of the new configuration";
            _setConfigState_inlock(kConfigUninitialized);
            return;
        }
        lk.unlock();
    }

    if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
        warning() << "Not persisting new configuration in heartbeat response to disk because "
                     "it is invalid: "<< myIndex.getStatus();
    } else {
        // The write uses the OperationContext supplied with the callback and is done without
        // holding _mutex.
        Status status = _externalState->storeLocalConfigDocument(cbd.txn, newConfig.toBSON());

        lk.lock();
        if (!status.isOK()) {
            error() << "Ignoring new configuration in heartbeat response because we failed to"
                       " write it to stable storage; " << status;
            invariant(_rsConfigState == kConfigHBReconfiguring);
            if (_rsConfig.isInitialized()) {
                _setConfigState_inlock(kConfigSteady);
            } else {
                _setConfigState_inlock(kConfigUninitialized);
            }
            return;
        }

        lk.unlock();

        _externalState->startThreads();
    }

    const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
        stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                   this,
                   stdx::placeholders::_1,
                   newConfig,
                   myIndex));

    // Re-take _mutex before reading _memberState; it stays held until this function returns.
    // NOTE(review): the previous comment here said the lock keeps reconfigFinishFn from
    // finishing until _heartbeatReconfigThread has been reset, but this function never touches
    // _heartbeatReconfigThread - confirm whether that rationale still applies.
    lk.lock();
    if (_memberState.primary()) {
        // If the primary is receiving a heartbeat reconfig, that strongly suggests
        // that there has been a force reconfiguration.  In any event, it might lead
        // to this node stepping down as primary, so we'd better do it with the global
        // lock.
        _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
    } else {
        _replExecutor.scheduleWork(reconfigFinishFn);
    }
}