void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) { { boost::scoped_ptr<OperationContext> txn(_externalState->createOperationContext()); Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON()); if (!status.isOK()) { error() << "Ignoring new configuration in heartbeat response because we failed to" " write it to stable storage; " << status; boost::lock_guard<boost::mutex> lk(_mutex); invariant(_rsConfigState == kConfigHBReconfiguring); if (_rsConfig.isInitialized()) { _setConfigState_inlock(kConfigSteady); } else { // This is the _only_ case where we can return to kConfigUninitialized from // kConfigHBReconfiguring. _setConfigState_inlock(kConfigUninitialized); } return; } } const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig( _externalState.get(), newConfig); _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish, this, stdx::placeholders::_1, newConfig, myIndex)); }
void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) { boost::lock_guard<boost::mutex> lk(_mutex); if (_inShutdown) { return; } switch (_rsConfigState) { case kConfigStartingUp: LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() << " because still attempting to load local configuration information"; return; case kConfigUninitialized: case kConfigSteady: LOG(1) << "Received new config via heartbeat with version " << newConfig.getConfigVersion(); break; case kConfigInitiating: case kConfigReconfiguring: case kConfigHBReconfiguring: LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() << " because already in the midst of a configuration process"; return; default: severe() << "Reconfiguration request occurred while _rsConfigState == " << int(_rsConfigState) << "; aborting."; fassertFailed(18807); } _setConfigState_inlock(kConfigHBReconfiguring); invariant(!_rsConfig.isInitialized() || _rsConfig.getConfigVersion() < newConfig.getConfigVersion()); if (_freshnessChecker) { _freshnessChecker->cancel(&_replExecutor); if (_electCmdRunner) { _electCmdRunner->cancel(&_replExecutor); } _replExecutor.onEvent( _electionFinishedEvent, stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled, this, stdx::placeholders::_1, newConfig)); return; } _replExecutor.scheduleDBWork(stdx::bind( &ReplicationCoordinatorImpl::_heartbeatReconfigStore, this, stdx::placeholders::_1, newConfig)); }
void ReplicationCoordinatorImpl::_heartbeatReconfigStore( const ReplicationExecutor::CallbackData& cbd, const ReplicaSetConfig& newConfig) { if (cbd.status.code() == ErrorCodes::CallbackCanceled) { log() << "The callback to persist the replica set configuration was canceled - " << "the configuration was not persisted but was used: " << newConfig.toBSON(); return; } boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t()); const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig( _externalState.get(), newConfig); if (myIndex.getStatus() == ErrorCodes::NodeNotFound) { lk.lock(); // If this node absent in newConfig, and this node was not previously initialized, // return to kConfigUninitialized immediately, rather than storing the config and // transitioning into the RS_REMOVED state. See SERVER-15740. if (!_rsConfig.isInitialized()) { invariant(_rsConfigState == kConfigHBReconfiguring); LOG(1) << "Ignoring new configuration in heartbeat response because we are " "uninitialized and not a member of the new configuration"; _setConfigState_inlock(kConfigUninitialized); return; } lk.unlock(); } if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) { warning() << "Not persisting new configuration in heartbeat response to disk because " "it is invalid: "<< myIndex.getStatus(); } else { Status status = _externalState->storeLocalConfigDocument(cbd.txn, newConfig.toBSON()); lk.lock(); if (!status.isOK()) { error() << "Ignoring new configuration in heartbeat response because we failed to" " write it to stable storage; " << status; invariant(_rsConfigState == kConfigHBReconfiguring); if (_rsConfig.isInitialized()) { _setConfigState_inlock(kConfigSteady); } else { _setConfigState_inlock(kConfigUninitialized); } return; } lk.unlock(); _externalState->startThreads(); } const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn( stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish, this, stdx::placeholders::_1, newConfig, myIndex)); // Make sure that the reconfigFinishFn doesn't finish until we've reset // _heartbeatReconfigThread. lk.lock(); if (_memberState.primary()) { // If the primary is receiving a heartbeat reconfig, that strongly suggests // that there has been a force reconfiguration. In any event, it might lead // to this node stepping down as primary, so we'd better do it with the global // lock. _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn); } else { _replExecutor.scheduleWork(reconfigFinishFn); } }
void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) { class StoreThreadGuard { public: StoreThreadGuard(boost::unique_lock<boost::mutex>* lk, boost::scoped_ptr<boost::thread>* thread, bool* inShutdown) : _lk(lk), _thread(thread), _inShutdown(inShutdown) {} ~StoreThreadGuard() { if (!_lk->owns_lock()) { _lk->lock(); } if (*_inShutdown) { return; } _thread->get()->detach(); _thread->reset(NULL); } private: boost::unique_lock<boost::mutex>* const _lk; boost::scoped_ptr<boost::thread>* const _thread; bool* const _inShutdown; }; boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t()); StoreThreadGuard guard(&lk, &_heartbeatReconfigThread, &_inShutdown); const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig( _externalState.get(), newConfig); if (myIndex.getStatus() == ErrorCodes::NodeNotFound) { lk.lock(); // If this node absent in newConfig, and this node was not previously initialized, // return to kConfigUninitialized immediately, rather than storing the config and // transitioning into the RS_REMOVED state. See SERVER-15740. if (!_rsConfig.isInitialized()) { invariant(_rsConfigState == kConfigHBReconfiguring); LOG(1) << "Ignoring new configuration in heartbeat response because we are " "uninitialized and not a member of the new configuration"; _setConfigState_inlock(kConfigUninitialized); return; } lk.unlock(); } if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) { warning() << "Not persisting new configuration in heartbeat response to disk because " "it is invalid: "<< myIndex.getStatus(); } else { boost::scoped_ptr<OperationContext> txn( _externalState->createOperationContext("WriteReplSetConfig")); Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON()); lk.lock(); if (!status.isOK()) { error() << "Ignoring new configuration in heartbeat response because we failed to" " write it to stable storage; " << status; invariant(_rsConfigState == kConfigHBReconfiguring); if (_rsConfig.isInitialized()) { _setConfigState_inlock(kConfigSteady); } else { _setConfigState_inlock(kConfigUninitialized); } return; } lk.unlock(); _externalState->startThreads(); } const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn( stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish, this, stdx::placeholders::_1, newConfig, myIndex)); if (_currentState.primary()) { // If the primary is receiving a heartbeat reconfig, that strongly suggests // that there has been a force reconfiguration. In any event, it might lead // to this node stepping down as primary, so we'd better do it with the global // lock. _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn); } else { _replExecutor.scheduleWork(reconfigFinishFn); } }