// Persists a replica set configuration received in a heartbeat response to
// local stable storage, then validates it and schedules
// _heartbeatReconfigFinish on the replication executor to install it.
//
// On a failed disk write the new config is abandoned and the reconfig state
// machine is rolled back (to kConfigSteady if a config was already installed,
// otherwise to kConfigUninitialized).
void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
    {
        // Scope the OperationContext to the disk write only.
        boost::scoped_ptr<OperationContext> txn(_externalState->createOperationContext());
        Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());
        if (!status.isOK()) {
            error() << "Ignoring new configuration in heartbeat response because we failed to"
                " write it to stable storage; " << status;
            // Roll the reconfig state machine back under _mutex.
            boost::lock_guard<boost::mutex> lk(_mutex);
            invariant(_rsConfigState == kConfigHBReconfiguring);
            if (_rsConfig.isInitialized()) {
                _setConfigState_inlock(kConfigSteady);
            }
            else {
                // This is the _only_ case where we can return to kConfigUninitialized from
                // kConfigHBReconfiguring.
                _setConfigState_inlock(kConfigUninitialized);
            }
            return;
        }
    }

    // Validate the stored config and compute this node's index in it; the
    // result (including any validation error) is forwarded to the finish
    // callback, which is responsible for acting on it.
    const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
            _externalState.get(),
            newConfig);

    _replExecutor.scheduleWork(
            stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                       this,
                       stdx::placeholders::_1,
                       newConfig,
                       myIndex));
}
// Executor-callback body for persisting a replica set configuration received
// in a heartbeat response: validates the new config, writes it to local
// stable storage (using the OperationContext supplied in cbd.txn), and then
// schedules _heartbeatReconfigFinish to install it in memory.
//
// If this node is primary, the finish callback is scheduled under the global
// exclusive lock, since installing the config may force a stepdown.
void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
        const ReplicationExecutor::CallbackData& cbd,
        const ReplicaSetConfig& newConfig) {
    // If the executor canceled this callback (e.g. during shutdown), skip the
    // disk write entirely.
    if (cbd.status.code() == ErrorCodes::CallbackCanceled) {
        log() << "The callback to persist the replica set configuration was canceled - "
              << "the configuration was not persisted but was used: " << newConfig.toBSON();
        return;
    }

    // Deferred lock: _mutex is taken only around the state-machine reads and
    // writes below; validation and the disk write run without holding it.
    boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());

    const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
            _externalState.get(),
            newConfig);

    if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
        lk.lock();
        // If this node absent in newConfig, and this node was not previously initialized,
        // return to kConfigUninitialized immediately, rather than storing the config and
        // transitioning into the RS_REMOVED state.  See SERVER-15740.
        if (!_rsConfig.isInitialized()) {
            invariant(_rsConfigState == kConfigHBReconfiguring);
            LOG(1) << "Ignoring new configuration in heartbeat response because we are "
                    "uninitialized and not a member of the new configuration";
            _setConfigState_inlock(kConfigUninitialized);
            return;
        }
        lk.unlock();
    }

    if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
        // Invalid config (other than NodeNotFound): don't persist it, but
        // still schedule the finish callback below, which receives the error
        // via myIndex.
        warning() << "Not persisting new configuration in heartbeat response to disk because "
                "it is invalid: "<< myIndex.getStatus();
    }
    else {
        Status status = _externalState->storeLocalConfigDocument(cbd.txn, newConfig.toBSON());

        lk.lock();
        if (!status.isOK()) {
            // Disk write failed: abandon the new config and roll back the
            // reconfig state machine.
            error() << "Ignoring new configuration in heartbeat response because we failed to"
                " write it to stable storage; " << status;
            invariant(_rsConfigState == kConfigHBReconfiguring);
            if (_rsConfig.isInitialized()) {
                _setConfigState_inlock(kConfigSteady);
            }
            else {
                // Only place we may fall back from kConfigHBReconfiguring to
                // kConfigUninitialized: the node never had a config and the
                // new one could not be persisted.
                _setConfigState_inlock(kConfigUninitialized);
            }
            return;
        }
        lk.unlock();

        // Config persisted successfully; ensure replication's background
        // threads are running before finishing the reconfig.
        _externalState->startThreads();
    }

    const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
            stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                       this,
                       stdx::placeholders::_1,
                       newConfig,
                       myIndex));

    // Make sure that the reconfigFinishFn doesn't finish until we've reset
    // _heartbeatReconfigThread.
    // NOTE(review): the comment above looks stale -- there is no
    // _heartbeatReconfigThread in this version of the function; the lock taken
    // here does, however, synchronize the _memberState read below.
    lk.lock();
    if (_memberState.primary()) {
        // If the primary is receiving a heartbeat reconfig, that strongly suggests
        // that there has been a force reconfiguration.  In any event, it might lead
        // to this node stepping down as primary, so we'd better do it with the global
        // lock.
        _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
    }
    else {
        _replExecutor.scheduleWork(reconfigFinishFn);
    }
}
// Thread-body variant of persisting a replica set configuration received in a
// heartbeat response: validates the new config, writes it to local stable
// storage, and schedules _heartbeatReconfigFinish on the replication executor.
// The StoreThreadGuard below detaches and clears _heartbeatReconfigThread on
// exit, which suggests this function runs on that thread -- TODO confirm
// against the caller.
void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
    // RAII helper: on scope exit, re-acquires the lock if it is not currently
    // held, then (unless shutting down) detaches the stored thread and resets
    // the scoped_ptr so a later reconfig can spawn a fresh thread.  During
    // shutdown the thread object is left untouched -- presumably so the
    // shutdown path can manage it; verify against the shutdown code.
    class StoreThreadGuard {
    public:
        StoreThreadGuard(boost::unique_lock<boost::mutex>* lk,
                         boost::scoped_ptr<boost::thread>* thread,
                         bool* inShutdown) :
            _lk(lk),
            _thread(thread),
            _inShutdown(inShutdown) {}
        ~StoreThreadGuard() {
            if (!_lk->owns_lock()) {
                _lk->lock();
            }
            if (*_inShutdown) {
                return;
            }
            _thread->get()->detach();
            _thread->reset(NULL);
        }

    private:
        boost::unique_lock<boost::mutex>* const _lk;
        boost::scoped_ptr<boost::thread>* const _thread;
        bool* const _inShutdown;
    };

    // Deferred lock: _mutex is taken only around the state-machine updates;
    // validation and the disk write run without holding it.  The guard uses
    // this same lock object in its destructor.
    boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());
    StoreThreadGuard guard(&lk, &_heartbeatReconfigThread, &_inShutdown);

    const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
            _externalState.get(),
            newConfig);

    if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
        lk.lock();
        // If this node absent in newConfig, and this node was not previously initialized,
        // return to kConfigUninitialized immediately, rather than storing the config and
        // transitioning into the RS_REMOVED state.  See SERVER-15740.
        if (!_rsConfig.isInitialized()) {
            invariant(_rsConfigState == kConfigHBReconfiguring);
            LOG(1) << "Ignoring new configuration in heartbeat response because we are "
                    "uninitialized and not a member of the new configuration";
            _setConfigState_inlock(kConfigUninitialized);
            return;
        }
        lk.unlock();
    }

    if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
        // Invalid config (other than NodeNotFound): don't persist it, but
        // still schedule the finish callback below, which receives the error
        // via myIndex.
        warning() << "Not persisting new configuration in heartbeat response to disk because "
                "it is invalid: "<< myIndex.getStatus();
    }
    else {
        // Scope the OperationContext to the disk write only.
        boost::scoped_ptr<OperationContext> txn(
                _externalState->createOperationContext("WriteReplSetConfig"));
        Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());

        lk.lock();
        if (!status.isOK()) {
            // Disk write failed: abandon the new config and roll back the
            // reconfig state machine.
            error() << "Ignoring new configuration in heartbeat response because we failed to"
                " write it to stable storage; " << status;
            invariant(_rsConfigState == kConfigHBReconfiguring);
            if (_rsConfig.isInitialized()) {
                _setConfigState_inlock(kConfigSteady);
            }
            else {
                // Only place we may fall back from kConfigHBReconfiguring to
                // kConfigUninitialized: the node never had a config and the
                // new one could not be persisted.
                _setConfigState_inlock(kConfigUninitialized);
            }
            return;
        }
        lk.unlock();

        // Config persisted successfully; ensure replication's background
        // threads are running before finishing the reconfig.
        _externalState->startThreads();
    }

    const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
            stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                       this,
                       stdx::placeholders::_1,
                       newConfig,
                       myIndex));

    // NOTE(review): _currentState is read here without _mutex held on some
    // paths (the invalid-config branch never locks; the success branch
    // unlocked above) -- confirm whether this read requires synchronization.
    if (_currentState.primary()) {
        // If the primary is receiving a heartbeat reconfig, that strongly suggests
        // that there has been a force reconfiguration.  In any event, it might lead
        // to this node stepping down as primary, so we'd better do it with the global
        // lock.
        _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
    }
    else {
        _replExecutor.scheduleWork(reconfigFinishFn);
    }
}