void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
     {
         boost::scoped_ptr<OperationContext> txn(_externalState->createOperationContext());
         Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());
         if (!status.isOK()) {
             error() << "Ignoring new configuration in heartbeat response because we failed to"
                 " write it to stable storage; " << status;
             boost::lock_guard<boost::mutex> lk(_mutex);
             invariant(_rsConfigState == kConfigHBReconfiguring);
             if (_rsConfig.isInitialized()) {
                 _setConfigState_inlock(kConfigSteady);
             }
             else {
                 // This is the _only_ case where we can return to kConfigUninitialized from
                 // kConfigHBReconfiguring.
                 _setConfigState_inlock(kConfigUninitialized);
             }
             return;
         }
     }
     const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
             _externalState.get(),
             newConfig);
     _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                                           this,
                                           stdx::placeholders::_1,
                                           newConfig,
                                           myIndex));
 }
    void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) {
        boost::lock_guard<boost::mutex> lk(_mutex);
        if (_inShutdown) {
            return;
        }

        switch (_rsConfigState) {
        case kConfigStartingUp:
            LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
                " because still attempting to load local configuration information";
            return;
        case kConfigUninitialized:
        case kConfigSteady:
            LOG(1) << "Received new config via heartbeat with version " <<
                newConfig.getConfigVersion();
            break;
        case kConfigInitiating:
        case kConfigReconfiguring:
        case kConfigHBReconfiguring:
            LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
                " because already in the midst of a configuration process";
            return;
        default:
            severe() << "Reconfiguration request occurred while _rsConfigState == " <<
                int(_rsConfigState) << "; aborting.";
            fassertFailed(18807);
        }
        _setConfigState_inlock(kConfigHBReconfiguring);
        invariant(!_rsConfig.isInitialized() ||
                  _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
        if (_freshnessChecker) {
            _freshnessChecker->cancel(&_replExecutor);
            if (_electCmdRunner) {
                _electCmdRunner->cancel(&_replExecutor);
            }
            _replExecutor.onEvent(
                    _electionFinishedEvent,
                    stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled,
                               this,
                               stdx::placeholders::_1,
                               newConfig));
            return;
        }
        _replExecutor.scheduleDBWork(stdx::bind(
            &ReplicationCoordinatorImpl::_heartbeatReconfigStore,
            this,
            stdx::placeholders::_1,
            newConfig));
    }
    // Executor callback that persists a heartbeat-delivered config and schedules
    // _heartbeatReconfigFinish.
    //
    // cbd        callback data from the executor; cbd.status is checked for cancellation and
    //            cbd.txn is used for the storage write.
    // newConfig  the configuration received via heartbeat.
    //
    // _mutex is taken only around accesses to coordinator state; validation and the storage
    // write run without it.  Early returns: callback canceled; this node is absent from
    // newConfig and has no initialized config; the storage write failed.
    void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
        const ReplicationExecutor::CallbackData& cbd,
        const ReplicaSetConfig& newConfig) {

        if (cbd.status.code() == ErrorCodes::CallbackCanceled) {
            log() << "The callback to persist the replica set configuration was canceled - "
                  << "the configuration was not persisted but was used: " << newConfig.toBSON();
            return;
        }

        // Constructed unlocked; locked and unlocked explicitly below.
        boost::unique_lock<boost::mutex> stateLock(_mutex, boost::defer_lock_t());

        const StatusWith<int> selfIndex =
            validateConfigForHeartbeatReconfig(_externalState.get(), newConfig);
        const bool nodeMissing = (selfIndex.getStatus() == ErrorCodes::NodeNotFound);

        if (nodeMissing) {
            stateLock.lock();
            // A node that was never initialized and is not a member of newConfig goes
            // straight back to kConfigUninitialized instead of storing the config and
            // transitioning into the RS_REMOVED state.  See SERVER-15740.
            if (!_rsConfig.isInitialized()) {
                invariant(_rsConfigState == kConfigHBReconfiguring);
                LOG(1) << "Ignoring new configuration in heartbeat response because we are "
                          "uninitialized and not a member of the new configuration";
                _setConfigState_inlock(kConfigUninitialized);
                return;
            }
            stateLock.unlock();
        }

        if (selfIndex.getStatus().isOK() || nodeMissing) {
            // A valid config, or one that merely omits this node, is written to disk.
            const Status storeStatus =
                _externalState->storeLocalConfigDocument(cbd.txn, newConfig.toBSON());

            stateLock.lock();
            if (!storeStatus.isOK()) {
                error() << "Ignoring new configuration in heartbeat response because we failed to"
                           " write it to stable storage; " << storeStatus;
                invariant(_rsConfigState == kConfigHBReconfiguring);
                // Revert to whichever resting state matches the currently installed config.
                _setConfigState_inlock(
                        _rsConfig.isInitialized() ? kConfigSteady : kConfigUninitialized);
                return;
            }
            stateLock.unlock();

            _externalState->startThreads();
        }
        else {
            // Any other validation failure: skip the write, but still run the finish step,
            // which receives the failed status through selfIndex.
            warning() << "Not persisting new configuration in heartbeat response to disk because "
                         "it is invalid: " << selfIndex.getStatus();
        }

        const stdx::function<void (const ReplicationExecutor::CallbackData&)> finishCallback(
                stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                           this,
                           stdx::placeholders::_1,
                           newConfig,
                           selfIndex));

        // _memberState is read, and the finish step scheduled, with _mutex held; the lock is
        // released when stateLock goes out of scope.
        stateLock.lock();
        if (!_memberState.primary()) {
            _replExecutor.scheduleWork(finishCallback);
        }
        else {
            // If the primary is receiving a heartbeat reconfig, that strongly suggests
            // that there has been a force reconfiguration.  In any event, it might lead
            // to this node stepping down as primary, so we'd better do it with the global
            // lock.
            _replExecutor.scheduleWorkWithGlobalExclusiveLock(finishCallback);
        }
    }
Пример #4
0
    // Body of the heartbeat-reconfig store thread (_heartbeatReconfigThread): validates
    // newConfig, persists it to local storage when appropriate, and schedules
    // _heartbeatReconfigFinish on the replication executor.
    //
    // _mutex is taken only around accesses to coordinator state; validation and the storage
    // write run without it.  Early returns: this node is absent from newConfig and has no
    // initialized config; the storage write failed.  On every exit path StoreThreadGuard
    // detaches and resets _heartbeatReconfigThread unless the coordinator is shutting down.
    void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
        // Scope guard that releases the thread object running this function.  Its destructor
        // makes sure the lock is held (acquiring it if needed), then, unless *inShutdown is
        // set, detaches the thread and clears the owning pointer.
        class StoreThreadGuard {
        public:
            StoreThreadGuard(boost::unique_lock<boost::mutex>* lk,
                             boost::scoped_ptr<boost::thread>* thread,
                             bool* inShutdown) :
                _lk(lk),
                _thread(thread),
                _inShutdown(inShutdown) {}
            ~StoreThreadGuard() {
                if (!_lk->owns_lock()) {
                    _lk->lock();
                }
                if (*_inShutdown) {
                    // Leave the thread object alone during shutdown.
                    return;
                }
                _thread->get()->detach();
                _thread->reset(NULL);
            }

        private:
            boost::unique_lock<boost::mutex>* const _lk;
            boost::scoped_ptr<boost::thread>* const _thread;
            bool* const _inShutdown;
        };

        // lk starts unlocked.  guard is declared after lk, so it is destroyed first and lk
        // then releases the mutex the guard's destructor left held.
        boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());
        StoreThreadGuard guard(&lk, &_heartbeatReconfigThread, &_inShutdown);

        const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
                _externalState.get(),
                newConfig);

        if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
            lk.lock();
            // If this node absent in newConfig, and this node was not previously initialized,
            // return to kConfigUninitialized immediately, rather than storing the config and
            // transitioning into the RS_REMOVED state.  See SERVER-15740.
            if (!_rsConfig.isInitialized()) {
                invariant(_rsConfigState == kConfigHBReconfiguring);
                LOG(1) << "Ignoring new configuration in heartbeat response because we are "
                    "uninitialized and not a member of the new configuration";
                _setConfigState_inlock(kConfigUninitialized);
                return;
            }
            lk.unlock();
        }

        if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
            // Invalid config: skip the write, but still run the finish step, which receives
            // the failed status through myIndex.
            warning() << "Not persisting new configuration in heartbeat response to disk because "
                    "it is invalid: "<< myIndex.getStatus();
        }
        else {
            boost::scoped_ptr<OperationContext> txn(
                                      _externalState->createOperationContext("WriteReplSetConfig"));
            Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());

            lk.lock();
            if (!status.isOK()) {
                error() << "Ignoring new configuration in heartbeat response because we failed to"
                    " write it to stable storage; " << status;
                invariant(_rsConfigState == kConfigHBReconfiguring);
                // Revert to whichever resting state matches the currently installed config.
                if (_rsConfig.isInitialized()) {
                    _setConfigState_inlock(kConfigSteady);
                }
                else {
                    _setConfigState_inlock(kConfigUninitialized);
                }
                return;
            }

            lk.unlock();

            _externalState->startThreads();
        }

        const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
                stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                           this,
                           stdx::placeholders::_1,
                           newConfig,
                           myIndex));

        // Take _mutex before reading _currentState: every other access to coordinator state
        // in this function happens under lk, and at this point lk is not held on any path.
        // The guard's destructor copes with the lock already being owned.
        // NOTE(review): presumably this also keeps _heartbeatReconfigFinish (if it takes
        // _mutex) from completing before the guard resets _heartbeatReconfigThread --
        // confirm against that function.
        lk.lock();
        if (_currentState.primary()) {
            // If the primary is receiving a heartbeat reconfig, that strongly suggests
            // that there has been a force reconfiguration.  In any event, it might lead
            // to this node stepping down as primary, so we'd better do it with the global
            // lock.
            _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
        }
        else {
            _replExecutor.scheduleWork(reconfigFinishFn);
        }
    }