コード例 #1
0
ファイル: oplog_fetcher.cpp プロジェクト: ChineseDr/mongo
// Constructs an OplogFetcher.
//
// The underlying _fetcher is built in the member initializer list from:
//   - the executor 'exec' and the host 'source';
//   - the database name of 'oplogNSS';
//   - the command object returned by makeFindCommandObject() for 'oplogNSS' and
//     'lastFetched.opTime';
//   - a callback bound to OplogFetcher::_callback that forwards the first and third arguments
//     handed to it;
//   - the metadata object from makeMetadataObject(), which is told whether the config's protocol
//     version equals 1 (its result is passed through uassertStatusOK);
//   - the config's election timeout period.
//
// The constructor body then uasserts:
//   - ErrorCodes::BadValue if 'lastFetched.opTime' is null;
//   - ErrorCodes::InvalidReplicaSetConfig if 'config' is not initialized;
//   - ErrorCodes::BadValue if 'enqueueDocumentsFn' or 'onShutdownCallbackFn' is empty.
//
// NOTE(review): these argument checks only run after every member, including _fetcher, has
// already been initialized from the unchecked arguments.
OplogFetcher::OplogFetcher(executor::TaskExecutor* exec,
                           OpTimeWithHash lastFetched,
                           HostAndPort source,
                           NamespaceString oplogNSS,
                           ReplicaSetConfig config,
                           DataReplicatorExternalState* dataReplicatorExternalState,
                           EnqueueDocumentsFn enqueueDocumentsFn,
                           OnShutdownCallbackFn onShutdownCallbackFn)
    : _dataReplicatorExternalState(dataReplicatorExternalState),
      _fetcher(exec,
               source,
               oplogNSS.db().toString(),
               makeFindCommandObject(dataReplicatorExternalState, oplogNSS, lastFetched.opTime),
               // Only the first and third callback arguments are forwarded to _callback.
               stdx::bind(
                   &OplogFetcher::_callback, this, stdx::placeholders::_1, stdx::placeholders::_3),
               uassertStatusOK(makeMetadataObject(config.getProtocolVersion() == 1LL)),
               config.getElectionTimeoutPeriod()),
      _enqueueDocumentsFn(enqueueDocumentsFn),
      _awaitDataTimeout(calculateAwaitDataTimeout(config)),
      _onShutdownCallbackFn(onShutdownCallbackFn),
      _lastFetched(lastFetched) {
    // Argument validation; each failure is reported through uassert with the code shown.
    uassert(ErrorCodes::BadValue, "null last optime fetched", !lastFetched.opTime.isNull());
    uassert(ErrorCodes::InvalidReplicaSetConfig,
            "uninitialized replica set configuration",
            config.isInitialized());
    uassert(ErrorCodes::BadValue, "null enqueueDocuments function", enqueueDocumentsFn);
    uassert(ErrorCodes::BadValue, "null onShutdownCallback function", onShutdownCallbackFn);
}
コード例 #2
0
// Test helper: processes (number of config members - 1) ready requests on the mock network.
//
// A request that parses as either ReplSetHeartbeatArgsV1 or ReplSetHeartbeatArgs is answered with
// a heartbeat response that reports the config's set name, SECONDARY state, the config's version
// and an applied optime of (Timestamp(100, 2), term 0).  Any other request is logged as an error
// and black-holed.  Ready network operations are run after every request.
void ReplCoordTest::simulateEnoughHeartbeatsForAllNodesUp() {
    ReplicationCoordinatorImpl* replCoord = getReplCoord();
    ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
    NetworkInterfaceMock* net = getNet();
    net->enterNetwork();
    for (int i = 0; i < rsConfig.getNumMembers() - 1; ++i) {
        const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
        const RemoteCommandRequest& request = noi->getRequest();
        log() << request.target.toString() << " processing " << request.cmdObj;
        // Accept a heartbeat in either argument format; the second parser is only tried when the
        // first one rejects the command object.
        ReplSetHeartbeatArgsV1 hbArgs;
        ReplSetHeartbeatArgs hbArgsPV0;
        if (hbArgs.initialize(request.cmdObj).isOK() ||
            hbArgsPV0.initialize(request.cmdObj).isOK()) {
            ReplSetHeartbeatResponse hbResp;
            hbResp.setSetName(rsConfig.getReplSetName());
            hbResp.setState(MemberState::RS_SECONDARY);
            hbResp.setConfigVersion(rsConfig.getConfigVersion());
            hbResp.setAppliedOpTime(OpTime(Timestamp(100, 2), 0));
            net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
        } else {
            error() << "Black holing unexpected request to " << request.target << ": "
                    << request.cmdObj;
            net->blackHole(noi);
        }
        net->runReadyNetworkOperations();
    }
    net->exitNetwork();
}
コード例 #3
0
// Builds a mock replica set called 'setName' containing 'nodes' members.
//
// Member i gets the host name "$<setName><i>:27017"; member 0 is recorded as the primary host.
// For every member a MockRemoteDBServer is allocated, stored in _nodeMap under its host name and
// registered with the MockConnRegistry.  The accumulated config document (version 1) is then
// initialized and validated (fassert on failure) and installed through setConfig().
MockReplicaSet::MockReplicaSet(const string& setName, size_t nodes) : _setName(setName) {
    BSONObjBuilder cfg;
    cfg.append("_id", setName);
    cfg.append("version", 1);

    BSONArrayBuilder members(cfg.subarrayStart("members"));
    for (size_t memberId = 0; memberId < nodes; ++memberId) {
        std::stringstream hostStream;
        hostStream << "$" << setName;
        hostStream << memberId << ":27017";
        const string host(hostStream.str());

        // The first generated host doubles as the primary.
        if (memberId == 0) {
            _primaryHost = host;
        }

        MockRemoteDBServer* server = new MockRemoteDBServer(host);
        _nodeMap[host] = server;
        MockConnRegistry::get()->addServer(server);

        members.append(BSON("_id" << static_cast<int>(memberId) << "host" << host));
    }
    members.done();

    ReplicaSetConfig parsedConfig;
    fassert(28566, parsedConfig.initialize(cfg.obj()));
    fassert(28573, parsedConfig.validate());
    setConfig(parsedConfig);
}
コード例 #4
0
// Test helper: drives the mock network until the replication coordinator reports PRIMARY, then
// signals drain completion and checks the isMaster response before and after that signal.
//
// Asserts on entry that the coordinator is in SECONDARY state.  Each loop iteration handles one
// ready request:
//   - a request that parses as ReplSetHeartbeatArgs is answered with a heartbeat response that
//     reports the config's set name, SECONDARY state and the config's version;
//   - "replSetFresh" is answered with fresher=false and veto=false;
//   - "replSetElect" is answered with vote=1, echoing the request's "round";
//   - any other request is logged as an error and black-holed.
// Finally replyToReceivedHeartbeat() is called (number of config members - 1) times.
void ReplCoordTest::simulateSuccessfulElection() {
    OperationContextReplMock txn;
    ReplicationCoordinatorImpl* replCoord = getReplCoord();
    NetworkInterfaceMock* net = getNet();
    ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
    ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString();
    while (!replCoord->getMemberState().primary()) {
        log() << "Waiting on network in state " << replCoord->getMemberState();
        // The network is entered and exited once per handled request.
        getNet()->enterNetwork();
        const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
        const RemoteCommandRequest& request = noi->getRequest();
        log() << request.target.toString() << " processing " << request.cmdObj;
        ReplSetHeartbeatArgs hbArgs;
        if (hbArgs.initialize(request.cmdObj).isOK()) {
            // Heartbeat: build the reply by hand, with "ok" first and the response fields after.
            ReplSetHeartbeatResponse hbResp;
            hbResp.setSetName(rsConfig.getReplSetName());
            hbResp.setState(MemberState::RS_SECONDARY);
            hbResp.setConfigVersion(rsConfig.getConfigVersion());
            BSONObjBuilder respObj;
            respObj << "ok" << 1;
            hbResp.addToBSON(&respObj, false);
            net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
        } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetFresh") {
            net->scheduleResponse(
                noi,
                net->now(),
                makeResponseStatus(BSON("ok" << 1 << "fresher" << false << "opTime" << Date_t()
                                             << "veto" << false)));
        } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetElect") {
            net->scheduleResponse(noi,
                                  net->now(),
                                  makeResponseStatus(BSON("ok" << 1 << "vote" << 1 << "round"
                                                               << request.cmdObj["round"].OID())));
        } else {
            error() << "Black holing unexpected request to " << request.target << ": "
                    << request.cmdObj;
            net->blackHole(noi);
        }
        net->runReadyNetworkOperations();
        getNet()->exitNetwork();
    }
    ASSERT(replCoord->isWaitingForApplierToDrain());
    ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();

    // Before signalDrainComplete() the isMaster response must still report secondary; afterwards
    // it must report master.
    IsMasterResponse imResponse;
    replCoord->fillIsMasterForReplSet(&imResponse);
    ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
    ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
    replCoord->signalDrainComplete(&txn);
    replCoord->fillIsMasterForReplSet(&imResponse);
    ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
    ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();

    ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();

    // Consume the notification of election win.
    for (int i = 0; i < rsConfig.getNumMembers() - 1; i++) {
        replyToReceivedHeartbeat();
    }
}
コード例 #5
0
// Test helper: drives the mock network until the replication coordinator reports PRIMARY, then
// signals drain completion and checks the isMaster response before and after that signal.
//
// Asserts on entry that the coordinator is in SECONDARY state.  Each loop iteration handles one
// ready request:
//   - a request that parses as ReplSetHeartbeatArgsV1 is answered with a heartbeat response that
//     reports the config's set name, SECONDARY state and the config's version;
//   - "replSetRequestVotes" is answered with voteGranted=true, echoing the request's "term";
//   - "replSetDeclareElectionWinner" is answered with ok=1, echoing the request's "term";
//   - any other request is logged as an error and black-holed.
void ReplCoordTest::simulateSuccessfulV1Election() {
    OperationContextReplMock txn;
    ReplicationCoordinatorImpl* replCoord = getReplCoord();
    NetworkInterfaceMock* net = getNet();
    ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
    ASSERT(replCoord->getMemberState().secondary()) << replCoord->getMemberState().toString();
    while (!replCoord->getMemberState().primary()) {
        log() << "Waiting on network in state " << replCoord->getMemberState();
        // The network is entered and exited once per handled request.
        getNet()->enterNetwork();
        const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
        const RemoteCommandRequest& request = noi->getRequest();
        log() << request.target.toString() << " processing " << request.cmdObj;
        // Parse the command object exactly once; the outcome selects the reply below.
        ReplSetHeartbeatArgsV1 hbArgs;
        if (hbArgs.initialize(request.cmdObj).isOK()) {
            ReplSetHeartbeatResponse hbResp;
            hbResp.setSetName(rsConfig.getReplSetName());
            hbResp.setState(MemberState::RS_SECONDARY);
            hbResp.setConfigVersion(rsConfig.getConfigVersion());
            net->scheduleResponse(noi, net->now(), makeResponseStatus(hbResp.toBSON(true)));
        } else if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
            net->scheduleResponse(
                noi,
                net->now(),
                makeResponseStatus(BSON("ok" << 1 << "reason"
                                             << ""
                                             << "term" << request.cmdObj["term"].Long()
                                             << "voteGranted" << true)));
        } else if (request.cmdObj.firstElement().fieldNameStringData() ==
                   "replSetDeclareElectionWinner") {
            net->scheduleResponse(
                noi,
                net->now(),
                makeResponseStatus(BSON("ok" << 1 << "term" << request.cmdObj["term"].Long())));
        } else {
            error() << "Black holing unexpected request to " << request.target << ": "
                    << request.cmdObj;
            net->blackHole(noi);
        }
        net->runReadyNetworkOperations();
        getNet()->exitNetwork();
    }
    ASSERT(replCoord->isWaitingForApplierToDrain());
    ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();

    // Before signalDrainComplete() the isMaster response must still report secondary; afterwards
    // it must report master.
    IsMasterResponse imResponse;
    replCoord->fillIsMasterForReplSet(&imResponse);
    ASSERT_FALSE(imResponse.isMaster()) << imResponse.toBSON().toString();
    ASSERT_TRUE(imResponse.isSecondary()) << imResponse.toBSON().toString();
    replCoord->signalDrainComplete(&txn);
    replCoord->fillIsMasterForReplSet(&imResponse);
    ASSERT_TRUE(imResponse.isMaster()) << imResponse.toBSON().toString();
    ASSERT_FALSE(imResponse.isSecondary()) << imResponse.toBSON().toString();

    ASSERT(replCoord->getMemberState().primary()) << replCoord->getMemberState().toString();
}
コード例 #6
0
// Checks that 'newConfig' may be used to initiate a replica set.
//
// Returns the failing status if newConfig.validate() fails, and
// ErrorCodes::NewReplicaSetConfigurationIncompatible if the config version is not 1.  Otherwise
// returns whatever findSelfInConfigIfElectable() yields for 'externalState' and 'newConfig'.
StatusWith<int> validateConfigForInitiate(ReplicationCoordinatorExternalState* externalState,
                                          const ReplicaSetConfig& newConfig) {
    Status status = newConfig.validate();
    if (!status.isOK()) {
        return StatusWith<int>(status);
    }
    if (newConfig.getConfigVersion() != 1) {
        // The two literals used to be joined as "must " + " have", which put a double space in
        // the message.
        return StatusWith<int>(ErrorCodes::NewReplicaSetConfigurationIncompatible,
                               str::stream() << "Configuration used to initiate a replica set must "
                                             << "have version 1, but found "
                                             << newConfig.getConfigVersion());
    }
    return findSelfInConfigIfElectable(externalState, newConfig);
}
コード例 #7
0
    // Starts the process of adopting 'newConfig', a configuration received via heartbeat.
    //
    // Runs entirely under _mutex.  Does nothing when shutting down, when the local configuration
    // is still being loaded (kConfigStartingUp), or when another configuration process is already
    // in progress (initiating / reconfiguring / heartbeat-reconfiguring).  From
    // kConfigUninitialized or kConfigSteady it moves the state to kConfigHBReconfiguring and
    // schedules the store step.  Any other state value is fatal (fassertFailed 18807).
    void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(const ReplicaSetConfig& newConfig) {
        boost::lock_guard<boost::mutex> lk(_mutex);
        if (_inShutdown) {
            return;
        }

        switch (_rsConfigState) {
        case kConfigStartingUp:
            LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
                " because still attempting to load local configuration information";
            return;
        case kConfigUninitialized:
        case kConfigSteady:
            LOG(1) << "Received new config via heartbeat with version " <<
                newConfig.getConfigVersion();
            break;
        case kConfigInitiating:
        case kConfigReconfiguring:
        case kConfigHBReconfiguring:
            LOG(1) << "Ignoring new configuration with version " << newConfig.getConfigVersion() <<
                " because already in the midst of a configuration process";
            return;
        default:
            severe() << "Reconfiguration request occurred while _rsConfigState == " <<
                int(_rsConfigState) << "; aborting.";
            fassertFailed(18807);
        }
        _setConfigState_inlock(kConfigHBReconfiguring);
        // The incoming config must be strictly newer than an already-initialized current config.
        invariant(!_rsConfig.isInitialized() ||
                  _rsConfig.getConfigVersion() < newConfig.getConfigVersion());
        if (_freshnessChecker) {
            // A freshness checker exists: cancel it (and the elect command runner, if any) and
            // continue from _heartbeatReconfigAfterElectionCanceled once _electionFinishedEvent
            // fires, instead of scheduling the store step directly.
            _freshnessChecker->cancel(&_replExecutor);
            if (_electCmdRunner) {
                _electCmdRunner->cancel(&_replExecutor);
            }
            _replExecutor.onEvent(
                    _electionFinishedEvent,
                    stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigAfterElectionCanceled,
                               this,
                               stdx::placeholders::_1,
                               newConfig));
            return;
        }
        // Otherwise schedule _heartbeatReconfigStore as DB work on the executor.
        _replExecutor.scheduleDBWork(stdx::bind(
            &ReplicationCoordinatorImpl::_heartbeatReconfigStore,
            this,
            stdx::placeholders::_1,
            newConfig));
    }
コード例 #8
0
// Checks that 'newConfig' is acceptable at start-up.
//
// Fails with the status from newConfig.validate() when the config is invalid, or with the status
// from validateOldAndNewConfigsCompatible() when 'oldConfig' is initialized and the pair is
// incompatible.  On success, returns the result of findSelfInConfig().
StatusWith<int> validateConfigForStartUp(ReplicationCoordinatorExternalState* externalState,
                                         const ReplicaSetConfig& oldConfig,
                                         const ReplicaSetConfig& newConfig) {
    const Status validity = newConfig.validate();
    if (!validity.isOK()) {
        return StatusWith<int>(validity);
    }

    // Compatibility is only meaningful when there is a previous config to compare against.
    if (oldConfig.isInitialized()) {
        const Status compatibility = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
        if (!compatibility.isOK()) {
            return StatusWith<int>(compatibility);
        }
    }

    return findSelfInConfig(externalState, newConfig);
}
コード例 #9
0
 // Runs a QuorumChecker for 'rsConfig' (with this node at 'myIndex') on 'executor' and returns
 // its result.  The config version must be greater than 1 (enforced by invariant).
 Status checkQuorumForReconfig(ReplicationExecutor* executor,
                               const ReplicaSetConfig& rsConfig,
                               const int myIndex) {
     invariant(rsConfig.getConfigVersion() > 1);

     QuorumChecker quorumChecker(&rsConfig, myIndex);
     const Status quorumStatus = quorumChecker.run(executor);
     return quorumStatus;
 }
コード例 #10
0
 // Stores 'newConfig' through the external state and, on success, schedules
 // _heartbeatReconfigFinish with the result of validateConfigForHeartbeatReconfig().
 //
 // If storing fails, the error is logged and, under _mutex, the config state is moved back from
 // kConfigHBReconfiguring to kConfigSteady (when a config is already initialized) or
 // kConfigUninitialized (when not); the finish step is not scheduled in that case.
 void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
     {
         // The operation context lives only for the duration of this inner scope.
         boost::scoped_ptr<OperationContext> txn(_externalState->createOperationContext());
         Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());
         if (!status.isOK()) {
             error() << "Ignoring new configuration in heartbeat response because we failed to"
                 " write it to stable storage; " << status;
             // _mutex is taken only on this failure path, after the store attempt.
             boost::lock_guard<boost::mutex> lk(_mutex);
             invariant(_rsConfigState == kConfigHBReconfiguring);
             if (_rsConfig.isInitialized()) {
                 _setConfigState_inlock(kConfigSteady);
             }
             else {
                 // This is the _only_ case where we can return to kConfigUninitialized from
                 // kConfigHBReconfiguring.
                 _setConfigState_inlock(kConfigUninitialized);
             }
             return;
         }
     }
     // Validation happens after the config has been stored; its result (index or error) is
     // handed to the finish step unchanged.
     const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
             _externalState.get(),
             newConfig);
     _replExecutor.scheduleWork(stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                                           this,
                                           stdx::placeholders::_1,
                                           newConfig,
                                           myIndex));
 }
コード例 #11
0
    // Installs 'newConfig' as the current replica set config while holding _mutex.
    //
    // 'myIndex' carries either this node's index in 'newConfig' or the reason it could not be
    // determined.  In the error case the reason is logged and the config is installed with an
    // index of -1.  The state must be kConfigHBReconfiguring and 'newConfig' must be newer than
    // any initialized current config (both enforced by invariant).
    void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
            const ReplicationExecutor::CallbackData& cbData,
            const ReplicaSetConfig& newConfig,
            StatusWith<int> myIndex) {

        boost::lock_guard<boost::mutex> lock(_mutex);
        invariant(_rsConfigState == kConfigHBReconfiguring);
        invariant(!_rsConfig.isInitialized() ||
                  _rsConfig.getConfigVersion() < newConfig.getConfigVersion());

        // Happy path: the index is known, install the config with it.
        if (myIndex.isOK()) {
            _setCurrentRSConfig_inlock(newConfig, myIndex.getValue());
            return;
        }

        // Error path: explain why the index is unknown, then install with index -1.
        switch (myIndex.getStatus().code()) {
        case ErrorCodes::NoSuchKey:
            log() << "Cannot find self in new replica set configuration; I must be removed; " <<
                myIndex.getStatus();
            break;
        case ErrorCodes::DuplicateKey:
            error() << "Several entries in new config represent this node; "
                "Removing self until an acceptable configuration arrives; " <<
                myIndex.getStatus();
            break;
        default:
            error() << "Could not validate configuration received from remote node; "
                "Removing self until an acceptable configuration arrives; " <<
                myIndex.getStatus();
            break;
        }
        _setCurrentRSConfig_inlock(newConfig, -1);
    }
コード例 #12
0
// Test helper: answers election dry-run vote requests on the mock network until
// (number of config members / 2) "replSetRequestVotes" requests have been granted, then waits
// for the coordinator's dry run to finish.
//
// 'onDryRunRequest' is invoked with every "replSetRequestVotes" request before its response is
// scheduled.  Each such request must carry dryRun=true (asserted).  Any other request is logged
// as an error and black-holed, and does not count towards the expected total.
void ReplCoordTest::simulateSuccessfulDryRun(
    stdx::function<void(const RemoteCommandRequest& request)> onDryRunRequest) {
    ReplicationCoordinatorImpl* replCoord = getReplCoord();
    ReplicaSetConfig rsConfig = replCoord->getReplicaSetConfig_forTest();
    NetworkInterfaceMock* net = getNet();

    // An election timeout must already be scheduled (a default-constructed Date_t is rejected).
    auto electionTimeoutWhen = replCoord->getElectionTimeout_forTest();
    ASSERT_NOT_EQUALS(Date_t(), electionTimeoutWhen);
    log() << "Election timeout scheduled at " << electionTimeoutWhen << " (simulator time)";

    int voteRequests = 0;
    int votesExpected = rsConfig.getNumMembers() / 2;
    log() << "Simulating dry run responses - expecting " << votesExpected
          << " replSetRequestVotes requests";
    net->enterNetwork();
    while (voteRequests < votesExpected) {
        // Advance the mock clock to the election timeout if it has not been reached yet.
        if (net->now() < electionTimeoutWhen) {
            net->runUntil(electionTimeoutWhen);
        }
        const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
        const RemoteCommandRequest& request = noi->getRequest();
        log() << request.target.toString() << " processing " << request.cmdObj;
        if (request.cmdObj.firstElement().fieldNameStringData() == "replSetRequestVotes") {
            ASSERT_TRUE(request.cmdObj.getBoolField("dryRun"));
            onDryRunRequest(request);
            // Grant the vote, echoing the term from the request.
            net->scheduleResponse(
                noi,
                net->now(),
                makeResponseStatus(BSON("ok" << 1 << "reason"
                                             << ""
                                             << "term" << request.cmdObj["term"].Long()
                                             << "voteGranted" << true)));
            voteRequests++;
        } else {
            error() << "Black holing unexpected request to " << request.target << ": "
                    << request.cmdObj;
            net->blackHole(noi);
        }
        net->runReadyNetworkOperations();
    }
    net->exitNetwork();
    log() << "Simulating dry run responses - scheduled " << voteRequests
          << " replSetRequestVotes responses";
    getReplCoord()->waitForElectionDryRunFinish_forTest();
    log() << "Simulating dry run responses - dry run completed";
}
コード例 #13
0
// Checks a config received via heartbeat: returns the failing status when newConfig.validate()
// fails, otherwise the result of findSelfInConfig() for 'externalState' and 'newConfig'.
StatusWith<int> validateConfigForHeartbeatReconfig(
    ReplicationCoordinatorExternalState* externalState, const ReplicaSetConfig& newConfig) {
    const Status validity = newConfig.validate();
    if (validity.isOK()) {
        return findSelfInConfig(externalState, newConfig);
    }
    return StatusWith<int>(validity);
}
コード例 #14
0
void ReplCoordTest::replyToReceivedHeartbeatV1() {
    NetworkInterfaceMock* net = getNet();
    net->enterNetwork();
    const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
    const RemoteCommandRequest& request = noi->getRequest();
    const ReplicaSetConfig rsConfig = getReplCoord()->getReplicaSetConfig_forTest();
    repl::ReplSetHeartbeatArgsV1 hbArgs;
    ASSERT_OK(hbArgs.initialize(request.cmdObj));
    repl::ReplSetHeartbeatResponse hbResp;
    hbResp.setSetName(rsConfig.getReplSetName());
    hbResp.setState(MemberState::RS_SECONDARY);
    hbResp.setConfigVersion(rsConfig.getConfigVersion());
    BSONObjBuilder respObj;
    respObj << "ok" << 1;
    hbResp.addToBSON(&respObj, false);
    net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
    net->runReadyNetworkOperations();
    getNet()->exitNetwork();
}
コード例 #15
0
// Records the version of 'currentConfig', replaces the checker's algorithm and scatter-gather
// runner with fresh instances built from the arguments, and returns the result of starting the
// runner on 'executor'.
StatusWith<ReplicationExecutor::EventHandle> FreshnessChecker::start(
    ReplicationExecutor* executor,
    const Timestamp& lastOpTimeApplied,
    const ReplicaSetConfig& currentConfig,
    int selfIndex,
    const std::vector<HostAndPort>& targets) {
    _originalConfigVersion = currentConfig.getConfigVersion();

    // _algorithm takes ownership immediately; the runner only borrows the pointer.
    Algorithm* const algorithm =
        new Algorithm(lastOpTimeApplied, currentConfig, selfIndex, targets);
    _algorithm.reset(algorithm);
    _runner.reset(new ScatterGatherRunner(algorithm, executor));

    return _runner->start();
}
コード例 #16
0
// Installs 'newConfig' as the current replica set config.
//
// Returns immediately when the callback was canceled.  When this node is primary and the callback
// carries no OperationContext, the same work is rescheduled under the global exclusive lock and
// nothing else happens in this invocation.  Otherwise the config is installed under _mutex, using
// the index from 'myIndex' or -1 (after logging the reason) when 'myIndex' holds an error, and
// the follow-up actions run after the mutex has been released.
void ReplicationCoordinatorImpl::_heartbeatReconfigFinish(
    const ReplicationExecutor::CallbackArgs& cbData,
    const ReplicaSetConfig& newConfig,
    StatusWith<int> myIndex) {
    if (cbData.status == ErrorCodes::CallbackCanceled) {
        return;
    }

    stdx::unique_lock<stdx::mutex> lock(_mutex);
    invariant(_rsConfigState == kConfigHBReconfiguring);
    invariant(!_rsConfig.isInitialized() ||
              _rsConfig.getConfigVersion() < newConfig.getConfigVersion());

    // Not having an OperationContext in the CallbackData means we definitely aren't holding
    // the global lock.  Since we're primary and this reconfig could cause us to stepdown,
    // reschedule this work with the global exclusive lock so the stepdown is safe.
    // TODO(spencer): When we *do* have an OperationContext, consult it to confirm that
    // we are indeed holding the global lock.
    const bool mustRescheduleWithGlobalLock = _getMemberState_inlock().primary() && !cbData.txn;
    if (mustRescheduleWithGlobalLock) {
        _replExecutor.scheduleWorkWithGlobalExclusiveLock(
            stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                       this,
                       stdx::placeholders::_1,
                       newConfig,
                       myIndex));
        return;
    }

    // Index to install the config with; stays at -1 when our position could not be determined.
    int selfIndex = -1;
    if (myIndex.isOK()) {
        selfIndex = myIndex.getValue();
    } else {
        switch (myIndex.getStatus().code()) {
            case ErrorCodes::NodeNotFound:
                log() << "Cannot find self in new replica set configuration; I must be removed; "
                      << myIndex.getStatus();
                break;
            case ErrorCodes::DuplicateKey:
                error() << "Several entries in new config represent this node; "
                           "Removing self until an acceptable configuration arrives; "
                        << myIndex.getStatus();
                break;
            default:
                error() << "Could not validate configuration received from remote node; "
                           "Removing self until an acceptable configuration arrives; "
                        << myIndex.getStatus();
                break;
        }
    }

    const PostMemberStateUpdateAction postUpdateAction =
        _setCurrentRSConfig_inlock(cbData, newConfig, selfIndex);

    // The remaining steps run without the mutex.
    lock.unlock();
    _resetElectionInfoOnProtocolVersionUpgrade(newConfig);
    _performPostMemberStateUpdateAction(postUpdateAction);
}
コード例 #17
0
// Checks that 'newConfig' may replace 'oldConfig' in a reconfig.
//
// Fails with the status from newConfig.validate() or from validateOldAndNewConfigsCompatible()
// when either check fails.  Otherwise returns the result of findSelfInConfig() when 'force' is
// true, and of findSelfInConfigIfElectable() when it is false.
StatusWith<int> validateConfigForReconfig(ReplicationCoordinatorExternalState* externalState,
                                          const ReplicaSetConfig& oldConfig,
                                          const ReplicaSetConfig& newConfig,
                                          bool force) {
    const Status validity = newConfig.validate();
    if (!validity.isOK()) {
        return StatusWith<int>(validity);
    }

    const Status compatibility = validateOldAndNewConfigsCompatible(oldConfig, newConfig);
    if (!compatibility.isOK()) {
        return StatusWith<int>(compatibility);
    }

    // Only the non-forced path uses the "IfElectable" lookup.
    if (!force) {
        return findSelfInConfigIfElectable(externalState, newConfig);
    }
    return findSelfInConfig(externalState, newConfig);
}
コード例 #18
0
 // Delegates to checkQuorumGeneral() for 'rsConfig' with this node at 'myIndex'.  The config
 // version must be greater than 1 (enforced by invariant).
 Status checkQuorumForReconfig(ReplicationExecutor* executor,
                               const ReplicaSetConfig& rsConfig,
                               const int myIndex) {
     invariant(rsConfig.getConfigVersion() > 1);

     const Status quorumStatus = checkQuorumGeneral(executor, rsConfig, myIndex);
     return quorumStatus;
 }
コード例 #19
0
// Test helper: builds a ReplicaSetConfig from 'configBson', asserting (ASSERT_OK) that both
// initialize() and validate() succeed, and returns the resulting config by value.
ReplicaSetConfig ReplCoordTest::assertMakeRSConfig(const BSONObj& configBson) {
    ReplicaSetConfig config;
    ASSERT_OK(config.initialize(configBson));
    ASSERT_OK(config.validate());
    return config;
}
コード例 #20
0
    // Validates and persists a config received via heartbeat, then schedules
    // _heartbeatReconfigFinish to install it.
    //
    // Outcomes:
    //   - this node is not in 'newConfig' (NodeNotFound) and no config is initialized yet: the
    //     state goes back to kConfigUninitialized and nothing is stored or scheduled;
    //   - validation fails for any other reason: a warning is logged, nothing is stored, but the
    //     finish step is still scheduled with the failed 'myIndex';
    //   - storing fails: the state goes back to kConfigSteady or kConfigUninitialized and
    //     nothing is scheduled;
    //   - otherwise the external state's threads are started and the finish step is scheduled,
    //     under the global exclusive lock when this node is currently primary.
    //
    // On every exit path the local StoreThreadGuard detaches and resets
    // _heartbeatReconfigThread under _mutex, unless shutdown is in progress.
    void ReplicationCoordinatorImpl::_heartbeatReconfigStore(const ReplicaSetConfig& newConfig) {
        // Scope guard: on destruction, acquires the lock if it is not already owned and, unless
        // shutting down, detaches and clears the thread object.
        class StoreThreadGuard {
        public:
            StoreThreadGuard(boost::unique_lock<boost::mutex>* lk,
                             boost::scoped_ptr<boost::thread>* thread,
                             bool* inShutdown) :
                _lk(lk),
                _thread(thread),
                _inShutdown(inShutdown) {}
            ~StoreThreadGuard() {
                if (!_lk->owns_lock()) {
                    _lk->lock();
                }
                if (*_inShutdown) {
                    return;
                }
                _thread->get()->detach();
                _thread->reset(NULL);
            }

        private:
            boost::unique_lock<boost::mutex>* const _lk;
            boost::scoped_ptr<boost::thread>* const _thread;
            bool* const _inShutdown;
        };

        // 'lk' is declared before 'guard' so that it is destroyed (and unlocked) after the
        // guard's destructor has run.
        boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());
        StoreThreadGuard guard(&lk, &_heartbeatReconfigThread, &_inShutdown);

        const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
                _externalState.get(),
                newConfig);

        if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
            lk.lock();
            // If this node absent in newConfig, and this node was not previously initialized,
            // return to kConfigUninitialized immediately, rather than storing the config and
            // transitioning into the RS_REMOVED state.  See SERVER-15740.
            if (!_rsConfig.isInitialized()) {
                invariant(_rsConfigState == kConfigHBReconfiguring);
                LOG(1) << "Ignoring new configuration in heartbeat response because we are "
                    "uninitialized and not a member of the new configuration";
                _setConfigState_inlock(kConfigUninitialized);
                return;
            }
            lk.unlock();
        }

        if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
            warning() << "Not persisting new configuration in heartbeat response to disk because "
                    "it is invalid: "<< myIndex.getStatus();
        }
        else {
            // The store runs without _mutex held; the lock is re-taken only to inspect the
            // result and, on failure, to roll the config state back.
            boost::scoped_ptr<OperationContext> txn(
                                      _externalState->createOperationContext("WriteReplSetConfig"));
            Status status = _externalState->storeLocalConfigDocument(txn.get(), newConfig.toBSON());

            lk.lock();
            if (!status.isOK()) {
                error() << "Ignoring new configuration in heartbeat response because we failed to"
                    " write it to stable storage; " << status;
                invariant(_rsConfigState == kConfigHBReconfiguring);
                if (_rsConfig.isInitialized()) {
                    _setConfigState_inlock(kConfigSteady);
                }
                else {
                    _setConfigState_inlock(kConfigUninitialized);
                }
                return;
            }

            lk.unlock();

            _externalState->startThreads();
        }

        const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
                stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                           this,
                           stdx::placeholders::_1,
                           newConfig,
                           myIndex));

        // 'lk' is never owned at this point (every path above either returned or unlocked it).
        // Take _mutex before reading _currentState and scheduling the finish step: the finish
        // step acquires _mutex itself, so holding it here keeps that step from completing until
        // the guard has reset _heartbeatReconfigThread and 'lk' has been released.
        lk.lock();
        if (_currentState.primary()) {
            // If the primary is receiving a heartbeat reconfig, that strongly suggests
            // that there has been a force reconfiguration.  In any event, it might lead
            // to this node stepping down as primary, so we'd better do it with the global
            // lock.
            _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
        }
        else {
            _replExecutor.scheduleWork(reconfigFinishFn);
        }
    }
コード例 #21
0
    // Fills 'response' for an incoming heartbeat described by 'args'.
    //
    // Returns BadValue when the protocol version is not 1 or when the set name in 'args' differs
    // from either the configured set name or the running replica set's name (the response is
    // marked as mismatched in the latter two cases).  Returns OK in all other cases, including
    // when the replica set is not yet available ("still initializing") and when the sender
    // cannot be matched to a known member.
    Status LegacyReplicationCoordinator::processHeartbeat(const ReplSetHeartbeatArgs& args,
                                                          ReplSetHeartbeatResponse* response) {
        if (args.getProtocolVersion() != 1) {
            return Status(ErrorCodes::BadValue, "incompatible replset protocol version");
        }

        {
            if (_settings.ourSetName() != args.getSetName()) {
                log() << "replSet set names do not match, our cmdline: " << _settings.replSet
                      << rsLog;
                log() << "replSet s: " << args.getSetName() << rsLog;
                response->noteMismatched();
                return Status(ErrorCodes::BadValue, "repl set names do not match");
            }
        }

        response->noteReplSet();
        // No replica set object yet, or it is still loading its config: remember the sender as a
        // discovered seed (if it identified itself) and report that we are still initializing.
        if( (theReplSet == 0) || (theReplSet->startupStatus == ReplSetImpl::LOADINGCONFIG) ) {
            if (!args.getSenderHost().empty()) {
                scoped_lock lck( _settings.discoveredSeeds_mx );
                _settings.discoveredSeeds.insert(args.getSenderHost().toString());
            }
            response->setHbMsg("still initializing");
            return Status::OK();
        }

        if (theReplSet->name() != args.getSetName()) {
            response->noteMismatched();
            return Status(ErrorCodes::BadValue, "repl set names do not match (2)");
        }
        response->setSetName(theReplSet->name());

        // Report our member state; the election time is included only when we are primary.
        MemberState currentState = theReplSet->state();
        response->setState(currentState.s);
        if (currentState == MemberState::RS_PRIMARY) {
            response->setElectionTime(theReplSet->getElectionTime().asDate());
        }

        response->setElectable(theReplSet->iAmElectable());
        response->setHbMsg(theReplSet->hbmsg());
        response->setTime((long long) time(0));
        response->setOpTime(theReplSet->lastOpTimeWritten.asDate());
        const Member *syncTarget = BackgroundSync::get()->getSyncTarget();
        if (syncTarget) {
            response->setSyncingTo(syncTarget->fullName());
        }

        // Attach our full config only when our version is newer than the sender's.
        int v = theReplSet->config().version;
        response->setVersion(v);
        if (v > args.getConfigVersion()) {
            ReplicaSetConfig config;
            fassert(18635, config.initialize(theReplSet->config().asBson()));
            response->setConfig(config);
        }

        // Identify the sending member: by id when the config versions match and an id was given,
        // otherwise (or if that lookup fails) by host name.  An unknown sender ends processing.
        Member* from = NULL;
        if (v == args.getConfigVersion() && args.getSenderId() != -1) {
            from = theReplSet->getMutableMember(args.getSenderId());
        }
        if (!from) {
            from = theReplSet->findByName(args.getSenderHost().toString());
            if (!from) {
                return Status::OK();
            }
        }

        // if we thought that this node is down, let it know
        if (!from->hbinfo().up()) {
            response->noteStateDisagreement();
        }

        // note that we got a heartbeat from this node
        theReplSet->mgr->send(stdx::bind(&ReplSet::msgUpdateHBRecv,
                                         theReplSet, from->hbinfo().id(), time(0)));


        return Status::OK();
    }
コード例 #22
0
    // Executor callback that validates and persists a config received via heartbeat, then
    // schedules _heartbeatReconfigFinish to install it.
    //
    // Outcomes:
    //   - the callback was canceled: a message is logged and nothing else happens;
    //   - this node is not in 'newConfig' (NodeNotFound) and no config is initialized yet: the
    //     state goes back to kConfigUninitialized and nothing is stored or scheduled;
    //   - validation fails for any other reason: a warning is logged, nothing is stored, but the
    //     finish step is still scheduled with the failed 'myIndex';
    //   - storing fails: the state goes back to kConfigSteady or kConfigUninitialized and
    //     nothing is scheduled;
    //   - otherwise the external state's threads are started and the finish step is scheduled,
    //     under the global exclusive lock when this node is currently primary.
    void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
        const ReplicationExecutor::CallbackData& cbd,
        const ReplicaSetConfig& newConfig) {

        if (cbd.status.code() == ErrorCodes::CallbackCanceled) {
            log() << "The callback to persist the replica set configuration was canceled - "
                  << "the configuration was not persisted but was used: " << newConfig.toBSON();
            return;
        }

        // Deferred lock: _mutex is taken and released explicitly around the sections below.
        boost::unique_lock<boost::mutex> lk(_mutex, boost::defer_lock_t());

        const StatusWith<int> myIndex = validateConfigForHeartbeatReconfig(
                _externalState.get(),
                newConfig);

        if (myIndex.getStatus() == ErrorCodes::NodeNotFound) {
            lk.lock();
            // If this node absent in newConfig, and this node was not previously initialized,
            // return to kConfigUninitialized immediately, rather than storing the config and
            // transitioning into the RS_REMOVED state.  See SERVER-15740.
            if (!_rsConfig.isInitialized()) {
                invariant(_rsConfigState == kConfigHBReconfiguring);
                LOG(1) << "Ignoring new configuration in heartbeat response because we are "
                    "uninitialized and not a member of the new configuration";
                _setConfigState_inlock(kConfigUninitialized);
                return;
            }
            lk.unlock();
        }

        if (!myIndex.getStatus().isOK() && myIndex.getStatus() != ErrorCodes::NodeNotFound) {
            warning() << "Not persisting new configuration in heartbeat response to disk because "
                    "it is invalid: "<< myIndex.getStatus();
        }
        else {
            // The store uses the callback's operation context and runs without _mutex held; the
            // lock is re-taken only to inspect the result and roll the state back on failure.
            Status status = _externalState->storeLocalConfigDocument(cbd.txn, newConfig.toBSON());

            lk.lock();
            if (!status.isOK()) {
                error() << "Ignoring new configuration in heartbeat response because we failed to"
                    " write it to stable storage; " << status;
                invariant(_rsConfigState == kConfigHBReconfiguring);
                if (_rsConfig.isInitialized()) {
                    _setConfigState_inlock(kConfigSteady);
                }
                else {
                    _setConfigState_inlock(kConfigUninitialized);
                }
                return;
            }

            lk.unlock();

            _externalState->startThreads();
        }

        const stdx::function<void (const ReplicationExecutor::CallbackData&)> reconfigFinishFn(
                stdx::bind(&ReplicationCoordinatorImpl::_heartbeatReconfigFinish,
                           this,
                           stdx::placeholders::_1,
                           newConfig,
                           myIndex));

        // Make sure that the reconfigFinishFn doesn't finish until we've reset
        // _heartbeatReconfigThread.
        // NOTE(review): nothing in this function touches _heartbeatReconfigThread, so the
        // sentence above may be left over from an earlier thread-based version - confirm.  What
        // the code does here is hold _mutex while reading _memberState and scheduling the
        // finish step.
        lk.lock();
        if (_memberState.primary()) {
            // If the primary is receiving a heartbeat reconfig, that strongly suggests
            // that there has been a force reconfiguration.  In any event, it might lead
            // to this node stepping down as primary, so we'd better do it with the global
            // lock.
            _replExecutor.scheduleWorkWithGlobalExclusiveLock(reconfigFinishFn);
        }
        else {
            _replExecutor.scheduleWork(reconfigFinishFn);
        }
    }