Example #1
        // Records a failed heartbeat for the member described by `mem`; `msg` is the failure
        // text and is stored as the member's last heartbeat message.
        // `m` and `h` are not declared in this snippet -- presumably members of the enclosing
        // class; verify against the full source.
        // NOTE(review): closing braces and any early exit after the first log statement are
        // not visible here, so the snippet looks truncated.
        void down(HeartbeatInfo& mem, string msg) {
            // if we've received a heartbeat from this member within the last two seconds, don't
            // change its state to down (if it's already down, leave it down since we don't have
            // any info about it other than it's heartbeating us)
            if (m.lastHeartbeatRecv+2 >= time(0)) {
                log() << "replset info " << h.toString()
                      << " just heartbeated us, but our heartbeat failed: " << msg
                      << ", not changing state" << rsLog;
                // we don't update any of the heartbeat info, though, since we didn't get any info
                // other than "not down" from having it heartbeat us

            // Reset the per-member health fields to their "down" values.
            mem.authIssue = false;
            mem.health = 0.0;
            mem.ping = 0;
            // Only stamp the transition when upSince is set or downSince has never been set.
            if( mem.upSince || mem.downSince == 0 ) {
                mem.upSince = 0;
                mem.downSince = jsTime();
                mem.hbstate = MemberState::RS_DOWN;
                log() << "replSet info " << h.toString() << " is down (or slow to respond): " << msg << rsLog;
            mem.lastHeartbeatMsg = msg;
Example #2
// Reports whether `hostAndPort` refers to this server instance.
// Two strategies are visible: (1) when the port equals serverGlobalParams.port, compare the
// addresses this server is bound to with the addresses `hostAndPort` resolves to; (2) otherwise
// connect to the host, run the "_isSelf" command and compare the returned "id" OID with
// instanceId. A std::exception during (2) is logged as a warning and the function returns false.
// `ctx` is not used in the visible code.
// NOTE(review): closing braces are missing throughout this snippet.
bool isSelf(const HostAndPort& hostAndPort, ServiceContext* const ctx) {
    // Fastpath: check if the host&port in question is bound to one
    // of the interfaces on this machine.
    // No need for ip match if the ports do not match
    if (hostAndPort.port() == serverGlobalParams.port) {
        std::vector<std::string> myAddrs = serverGlobalParams.bind_ips;

        // If any of the bound addresses is the default route ( on IPv4) it means we are
        // listening on all network interfaces and need to check against any of them.
        if (myAddrs.empty() ||
            std::any_of(myAddrs.cbegin(), myAddrs.cend(), [](std::string const& addrStr) {
                return HostAndPort(addrStr, serverGlobalParams.port).isDefaultRoute();
            })) {
            myAddrs = getBoundAddrs(IPv6Enabled());

        const std::vector<std::string> hostAddrs =
            getAddrsForHost(hostAndPort.host(), hostAndPort.port(), IPv6Enabled());

        // Any address shared between the two lists means the target is this server.
        for (std::vector<std::string>::const_iterator i = myAddrs.begin(); i != myAddrs.end();
             ++i) {
            for (std::vector<std::string>::const_iterator j = hostAddrs.begin();
                 j != hostAddrs.end();
                 ++j) {
                if (*i == *j) {
                    return true;


    try {
        DBClientConnection conn;
        conn.setSoTimeout(30);  // 30 second timeout

        // We need to avoid the isMaster call triggered by a normal connect, which would
        // cause a deadlock. 'isSelf' is called by the Replication Coordinator when validating
        // a replica set configuration document, but the 'isMaster' command requires a lock on the
        // replication coordinator to execute. As such we call 'connectSocketOnly', which
        // does not call 'isMaster'.
        if (!conn.connectSocketOnly(hostAndPort).isOK()) {
            return false;

        if (auth::isInternalAuthSet() && !conn.authenticateInternalUser().isOK()) {
            return false;
        BSONObj out;
        bool ok = conn.simpleCommand("admin", &out, "_isSelf");
        // It is "me" only if the command succeeded and returned this process's instance OID.
        bool me = ok && out["id"].type() == jstOID && instanceId == out["id"].OID();

        return me;
    } catch (const std::exception& e) {
        warning() << "couldn't check isSelf (" << hostAndPort << ") " << e.what() << std::endl;

    return false;
Example #3
// Parses `url` into a ConnectionString.
//  - A '/' that is not the first character yields a SET connection string built from the text
//    after the slash and the text before it.
//  - With no commas, the url is parsed as a single HostAndPort; a failed initialize() status is
//    returned as-is.
//  - With exactly two commas, a SYNC connection string is returned.
//  - Anything else yields ErrorCodes::FailedToParse.
// NOTE(review): closing braces are not visible in this snippet.
StatusWith<ConnectionString> ConnectionString::parse(const std::string& url) {
    const std::string::size_type i = url.find('/');

    // Replica set
    if (i != std::string::npos && i != 0) {
        return ConnectionString(SET, url.substr(i + 1), url.substr(0, i));

    const int numCommas = str::count(url, ',');

    // Single host
    if (numCommas == 0) {
        HostAndPort singleHost;
        Status status = singleHost.initialize(url);
        if (!status.isOK()) {
            return status;

        return ConnectionString(singleHost);

    // Sharding config server
    if (numCommas == 2) {
        return ConnectionString(SYNC, url, "");

    return Status(ErrorCodes::FailedToParse, str::stream() << "invalid url [" << url << "]");
Example #4
    // Reports whether `hostAndPort` refers to this server instance.
    // When the port matches serverGlobalParams.port, bound addresses are compared with the
    // addresses the host resolves to. Otherwise, provided Listener::getTimeTracker() is set,
    // the function connects to the host, runs "_isSelf" and compares the returned "id" OID
    // with instanceId.
    // NOTE(review): this snippet is truncated -- the ternary initialising myAddrs, the
    // getAddrsForHost(...) argument list and all closing braces are incomplete.
    bool isSelf(const HostAndPort& hostAndPort) {

        // Fastpath: check if the host&port in question is bound to one
        // of the interfaces on this machine.
        // No need for ip match if the ports do not match
        if (hostAndPort.port() == serverGlobalParams.port) {
            std::vector<std::string> myAddrs = serverGlobalParams.bind_ip.empty() ?
              getBoundAddrs(IPv6Enabled()) :

            // bind_ip is split on commas and spaces into individual addresses.
            if (!serverGlobalParams.bind_ip.empty()) {
                boost::split(myAddrs, serverGlobalParams.bind_ip, boost::is_any_of(", "));

            const std::vector<std::string> hostAddrs = getAddrsForHost(hostAndPort.host(),

            for (std::vector<std::string>::const_iterator i = myAddrs.begin();
                 i != myAddrs.end(); ++i) {
                for (std::vector<std::string>::const_iterator j = hostAddrs.begin();
                     j != hostAddrs.end(); ++j) {
                    if (*i == *j) {
                        return true;

        if (!Listener::getTimeTracker()) {
            // this ensures we are actually running a server
            // this may return true later, so may want to retry
            return false;

        try {
            DBClientConnection conn;
            std::string errmsg;
            if (!conn.connect(hostAndPort, errmsg)) {
                return false;

            // Authenticate as the internal user only when auth is enabled and configured.
            if (getGlobalAuthorizationManager()->isAuthEnabled() && isInternalAuthSet()) {
                if (!authenticateInternalUser(&conn)) {
                    return false;
            BSONObj out;
            bool ok = conn.simpleCommand("admin" , &out, "_isSelf");
            bool me = ok && out["id"].type() == jstOID && instanceId == out["id"].OID();

            return me;
        catch (const std::exception& e) {
            warning() << "could't check isSelf (" << hostAndPort << ") " << e.what() << std::endl;

        return false;
Example #5
 // Builds a HostAndPort from `text` via initialize(). Returns the failing Status wrapped in a
 // StatusWith when initialize() does not succeed, otherwise the parsed value.
 // NOTE(review): closing braces are not visible in this snippet.
 StatusWith<HostAndPort> HostAndPort::parse(const StringData& text) {
     HostAndPort result;
     Status status = result.initialize(text);
     if (!status.isOK()) {
         return StatusWith<HostAndPort>(status);
     return StatusWith<HostAndPort>(result);
Example #6
        // Command entry point that initiates a replica set from the "replSetInitiate" field of
        // `cmdObj`. If that field is not an object (or is empty) a default configuration is
        // assembled: "_id" = set name, "version" = 1, and a "members" array holding this host
        // at _id 0 followed by the seed hosts at _id i+1. The resulting status from
        // processReplSetInitiate is appended to `result`.
        // NOTE(review): several lines are missing from this snippet (the call that fills
        // name/seeds/seedSet, the members.append(...) head inside the loop, the tail of the
        // last log statement and of processReplSetInitiate(...), and all closing braces).
        virtual bool run(OperationContext* txn,
                         const string& ,
                         BSONObj& cmdObj,
                         int, string& errmsg,
                         BSONObjBuilder& result,
                         bool fromRepl) {

            BSONObj configObj;
            if( cmdObj["replSetInitiate"].type() == Object ) {
                configObj = cmdObj["replSetInitiate"].Obj();

            if (configObj.isEmpty()) {
                result.append("info2", "no configuration explicitly specified -- making one");
                log() << "replSet info initiate : no configuration specified.  "
                    "Using a default configuration for the set";

                ReplicationCoordinatorExternalStateImpl externalState;
                std::string name;
                std::vector<HostAndPort> seeds;
                std::set<HostAndPort> seedSet;
                        seedSet); // may throw...

                BSONObjBuilder b;
                b.append("_id", name);
                b.append("version", 1);
                BSONObjBuilder members;
                // This node is always member 0 of the generated configuration.
                HostAndPort me = someHostAndPortForMe();
                members.append("0", BSON( "_id" << 0 << "host" << me.toString() ));
                result.append("me", me.toString());
                for( unsigned i = 0; i < seeds.size(); i++ ) {
                                   BSON( "_id" << i+1 << "host" << seeds[i].toString()));
                b.appendArray("members", members.obj());
                configObj = b.obj();
                log() << "replSet created this configuration for initiation : " <<

            if (configObj.getField("version").eoo()) {
                // Missing version field defaults to version 1.
                // NOTE(review): as shown, only "version" is appended before configObj is
                // replaced; a line copying the existing fields may be missing -- confirm.
                BSONObjBuilder builder;
                builder.append("version", 1);
                configObj = builder.obj();

            Status status = getGlobalReplicationCoordinator()->processReplSetInitiate(txn,
            return appendCommandStatus(result, status);
// Processes the metadata attached to a reply from `replySource`: saves GLE stats, then
// returns the result of advancing the config optime from that shard. Any exception thrown by
// either step is converted into a Status via exceptionToStatus().
// NOTE(review): closing braces are not visible in this snippet.
Status ShardingEgressMetadataHook::readReplyMetadata(const HostAndPort& replySource,
                                                     const BSONObj& metadataObj) {
    try {
        _saveGLEStats(metadataObj, replySource.toString());
        return _advanceConfigOptimeFromShard(replySource.toString(), metadataObj);
    } catch (...) {
        return exceptionToStatus();
// Equality for HostAndPort. Hosts must match; then either both sides have a port and the
// ports are equal, or neither side has a port. The visible code ends with `return false`
// for the remaining cases.
// NOTE(review): closing braces are not visible in this snippet.
bool HostAndPort::operator==(const HostAndPort& rhs) const {
    if (this->_host == rhs._host) {
        if (this->hasPort() && rhs.hasPort()) {
            return (this->getPort() == rhs.getPort());
        } else if (!this->hasPort() && !rhs.hasPort()) {
            return true;
    return false;
Example #9
    // Connects the db-event push socket to a config server chosen from `configServers`
    // (a comma-separated host list). The host with the highest port is selected, and the
    // socket connects to that host on port + 1234. On a zmq error the failure is logged and
    // publishDataEvents is turned off.
    // NOTE(review): the statement guarded by `if (!pubsubEnabled)` and all closing braces are
    // missing from this snippet.
    void PubSubSendSocket::initSharding(const std::string configServers) {
        if (!pubsubEnabled)

        vector<string> configdbs;
        splitStringDelim(configServers, &configdbs, ',');

        // find config db we are using for pubsub
        HostAndPort maxConfigHP;

        // Keep the entry with the largest port number.
        for (vector<string>::iterator it = configdbs.begin(); it != configdbs.end(); it++) {
            HostAndPort configHP = HostAndPort(*it);
            if (configHP.port() > maxConfigHP.port())
                maxConfigHP = configHP;

        HostAndPort configPullEndpoint = HostAndPort(maxConfigHP.host(), maxConfigHP.port() + 1234);

        try {
            dbEventSocket = new zmq::socket_t(zmqContext, ZMQ_PUSH);
            dbEventSocket->connect(("tcp://" + configPullEndpoint.toString()).c_str());
        catch (zmq::error_t& e) {
            log() << "PubSub could not connect to config server. Turning off db events..."
                  << causedBy(e);
            publishDataEvents = false;

// Validates an ismaster reply from `remoteHost` against what the shard registry knows about
// that host. Returns ShardNotFound when no shard is registered for the host. The outcome then
// depends on whether the reply carries an integer "configsvr" field:
//  - present: the shard must be a config server, otherwise InvalidOptions; the catalog
//    manager is then asked to schedule a replacement if needed.
//  - absent (NoSuchKey): fine for non-config shards; for config shards it is accepted only
//    when the remote max wire version is below FIND_COMMAND.
//  - any other status is returned as-is.
// NOTE(review): the tails of scheduleReplaceCatalogManagerIfNeeded(...) and
// bsonExtractIntegerFieldWithDefault(...), the default: label and all closing braces are
// missing from this snippet.
Status ShardingNetworkConnectionHook::validateHostImpl(
    const HostAndPort& remoteHost, const executor::RemoteCommandResponse& isMasterReply) {
    auto shard = grid.shardRegistry()->getShardNoReload(remoteHost.toString());
    if (!shard) {
        return {ErrorCodes::ShardNotFound,
                str::stream() << "No shard found for host: " << remoteHost.toString()};

    long long configServerModeNumber;
    auto status = bsonExtractIntegerField(isMasterReply.data, "configsvr", &configServerModeNumber);

    switch (status.code()) {
        case ErrorCodes::OK: {
            // The ismaster response indicates remoteHost is a config server.
            if (!shard->isConfig()) {
                return {ErrorCodes::InvalidOptions,
                        str::stream() << "Surprised to discover that " << remoteHost.toString()
                                      << " believes it is a config server"};
            using ConfigServerMode = CatalogManager::ConfigServerMode;
            const BSONElement setName = isMasterReply.data["setName"];
            // A "configsvr" value of 0 maps to SCCC, anything else to CSRS.
            return grid.forwardingCatalogManager()->scheduleReplaceCatalogManagerIfNeeded(
                (configServerModeNumber == 0 ? ConfigServerMode::SCCC : ConfigServerMode::CSRS),
                (setName.type() == String ? setName.valueStringData() : StringData()),
        case ErrorCodes::NoSuchKey: {
            // The ismaster response indicates that remoteHost is not a config server, or that
            // the config server is running a version prior to the 3.1 development series.
            if (!shard->isConfig()) {
                return Status::OK();
            long long remoteMaxWireVersion;
            status = bsonExtractIntegerFieldWithDefault(isMasterReply.data,
            if (!status.isOK()) {
                return status;
            if (remoteMaxWireVersion < FIND_COMMAND) {
                // Prior to the introduction of the find command and the 3.1 release series, it was
                // not possible to distinguish a config server from a shard server from its ismaster
                // response. As such, we must assume that the system is properly configured.
                return Status::OK();
            return {ErrorCodes::InvalidOptions,
                    str::stream() << "Surprised to discover that " << remoteHost.toString()
                                  << " does not believe it is a config server"};
            // The ismaster response was malformed.
            return status;
// Newly elected primary with electionId >= maximum electionId seen by the Refresher
// The test first drains one CONTACT_HOST step per seed, then feeds one reply per seed (each
// claiming to be master with a freshly generated electionId) and checks after every reply
// that the set's primary is the host that just replied.
// NOTE(review): the call that delivers each mocked reply (the head of the BSON(...) argument
// below), the bookkeeping for `seen`, and all closing braces are missing from this snippet.
TEST(ReplicaSetMonitorTests, NewPrimaryWithMaxElectionId) {
    SetStatePtr state = boost::make_shared<SetState>("name", basicSeedsSet);
    Refresher refresher(state);

    set<HostAndPort> seen;

    // get all hosts to contact first
    for (size_t i = 0; i != basicSeeds.size(); ++i) {
        NextStep ns = refresher.getNextStep();
        ASSERT_EQUALS(ns.step, NextStep::CONTACT_HOST);

    const ReadPreferenceSetting primaryOnly(ReadPreference_PrimaryOnly, TagSet());

    // mock all replies
    for (size_t i = 0; i != basicSeeds.size(); ++i) {
        // All hosts to talk to are already dispatched, but no reply has been received
        NextStep ns = refresher.getNextStep();
        ASSERT_EQUALS(ns.step, NextStep::WAIT);

                                   BSON("setName" << "name"
                                        << "ismaster" << true
                                        << "secondary" << false
                                        << "hosts" << BSON_ARRAY("a" << "b" << "c")
                                        << "electionId" << OID::gen()
                                        << "ok" << true));

        // Ensure the set primary is the host we just got a reply from
        HostAndPort currentPrimary = state->getMatchingHost(primaryOnly);
        ASSERT_EQUALS(currentPrimary.host(), basicSeeds[i].host());
        ASSERT_EQUALS(state->nodes.size(), basicSeeds.size());

        // Check the state of each individual node
        // Nodes up to and including i are expected up; only node i is expected to be master.
        for (size_t j = 0; j != basicSeeds.size(); ++j) {
            Node* node = state->findNode(basicSeeds[j]);
            ASSERT_EQUALS(node->host.toString(), basicSeeds[j].toString());
            ASSERT_EQUALS(node->isUp, j <= i);
            ASSERT_EQUALS(node->isMaster, j == i);

    // Now all hosts have returned data
    NextStep ns = refresher.getNextStep();
    ASSERT_EQUALS(ns.step, NextStep::DONE);
Example #12
// Main loop of the sync-source feedback thread. Each iteration waits under _mtx until
// _positionChanged or _shutdownSignaled is set (waking at least every _keepAliveInterval),
// then, using the current sync target from BackgroundSync, (re)connects if needed and calls
// updateUpstream(). Failures set _positionChanged again under the lock.
// NOTE(review): many control-flow lines are missing from this snippet (the bodies of the
// timeout/shutdown/primary-or-startup checks, the statement following `_syncTarget = target`,
// any continue/break/sleep statements, and all closing braces).
void SyncSourceFeedback::run() {

    ReplicationCoordinator* replCoord = getGlobalReplicationCoordinator();
    while (true) {  // breaks once _shutdownSignaled is true
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            while (!_positionChanged && !_shutdownSignaled) {
                if (_cond.wait_for(lock, _keepAliveInterval) == stdx::cv_status::timeout) {

            if (_shutdownSignaled) {

            // Consume the notification before doing the work for this round.
            _positionChanged = false;

        auto txn = cc().makeOperationContext();
        MemberState state = replCoord->getMemberState();
        if (state.primary() || state.startup()) {
        const HostAndPort target = BackgroundSync::get()->getSyncTarget();
        if (_syncTarget != target) {
            _syncTarget = target;
        if (!hasConnection()) {
            // fix connection if need be
            if (target.empty()) {
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
            if (!_connect(txn.get(), target)) {
                stdx::unique_lock<stdx::mutex> lock(_mtx);
                _positionChanged = true;
        Status status = updateUpstream(txn.get());
        if (!status.isOK()) {
            stdx::unique_lock<stdx::mutex> lock(_mtx);
            _positionChanged = true;
Example #13
 // Connects the oplog reader `r` twice -- once through the legacy coordinator and once
 // through `bgsync` with the new implementation -- and logs at severe level when the two
 // attempts end up on different hosts. After the call `r` holds whatever host the second
 // connect selected.
 // NOTE(review): closing braces (and anything following the log statement) are not visible.
 void HybridReplicationCoordinator::connectOplogReader(OperationContext* txn,
                                                       BackgroundSync* bgsync,
                                                       OplogReader* r) {
     _legacy.connectOplogReader(txn, bgsync, r);
     HostAndPort legacySyncSource = r->getHost();
     bgsync->connectOplogReader(txn, &_impl, r);
     HostAndPort implSyncSource = r->getHost();
     if (legacySyncSource != implSyncSource) {
         severe() << "sync source mismatch between legacy and impl: " << 
             legacySyncSource.toString() << " and " << implSyncSource.toString();
// Validates an ismaster reply from `remoteHost` against the shard registry entry for that
// host. Returns ShardNotFound when the host maps to no shard. If the reply has an integer
// "configsvr" field the shard must be a config server (else InvalidOptions). If the field is
// absent, non-config shards pass; config shards pass only when the remote max wire version is
// below FIND_COMMAND. Any other extraction status is returned unchanged.
// NOTE(review): the tail of bsonExtractIntegerFieldWithDefault(...), the default: label and
// all closing braces are missing from this snippet.
Status ShardingNetworkConnectionHook::validateHostImpl(
    const HostAndPort& remoteHost, const executor::RemoteCommandResponse& isMasterReply) {
    auto shard = grid.shardRegistry()->getShardForHostNoReload(remoteHost);
    if (!shard) {
        return {ErrorCodes::ShardNotFound,
                str::stream() << "No shard found for host: " << remoteHost.toString()};

    long long configServerModeNumber;
    auto status = bsonExtractIntegerField(isMasterReply.data, "configsvr", &configServerModeNumber);
    // TODO SERVER-22320 fix should collapse the switch to only NoSuchKey handling

    switch (status.code()) {
        case ErrorCodes::OK: {
            // The ismaster response indicates remoteHost is a config server.
            if (!shard->isConfig()) {
                return {ErrorCodes::InvalidOptions,
                        str::stream() << "Surprised to discover that " << remoteHost.toString()
                                      << " believes it is a config server"};
            return Status::OK();
        case ErrorCodes::NoSuchKey: {
            // The ismaster response indicates that remoteHost is not a config server, or that
            // the config server is running a version prior to the 3.1 development series.
            if (!shard->isConfig()) {
                return Status::OK();
            long long remoteMaxWireVersion;
            status = bsonExtractIntegerFieldWithDefault(isMasterReply.data,
            if (!status.isOK()) {
                return status;
            if (remoteMaxWireVersion < FIND_COMMAND) {
                // Prior to the introduction of the find command and the 3.1 release series, it was
                // not possible to distinguish a config server from a shard server from its ismaster
                // response. As such, we must assume that the system is properly configured.
                return Status::OK();
            return {ErrorCodes::InvalidOptions,
                    str::stream() << "Surprised to discover that " << remoteHost.toString()
                                  << " does not believe it is a config server"};
            // The ismaster response was malformed.
            return status;
    // Resolves `connString` to the host that should receive writes and stores it in
    // *resolvedHost. A MASTER (single host) connection string is returned unchanged. For a
    // SET, the replica set monitor is looked up (and created from the seed servers if it
    // does not exist yet) and asked for the current master.
    // Returns ReplicaSetNotFound when no monitor can be obtained and HostNotFound when the
    // monitor throws a DBException while locating the master.
    // NOTE(review): the second argument of the seedServers constructor and all closing braces
    // are missing from this snippet.
    Status DBClientShardResolver::findMaster( const std::string connString,
                                              ConnectionString* resolvedHost ) {
        std::string errMsg;

        ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
        // Parse failures and unexpected connection types are only checked via dassert.
        dassert( errMsg == "" );
        dassert( rawHost.type() == ConnectionString::SET
                 || rawHost.type() == ConnectionString::MASTER );

        if ( rawHost.type() == ConnectionString::MASTER ) {
            *resolvedHost = rawHost;
            return Status::OK();

        // If we need to, then get the particular node we're targeting in the replica set

        // Don't create the monitor unless we need to - fast path
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());

        if (!replMonitor) {
            // Slow path
            std::set<HostAndPort> seedServers(rawHost.getServers().begin(),
            ReplicaSetMonitor::createIfNeeded(rawHost.getSetName(), seedServers);
            replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());

        if (!replMonitor) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawHost.getSetName() );

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
            *resolvedHost = ConnectionString::parse( masterHostAndPort.toString(), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
Example #16
// Opens a transport-layer session to `serverAddress` and records it on this connection.
// Returns InvalidOptions when the host is empty, HostUnreachable when the transport layer
// fails to connect, and OK once _session and the related bookkeeping fields are set.
// The connect timeout is _socketTimeout, or 5000 ms when that is unset.
// NOTE(review): closing braces are not visible in this snippet.
Status DBClientConnection::connectSocketOnly(const HostAndPort& serverAddress) {
    _serverAddress = serverAddress;

    if (serverAddress.host().empty()) {
        return Status(ErrorCodes::InvalidOptions,
                      str::stream() << "couldn't connect to server " << _serverAddress.toString()
                                    << ", host is empty");

    // NOTE(review): as written this compares against the empty string, the same condition the
    // preceding check tests, and the message ends with "address resolved to" -- it looks like
    // an address literal was lost from both places; confirm against the upstream source.
    if (serverAddress.host() == "") {
        return Status(ErrorCodes::InvalidOptions,
                      str::stream() << "couldn't connect to server " << _serverAddress.toString()
                                    << ", address resolved to");

    transport::ConnectSSLMode sslMode = transport::kGlobalSSLMode;
    // Prefer to get SSL mode directly from our URI, but if it is not set, fall back to
    // checking global SSL params. DBClientConnections create through the shell will have a
    // meaningful URI set, but DBClientConnections created from within the server may not.
    auto options = _uri.getOptions();
    auto iter = options.find("ssl");
    if (iter != options.end()) {
        // Only the exact value "true" enables SSL; any other value disables it.
        if (iter->second == "true") {
            sslMode = transport::kEnableSSL;
        } else {
            sslMode = transport::kDisableSSL;


    auto tl = getGlobalServiceContext()->getTransportLayer();
    auto sws = tl->connect(serverAddress, sslMode, _socketTimeout.value_or(Milliseconds{5000}));
    if (!sws.isOK()) {
        return Status(ErrorCodes::HostUnreachable,
                      str::stream() << "couldn't connect to server " << _serverAddress.toString()
                                    << ", connection attempt failed: "
                                    << sws.getStatus());

    _session = std::move(sws.getValue());
    _sessionCreationMicros = curTimeMicros64();
    _lastConnectivityCheck = Date_t::now();
    _failed = false;
    LOG(1) << "connected to server " << toString();
    return Status::OK();
Example #17
// Writes a textual form of `hp` through the `write` sink. The visible code distinguishes
// hosts containing ':' (treated as IPv6 per the comment below) from others, and separately
// checks whether the host contains '/'.
// NOTE(review): the bodies of all three branches and the closing braces are missing from this
// snippet, so what is actually written cannot be determined from here.
static void appendGeneric(const HostAndPort& hp, const SinkFunc& write) {
    // wrap ipv6 addresses in []s for roundtrip-ability
    if (hp.host().find(':') != std::string::npos) {
    } else {
    if (hp.host().find('/') == std::string::npos) {
Example #18
        // Records a failed heartbeat for the member described by `mem`; `msg` is the failure
        // text and is stored as the member's last heartbeat message.
        // `m` and `h` are not declared in this snippet -- presumably members of the enclosing
        // class; verify against the full source.
        // NOTE(review): the comment below says the guard was being commented out, yet it is
        // live in this snippet; closing braces and any early exit are also not visible.
        void down(HeartbeatInfo& mem, string msg) {
            // if we've received a heartbeat from this member within the last two seconds, don't
            // change its state to down (if it's already down, leave it down since we don't have
            // any info about it other than it's heartbeating us)

            // This code is essentially a no-op in vanilla MongoDB thanks to
            // SERVER-11280. I (Zardosht) am reluctant to fix it because
            // I don't know what impact it may have on elections and failover.
            // For now, commenting out because we are moving lastHeartbeatRecv
            // out of HeartbeatInfo and into Member
            if (m.lastHeartbeatRecv+2 >= time(0)) {
                log() << "replset info " << h.toString()
                      << " just heartbeated us, but our heartbeat failed: " << msg
                      << ", not changing state" << rsLog;
                // we don't update any of the heartbeat info, though, since we didn't get any info
                // other than "not down" from having it heartbeat us

            // Reset the per-member health fields to their "down" values.
            mem.authIssue = false;
            mem.health = 0.0;
            mem.ping = 0;
            // Only stamp the transition when upSince is set or downSince has never been set.
            if( mem.upSince || mem.downSince == 0 ) {
                mem.upSince = 0;
                mem.downSince = jsTime();
                mem.hbstate = MemberState::RS_DOWN;
                log() << "replSet info " << h.toString() << " is down (or slow to respond): " << msg << rsLog;
            mem.lastHeartbeatMsg = msg;
// Updates this member's heartbeat data after a successful heartbeat from `host` at `now`:
// health becomes 1, _upSince is set if it was unset, the auth-issue flag is cleared,
// _lastHeartbeat is stamped, a state change is logged, and `hbResponse` is stored as
// _lastResponse.
// NOTE(review): the bodies of the three hasState/hasElectionTime/hasOpTime checks and all
// closing braces are missing from this snippet -- presumably they fill in defaults on
// hbResponse; confirm against the full source.
void MemberHeartbeatData::setUpValues(Date_t now,
                                      const HostAndPort& host,
                                      ReplSetHeartbeatResponse hbResponse) {
    _health = 1;
    // A default-constructed Date_t means the member was not previously recorded as up.
    if (_upSince == Date_t()) {
        _upSince = now;
    _authIssue = false;
    _lastHeartbeat = now;
    if (!hbResponse.hasState()) {
    if (!hbResponse.hasElectionTime()) {
    if (!hbResponse.hasOpTime()) {

    // Log if the state changes
    if (_lastResponse.getState() != hbResponse.getState()) {
        log() << "Member " << host.toString() << " is now in state "
              << hbResponse.getState().toString() << rsLog;

    _lastResponse = hbResponse;
// Builds the command to send to `remoteHost` right after connecting. Returns boost::none when
// this process is a config server or when the remote host is a config server, ShardNotFound
// when the host maps to no known shard, and otherwise a request against "admin" carrying a
// SetShardVersionRequest with a 30 second timeout.
// NOTE(review): the return type line, the arguments of makeForInitNoPersist(...) and all
// closing braces are missing from this snippet.
ShardingNetworkConnectionHook::makeRequest(const HostAndPort& remoteHost) {
    if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        // TODO: SERVER-23973 Temporary crutch until we decide where to get the config server
        // connection string.
        return {boost::none};

    auto shard = grid.shardRegistry()->getShardForHostNoReload(remoteHost);
    if (!shard) {
        return {ErrorCodes::ShardNotFound,
                str::stream() << "No shard found for host: " << remoteHost.toString()};
    if (shard->isConfig()) {
        // No need to initialize sharding metadata if talking to a config server
        return {boost::none};

    SetShardVersionRequest ssv = SetShardVersionRequest::makeForInitNoPersist(
    executor::RemoteCommandRequest request;
    request.dbname = "admin";
    request.target = remoteHost;
    request.timeout = stdx::chrono::seconds{30};
    request.cmdObj = ssv.toBSON();

    return {request};
    // check if we can connect to the host and determine the host's network type
    // Runs isMaster against `hp` and returns an object with "networkType" and
    // "collectionPrefix": a truthy "msg" field is treated as a sharded cluster (prefix taken
    // from the clusterId in config.version), a truthy "setName" as a replica set (prefix is
    // the set name). A DBException is reported in "errmsg" instead.
    // NOTE(review): the opening `try {`, several else/closing-brace lines and possibly other
    // branches are missing from this snippet.
    BSONObj PingMonitorThreadManager::getConnInfo( HostAndPort& hp ){
	BSONObjBuilder toReturn;
	scoped_ptr< ScopedDbConnection > connPtr;
	    connPtr.reset( new ScopedDbConnection( hp.toString() , socketTimeout ) );
            ScopedDbConnection& conn = *connPtr;
	    BSONObj isMasterResults;
	    conn->runCommand( "admin" , BSON( "isMaster" << 1 ) , isMasterResults );

	    if( isMasterResults["msg"].trueValue() ){
		toReturn.append( "networkType" , shardedCluster );
		auto_ptr<DBClientCursor> cursor( conn->query( "config.version" , BSONObj() ) );
		toReturn.append( "collectionPrefix" , cursor->nextSafe()["clusterId"].__oid().toString() ); 
	    else if( isMasterResults["setName"].trueValue() ){
		toReturn.append( "networkType" , replicaSet );
		toReturn.append( "collectionPrefix" , isMasterResults["setName"].valuestrsafe() );
		toReturn.append( "isNotMaster" , false );
	} catch( DBException& e ){
	    toReturn.append( "errmsg" , e.toString() );
	// Hand the pooled connection back if one was obtained.
	if( connPtr != 0 ) connPtr->done();

	return toReturn.obj();
Example #22
 // Marks `mem` as down: health is set to 0, and if the member was up (upSince non-zero)
 // upSince is cleared and the transition is logged. `msg` becomes the last heartbeat message.
 // `h` is not declared in this snippet -- presumably a member of the enclosing class.
 // NOTE(review): closing braces are not visible in this snippet.
 void down(HeartbeatInfo& mem, string msg) {
     mem.health = 0.0;
     if( mem.upSince ) {
         mem.upSince = 0;
         log() << "replSet info " << h.toString() << " is now down" << rsLog;
     mem.lastHeartbeatMsg = msg;
Example #23
        // Performs one heartbeat round against host `h`: requests a heartbeat, updates a
        // local copy of the member's HeartbeatInfo (up/down, state, last message), forwards a
        // received "config" object to the manager, then publishes the updated info. A
        // state check is requested when the info changed or more than 4 seconds have passed
        // since the last request.
        // NOTE(review): several lines are missing from this snippet (the opening of the block
        // that reads "state", the statement that dispatches `f`, and all closing braces).
        void doWork() { 
            HeartbeatInfo mem = m;
            // Snapshot taken before any update so changes can be detected below.
            HeartbeatInfo old = mem;
            try { 
                BSONObj info;
                int theirConfigVersion = -10000;
                bool ok = requestHeartbeat(theReplSet->name(), h.toString(), info, theReplSet->config().version, theirConfigVersion);
                mem.lastHeartbeat = time(0); // we set this on any response - we don't get this far if couldn't connect because exception is thrown
                    be state = info["state"];
                    if( state.ok() )
                        mem.hbstate = (MemberState) state.Int();
                if( ok ) {
                    if( mem.upSince == 0 ) {
                        log() << "replSet info " << h.toString() << " is now up" << rsLog;
                        mem.upSince = mem.lastHeartbeat;
                    mem.health = 1.0;
                    mem.lastHeartbeatMsg = "";

                    be cfg = info["config"];
                    if( cfg.ok() ) {
                        // received a new config
                        boost::function<void()> f = 
                            boost::bind(&ReplSet::Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy());
                else { 
                    down(mem, info.getStringField("errmsg"));
            catch(...) { 
                down(mem, "connect/transport error");             
            m = mem;
            theReplSet->mgr->send( boost::bind(&ReplSet::msgUpdateHBInfo, theReplSet, mem) );

            // `last` is function-static, so it persists across calls to doWork().
            static time_t last = 0;
            time_t now = time(0);
            if( mem.changed(old) || now-last>4 ) {
                last = now;
                theReplSet->mgr->send( boost::bind(&ReplSet::Manager::msgCheckNewState, theReplSet->mgr) );
Example #24
/** @param cfgString <setname>/<seedhost1>,<seedhost2> */
// Splits a --replSet style string into the set name (text before the first '/', or the whole
// string when there is no slash) and the comma-separated seed hosts after it. Each seed is
// parsed as a HostAndPort; an unparsable seed raises uassert 13114 and a duplicate raises
// uassert 13096. Seeds that refer to this node are logged at LOG(1).
// NOTE(review): many lines are missing from this snippet (the else/uassert around the set
// name, the early return when there is no slash, the statements that add seeds to
// `seeds`/`seedSet`, loop exits, and all closing braces).
void parseReplsetCmdLine(const std::string& cfgString,
                         string& setname,
                         vector<HostAndPort>& seeds,
                         set<HostAndPort>& seedSet ) {
    const char *p = cfgString.c_str();
    const char *slash = strchr(p, '/');
    if( slash )
        setname = string(p, slash-p);
        setname = p;
            "bad --replSet config string format is: <setname>[/<seedhost1>,<seedhost2>,...]",

    if( slash == 0 )

    p = slash + 1;
    while( 1 ) {
        const char *comma = strchr(p, ',');
        // No further comma: treat the terminating NUL as the end of the last seed.
        if( comma == 0 ) comma = strchr(p,0);
        if( p == comma )
            HostAndPort m;
            try {
                m = HostAndPort( string(p, comma-p) );
            catch(...) {
                uassert(13114, "bad --replSet seed hostname", false);
            uassert(13096, "bad --replSet command line config string - dups?",
                    seedSet.count(m) == 0);
            //uassert(13101, "can't use localhost in replset host list", !m.isLocalHost());
            if( m.isSelf() ) {
                LOG(1) << "replSet ignoring seed " << m.toString() << " (=self)" << rsLog;
            if( *comma == 0 )
            p = comma + 1;
Example #25
 // Marks `mem` as down: health is set to 0, and when upSince is non-zero or downSince is 0
 // the member's upSince is cleared, downSince is stamped with jsTime() and the transition is
 // logged. `msg` becomes the last heartbeat message.
 // `h` is not declared in this snippet -- presumably a member of the enclosing class.
 // NOTE(review): closing braces are not visible in this snippet.
 void down(HeartbeatInfo& mem, string msg) {
     mem.health = 0.0;
     if( mem.upSince || mem.downSince == 0 ) {
         mem.upSince = 0;
         mem.downSince = jsTime();
         log() << "replSet info " << h.toString() << " is now down (or slow to respond)" << rsLog;
     mem.lastHeartbeatMsg = msg;
// Applies `ops` during initial sync by delegating to repl::multiInitialSyncApply with a
// locally constructed SyncTail; `fetchCount` is passed through. `source` is not used in the
// visible code.
// NOTE(review): the closing brace is not visible in this snippet.
Status ReplicationCoordinatorExternalStateImpl::multiInitialSyncApply(
    MultiApplier::OperationPtrs* ops, const HostAndPort& source, AtomicUInt32* fetchCount) {
    // repl::multiInitialSyncApply uses SyncTail::shouldRetry() (and implicitly getMissingDoc())
    // to fetch missing documents during initial sync. Therefore, it is fine to construct SyncTail
    // with invalid BackgroundSync, MultiSyncApplyFunc and writerPool arguments because we will not
    // be accessing any SyncTail functionality that require these constructor parameters.
    SyncTail syncTail(nullptr, SyncTail::MultiSyncApplyFunc(), nullptr);
    return repl::multiInitialSyncApply(ops, &syncTail, fetchCount);
Example #27
    // Resolves `connString` to the host that should receive writes and stores it in
    // *resolvedHost. A MASTER (single host) connection string is returned unchanged. For a
    // SET, an existing replica set monitor is looked up (passing `false` as the second
    // argument to get()) and asked for the current master.
    // Returns ReplicaSetNotFound when no monitor exists and HostNotFound when the monitor
    // throws a DBException while locating the master.
    // NOTE(review): closing braces are not visible in this snippet.
    Status DBClientShardResolver::findMaster( const std::string connString,
                                              ConnectionString* resolvedHost ) {
        std::string errMsg;

        ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
        // Parse failures and unexpected connection types are only checked via dassert.
        dassert( errMsg == "" );
        dassert( rawHost.type() == ConnectionString::SET
                 || rawHost.type() == ConnectionString::MASTER );

        if ( rawHost.type() == ConnectionString::MASTER ) {
            *resolvedHost = rawHost;
            return Status::OK();

        // If we need to, then get the particular node we're targeting in the replica set

        // Does not reload the monitor if it doesn't currently exist
        ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawHost.getSetName(),
                                                                   false );
        if ( !replMonitor ) {
            return Status( ErrorCodes::ReplicaSetNotFound,
                           string("unknown replica set ") + rawHost.getSetName() );

        try {
            // This can throw when we don't find a master!
            HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
            *resolvedHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
            dassert( errMsg == "" );
            return Status::OK();
        catch ( const DBException& ) {
            return Status( ErrorCodes::HostNotFound,
                           string("could not contact primary for replica set ")
                           + replMonitor->getName() );

        // Unreachable
        dassert( false );
        return Status( ErrorCodes::UnknownError, "" );
Example #28
// Decides whether the current sync source should be abandoned. Returns true immediately when
// either the stored sync target or `syncSource` is empty; otherwise defers to the
// replication coordinator's shouldChangeSyncSource().
// NOTE(review): closing braces are not visible in this snippet.
bool BackgroundSync::_shouldChangeSyncSource(const HostAndPort& syncSource) {
    // is it even still around?
    if (getSyncTarget().empty() || syncSource.empty()) {
        return true;

    // check other members: is any member's optime more than MaxSyncSourceLag seconds
    // ahead of the current sync source?
    return _replCoord->shouldChangeSyncSource(syncSource);
// Picks a host from the replica set that satisfies `readPref`.
// Returns ReplicaSetNotFound when there is no monitor for the set. When the monitor yields an
// empty host, returns NotMaster for a PrimaryOnly preference and
// FailedToSatisfyReadPreference otherwise. On success the selected host is returned.
// NOTE(review): closing braces are not visible in this snippet.
StatusWith<HostAndPort> RemoteCommandTargeterRS::findHost(const ReadPreferenceSetting& readPref) {
    if (!_rsMonitor) {
        return Status(ErrorCodes::ReplicaSetNotFound,
                      str::stream() << "unknown replica set " << _rsName);

    HostAndPort hostAndPort = _rsMonitor->getHostOrRefresh(readPref);
    if (hostAndPort.empty()) {
        if (readPref.pref == ReadPreference::PrimaryOnly) {
            return Status(ErrorCodes::NotMaster,
                          str::stream() << "No master found for set " << _rsName);
        return Status(ErrorCodes::FailedToSatisfyReadPreference,
                      str::stream() << "could not find host matching read preference "
                                    << readPref.toString() << " for set " << _rsName);

    return hostAndPort;
Example #30
 // Marks `mem` as down: health is set to 0, and when upSince is non-zero or downSince is 0
 // the member's upSince is cleared, downSince is stamped with jsTime(), hbstate becomes
 // RS_DOWN and the transition (including `msg`) is logged. `msg` also becomes the last
 // heartbeat message.
 // `h` is not declared in this snippet -- presumably a member of the enclosing class.
 // NOTE(review): closing braces are not visible in this snippet.
 void down(HeartbeatInfo& mem, string msg) {
     mem.health = 0.0;
     if( mem.upSince || mem.downSince == 0 ) {
         mem.upSince = 0;
         mem.downSince = jsTime();
         mem.hbstate = MemberState::RS_DOWN;
         log() << "replSet info " << h.toString() << " is down (or slow to respond): " << msg << rsLog;
     mem.lastHeartbeatMsg = msg;