/**
 * Returns the monitor registered for the replica set named in 'connStr'. If no live monitor is
 * registered under that name, a new one is created from the hosts in 'connStr', stored in
 * '_monitors', initialized and returned.
 *
 * 'connStr' must be of type ConnectionString::SET (checked with invariant). The whole call runs
 * while holding '_mutex'.
 */
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);

    // do not restart taskExecutor if is in shutdown
    if (!_taskExecutor && !_isShutdown) {
        // construct task executor
        auto net = executor::makeNetworkInterface("ReplicaSetMonitor-TaskExecutor");
        auto netPtr = net.get();
        _taskExecutor = stdx::make_unique<ThreadPoolTaskExecutor>(
            stdx::make_unique<NetworkInterfaceThreadPool>(netPtr), std::move(net));
        LOG(1) << "Starting up task executor for monitoring replica sets in response to request to "
                  "monitor set: "
               << connStr.toString();
        _taskExecutor->startup();
    }

    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        return monitor;
    }

    // Fetch the host list exactly once. Taking begin() and end() from two separate
    // getServers() calls is only a valid range if the accessor returns a reference; binding the
    // result here is correct for either a by-value or a by-reference accessor.
    const auto& seedHosts = connStr.getServers();
    const std::set<HostAndPort> servers(seedHosts.begin(), seedHosts.end());

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers);
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
/**
 * Resolves 'connString' to the host that should currently be treated as the master and stores
 * it in '*resolvedHost'.
 *
 * A MASTER connection string is returned unchanged. For a SET connection string the replica set
 * monitor is looked up (and created from the seed hosts if it does not exist yet) and asked for
 * the current master.
 *
 * Returns Status::OK() on success, ReplicaSetNotFound if no monitor could be obtained, and
 * HostNotFound if the monitor could not supply a master.
 */
Status DBClientShardResolver::findMaster( const std::string connString,
                                          ConnectionString* resolvedHost ) {
    std::string errMsg;

    ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
    dassert( errMsg == "" );
    dassert( rawHost.type() == ConnectionString::SET
             || rawHost.type() == ConnectionString::MASTER );

    if ( rawHost.type() == ConnectionString::MASTER ) {
        *resolvedHost = rawHost;
        return Status::OK();
    }

    //
    // If we need to, then get the particular node we're targeting in the replica set
    //

    // Don't create the monitor unless we need to - fast path
    ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());

    if (!replMonitor) {
        // Slow path
        // Fetch the host list once: begin() and end() taken from two separate getServers()
        // calls only form a valid range if the accessor returns a reference.
        const std::vector<HostAndPort>& seedHosts = rawHost.getServers();
        std::set<HostAndPort> seedServers(seedHosts.begin(), seedHosts.end());
        ReplicaSetMonitor::createIfNeeded(rawHost.getSetName(), seedServers);
        replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());
    }

    if (!replMonitor) {
        return Status( ErrorCodes::ReplicaSetNotFound,
                       string("unknown replica set ") + rawHost.getSetName() );
    }

    try {
        // This can throw when we don't find a master!
        HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
        *resolvedHost = ConnectionString::parse( masterHostAndPort.toString(), errMsg );
        dassert( errMsg == "" );
        return Status::OK();
    }
    catch ( const DBException& ) {
        return Status( ErrorCodes::HostNotFound,
                       string("could not contact primary for replica set ")
                       + replMonitor->getName() );
    }

    // Unreachable
    dassert( false );
    return Status( ErrorCodes::UnknownError, "" );
}
/**
 * Returns the monitor stored for the replica set named in 'connStr', creating it from the hosts
 * in 'connStr' when the slot in '_monitors' is empty.
 *
 * 'connStr' must be of type ConnectionString::SET (checked with invariant). Runs while holding
 * '_mutex'.
 */
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);

    // operator[] creates an empty slot on first use; the reference lets us fill it in place.
    shared_ptr<ReplicaSetMonitor>& monitor = _monitors[connStr.getSetName()];
    if (!monitor) {
        // Fetch the host list once: begin() and end() taken from two separate getServers()
        // calls only form a valid range if the accessor returns a reference.
        const auto& seedHosts = connStr.getServers();
        const std::set<HostAndPort> servers(seedHosts.begin(), seedHosts.end());

        monitor = std::make_shared<ReplicaSetMonitor>(connStr.getSetName(), servers);
    }

    return monitor;
}
void ShardRegistry::remove(const ShardId& id) { stdx::lock_guard<stdx::mutex> lk(_mutex); set<string> entriesToRemove; for (const auto& i : _lookup) { shared_ptr<Shard> s = i.second; if (s->getId() == id) { entriesToRemove.insert(i.first); ConnectionString connStr = s->getConnString(); for (const auto& host : connStr.getServers()) { entriesToRemove.insert(host.toString()); } } } for (const auto& entry : entriesToRemove) { _lookup.erase(entry); } for (ShardMap::iterator i = _rsLookup.begin(); i != _rsLookup.end();) { shared_ptr<Shard> s = i->second; if (s->getId() == id) { _rsLookup.erase(i++); } else { ++i; } } shardConnectionPool.removeHost(id); ReplicaSetMonitor::remove(id); }
/**
 * Re-registers 'shard' in the lookup maps under 'newConnString'.
 *
 * Keys derived from the shard's current connection string (each host and the full string) are
 * removed from '_lookup'; the shard is then registered under its id, the new full connection
 * string, every host of the new connection string and, for SET connection strings, under the
 * set name in '_rsLookup'.
 *
 * The "_inlock" suffix suggests the caller already holds the registry mutex; this function takes
 * no lock itself.
 */
void ShardRegistry::_updateLookupMapsForShard_inlock(shared_ptr<Shard> shard,
                                                     const ConnectionString& newConnString) {
    auto oldConnString = shard->getConnString();
    for (const auto& host : oldConnString.getServers()) {
        _lookup.erase(host.toString());
    }
    // The full connection string is registered as a key below, so the previous one has to be
    // dropped as well; otherwise it stays behind as a stale entry after the hosts change.
    _lookup.erase(oldConnString.toString());

    _lookup[shard->getId()] = shard;

    if (newConnString.type() == ConnectionString::SET) {
        _rsLookup[newConnString.getSetName()] = shard;
    } else if (newConnString.type() == ConnectionString::CUSTOM) {
        // CUSTOM connection strings (ie "$dummy:10000) become DBDirectClient connections which
        // always return "localhost" as their response to getServerAddress(). This is just for
        // making dbtest work.
        _lookup["localhost"] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[newConnString.toString()] = shard;

    for (const HostAndPort& hostAndPort : newConnString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
    }
}
/**
 * Starts write back listening for the server(s) behind 'conn'.
 *
 * SYNC connections are skipped entirely. Any non-SET connection is registered by its server
 * address. For a SET connection whose address is not yet in '_seenSets', the address is parsed
 * and every host of the set is registered individually.
 *
 * Raises uassert 13641 when the address of a SET connection cannot be parsed.
 */
/* static */
void WriteBackListener::init( DBClientBase& conn ) {

    switch ( conn.type() ) {
    case ConnectionString::SYNC:
        // don't want write back listeners for config servers
        return;
    case ConnectionString::SET:
        break;
    default:
        init( conn.getServerAddress() );
        return;
    }

    {
        scoped_lock lk( _cacheLock );
        if ( _seenSets.count( conn.getServerAddress() ) )
            return;
    }

    // we want to do writebacks on all rs nodes
    string errmsg;
    ConnectionString cs = ConnectionString::parse( conn.getServerAddress() , errmsg );
    uassert( 13641 , str::stream() << "can't parse host [" << conn.getServerAddress() << "]" , cs.isValid() );

    vector<HostAndPort> hosts = cs.getServers();
    for ( vector<HostAndPort>::const_iterator member = hosts.begin(); member != hosts.end(); ++member ) {
        init( member->toString() );
    }
}
std::unique_ptr<RemoteCommandTargeter> RemoteCommandTargeterFactoryImpl::create( const ConnectionString& connStr) { switch (connStr.type()) { case ConnectionString::MASTER: case ConnectionString::CUSTOM: invariant(connStr.getServers().size() == 1); return stdx::make_unique<RemoteCommandTargeterStandalone>(connStr.getServers().front()); case ConnectionString::SET: return stdx::make_unique<RemoteCommandTargeterRS>(connStr.getSetName(), connStr.getServers()); case ConnectionString::INVALID: // These connections should never be seen break; } MONGO_UNREACHABLE; }
/**
 * Clears the connections of every pool whose key equals the host string built from 'host'.
 * Runs while holding 'm_Mutex'.
 */
void MongoConnectionPool::removeHost(const ConnectionString& host)
{
    lock_guard<mutex> lock(m_Mutex);

    // Only used for the log line; guard against a connection string without any server so the
    // message cannot index past the end of the list.
    const auto& servers = host.getServers();
    cout << "Removing connections from all pools for host "
         << (servers.empty() ? std::string("<none>") : servers[0].toString()) << endl;

    // The key is the same for every pool, so build it once.
    const auto targetKey = BuildHostString(host);

    // Iterate by reference: iterating by value would clear a copy of each pool and leave the
    // pooled connections in m_Pools untouched.
    for (auto& pool : m_Pools)
    {
        if (targetKey.compare(pool.first) == 0)
            pool.second.clear();
    }
}
/**
 * Returns the monitor registered for the replica set named in 'connStr'. If no live monitor is
 * registered under that name, a new one is created from the hosts in 'connStr', stored in
 * '_monitors', initialized and returned.
 *
 * 'connStr' must be of type ConnectionString::SET (checked with invariant). The whole call,
 * including the task executor setup, runs while holding '_mutex'.
 */
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    _setupTaskExecutorInLock(connStr.toString());

    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        return monitor;
    }

    // Fetch the host list exactly once. Taking begin() and end() from two separate
    // getServers() calls is only a valid range if the accessor returns a reference; binding the
    // result here is correct for either a by-value or a by-reference accessor.
    const auto& seedHosts = connStr.getServers();
    const std::set<HostAndPort> servers(seedHosts.begin(), seedHosts.end());

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers);
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
/**
 * Asks the first server listed in 'cluster' for its local time via the serverStatus command and
 * returns that time minus half of the measured round trip.
 *
 * Throws TimeNotFoundException (13647) when the command fails or the connection throws, and
 * (13648) when the round trip took longer than twice 'maxNetSkew'.
 */
Date_t DistributedLock::remoteTime(const ConnectionString& cluster, unsigned long long maxNetSkew) {
    ConnectionString server(*cluster.getServers().begin());

    // Outcome of the serverStatus call: reply and round trip on success, errMsg otherwise.
    BSONObj result;
    string errMsg;
    bool success = false;
    Milliseconds delay{0};

    unique_ptr<ScopedDbConnection> connPtr;
    try {
        connPtr.reset(new ScopedDbConnection(server.toString()));
        ScopedDbConnection& conn = *connPtr;

        Date_t sentAt = jsTime();
        success = conn->runCommand(string("admin"), BSON("serverStatus" << 1), result);
        delay = jsTime() - sentAt;

        if (!success) {
            errMsg = result.toString();
        }

        conn.done();
    } catch (const DBException& ex) {
        // Return to the pool so the pool knows about the failure
        if (connPtr && connPtr->get()->isFailed()) {
            connPtr->done();
        }

        success = false;
        errMsg = ex.toString();
    }

    if (!success) {
        throw TimeNotFoundException(str::stream() << "could not get status from server "
                                                  << server.toString() << " in cluster "
                                                  << cluster.toString() << " to check time"
                                                  << causedBy(errMsg),
                                    13647);
    }

    // The reply is assumed to have been produced in the middle of the round trip, so the result
    // can be off by at most half the delay; reject delays above twice the allowed skew.
    if (delay > Milliseconds(maxNetSkew * 2)) {
        throw TimeNotFoundException(str::stream() << "server " << server.toString()
                                                  << " in cluster " << cluster.toString()
                                                  << " did not respond within max network delay of "
                                                  << maxNetSkew << "ms",
                                    13648);
    }

    return result["localTime"].Date() - (delay / 2);
}
bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { errmsg.clear(); // get replica set component hosts ConnectionString servers = ConnectionString::parse( cmdObj.firstElement().valuestrsafe() , errmsg ); if ( ! errmsg.empty() ) { log() << "addshard request " << cmdObj << " failed:" << errmsg << endl; return false; } // using localhost in server names implies every other process must use localhost addresses too vector<HostAndPort> serverAddrs = servers.getServers(); for ( size_t i = 0 ; i < serverAddrs.size() ; i++ ) { if ( serverAddrs[i].isLocalHost() != grid.allowLocalHost() ) { errmsg = str::stream() << "can't use localhost as a shard since all shards need to communicate. " << "either use all shards and configdbs in localhost or all in actual IPs " << " host: " << serverAddrs[i].toString() << " isLocalHost:" << serverAddrs[i].isLocalHost(); log() << "addshard request " << cmdObj << " failed: attempt to mix localhosts and IPs" << endl; return false; } // it's fine if mongods of a set all use default port if ( ! serverAddrs[i].hasPort() ) { serverAddrs[i].setPort( CmdLine::ShardServerPort ); } } // name is optional; addShard will provide one if needed string name = ""; if ( cmdObj["name"].type() == String ) { name = cmdObj["name"].valuestrsafe(); } // maxSize is the space usage cap in a shard in MBs long long maxSize = 0; if ( cmdObj[ ShardFields::maxSize.name() ].isNumber() ) { maxSize = cmdObj[ ShardFields::maxSize.name() ].numberLong(); } if ( ! grid.addShard( &name , servers , maxSize , errmsg ) ) { log() << "addshard request " << cmdObj << " failed: " << errmsg << endl; return false; } result << "shardAdded" << name; return true; }
void ShardRegistryData::_addShard(WithLock lk, std::shared_ptr<Shard> const& shard, bool useOriginalCS) { const ShardId shardId = shard->getId(); const ConnectionString connString = useOriginalCS ? shard->originalConnString() : shard->getConnString(); auto currentShard = _findByShardId(lk, shardId); if (currentShard) { auto oldConnString = currentShard->originalConnString(); if (oldConnString.toString() != connString.toString()) { log() << "Updating ShardRegistry connection string for shard " << currentShard->getId() << " from: " << oldConnString.toString() << " to: " << connString.toString(); } for (const auto& host : oldConnString.getServers()) { _lookup.erase(host.toString()); _hostLookup.erase(host); } _lookup.erase(oldConnString.toString()); } _lookup[shard->getId()] = shard; LOG(3) << "Adding shard " << shard->getId() << ", with CS " << connString.toString(); if (connString.type() == ConnectionString::SET) { _rsLookup[connString.getSetName()] = shard; } else if (connString.type() == ConnectionString::CUSTOM) { // CUSTOM connection strings (ie "$dummy:10000) become DBDirectClient connections which // always return "localhost" as their response to getServerAddress(). This is just for // making dbtest work. _lookup[ShardId("localhost")] = shard; _hostLookup[HostAndPort("localhost")] = shard; } // TODO: The only reason to have the shard host names in the lookup table is for the // setShardVersion call, which resolves the shard id from the shard address. This is // error-prone and will go away eventually when we switch all communications to go through // the remote command runner and all nodes are sharding aware by default. _lookup[connString.toString()] = shard; for (const HostAndPort& hostAndPort : connString.getServers()) { _lookup[hostAndPort.toString()] = shard; _hostLookup[hostAndPort] = shard; } }
/**
 * Returns the config hosts of the cluster as individual connection strings: one entry per host
 * for a SYNC config connection string, otherwise the connection string itself.
 */
static vector<ConnectionString> getConfigHosts() {
    vector<ConnectionString> configHosts;

    ConnectionString configHostOrHosts = configServer.getConnectionString();

    if (configHostOrHosts.type() == ConnectionString::SYNC) {
        // Split the sync cluster into one single-host connection string per member.
        vector<HostAndPort> configHPs = configHostOrHosts.getServers();
        for (size_t idx = 0; idx < configHPs.size(); ++idx) {
            configHosts.push_back(ConnectionString(configHPs[idx]));
        }
        return configHosts;
    }

    if (configHostOrHosts.type() != ConnectionString::MASTER) {
        // This is only for tests.
        dassert(configHostOrHosts.type() == ConnectionString::CUSTOM);
    }

    configHosts.push_back(configHostOrHosts);
    return configHosts;
}
/**
 * Skews the clocks of a remote cluster by a particular amount, specified by
 * the "skewHosts" element in a BSONObj.
 *
 * The i-th value of "skewHosts" is sent to the i-th server of 'cluster' through the
 * _skewClockCommand admin command. Values beyond the number of servers in 'cluster' are ignored.
 * Does nothing when 'cmdObj' has no "skewHosts" field.
 *
 * Raises uassert 13678 when a server does not accept the command.
 */
static void skewClocks( ConnectionString& cluster, BSONObj& cmdObj ) {

    vector<long long> skew;
    if(cmdObj.hasField("skewHosts")) {
        bsonArrToNumVector<long long>(cmdObj["skewHosts"], skew);
    }
    else {
        LOG( logLvl ) << "No host clocks to skew." << endl;
        return;
    }

    LOG( logLvl ) << "Skewing clocks of hosts " << cluster << endl;

    // Fetch the server list once instead of once per iteration.
    vector<HostAndPort> servers = cluster.getServers();

    // Each skew value is paired with the server at the same position, so never walk past the
    // end of the server list when "skewHosts" is longer than the cluster.
    size_t numToSkew = skew.size();
    if ( numToSkew > servers.size() ) {
        LOG( logLvl ) << "More skew values than hosts in cluster " << cluster
                      << ", ignoring the extra values" << endl;
        numToSkew = servers.size();
    }

    for ( size_t s = 0; s < numToSkew; ++s ) {

        const long long amount = skew[s];

        ConnectionString server( servers[s] );
        scoped_ptr<ScopedDbConnection> conn(
                ScopedDbConnection::getInternalScopedDbConnection( server.toString() ) );

        BSONObj result;
        try {
            bool success = conn->get()->runCommand( string("admin"),
                                                    BSON( "_skewClockCommand" << 1
                                                          << "skew" << amount ),
                                                    result );

            uassert(13678, str::stream() << "Could not communicate with server " << server.toString()
                    << " in cluster " << cluster.toString() << " to change skew by " << amount, success );

            LOG( logLvl + 1 ) << " Skewed host " << server << " clock by " << amount << endl;
        }
        catch(...) {
            // Release the connection before letting the error propagate.
            conn->done();
            throw;
        }

        conn->done();
    }
}
/**
 * Re-registers 'shard' in the lookup maps under 'newConnString'.
 *
 * Keys derived from the shard's current connection string (each host and the full string) are
 * removed from '_lookup'; the shard is then registered under its id, the new full connection
 * string, every host of the new connection string and, for SET connection strings, under the
 * set name in '_rsLookup'.
 *
 * The "_inlock" suffix suggests the caller already holds the registry mutex; this function takes
 * no lock itself.
 */
void ShardRegistry::_updateLookupMapsForShard_inlock(shared_ptr<Shard> shard,
                                                     const ConnectionString& newConnString) {
    auto oldConnString = shard->getConnString();
    for (const auto& host : oldConnString.getServers()) {
        _lookup.erase(host.toString());
    }
    // The full connection string is registered as a key below, so the previous one has to be
    // dropped as well; otherwise it stays behind as a stale entry after the hosts change.
    _lookup.erase(oldConnString.toString());

    _lookup[shard->getId()] = shard;

    if (newConnString.type() == ConnectionString::SET) {
        _rsLookup[newConnString.getSetName()] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[newConnString.toString()] = shard;

    for (const HostAndPort& hostAndPort : newConnString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
    }
}
/**
 * Looks through the latest status document of 'ss' for further servers to track: the "hosts"
 * and "passives" lists of its "repl" section and, when the server is a mongos, every host of
 * every shard it reports. Each candidate is passed to _addAll() / _add().
 *
 * Returns true if at least one of those calls returned true.
 */
bool _discover( StateMap& threads , const string& host , const shared_ptr<ServerState>& ss ) {

    BSONObj info = ss->now;

    bool found = false;

    if ( info["repl"].isABSONObj() ) {
        BSONObj repl = info["repl"].Obj();

        if ( repl["hosts"].isABSONObj() )
            found = _addAll( threads , repl["hosts"].Obj() ) || found;

        if ( repl["passives"].isABSONObj() )
            found = _addAll( threads , repl["passives"].Obj() ) || found;
    }

    if ( ! ss->mongos )
        return found;

    for ( unsigned shardIdx = 0; shardIdx < ss->shards.size(); shardIdx++ ) {
        BSONObj shardDoc = ss->shards[shardIdx];

        string errmsg;
        ConnectionString cs = ConnectionString::parse( shardDoc["host"].String() , errmsg );
        if ( errmsg.size() ) {
            // Unparsable shard host: report it and move on to the next shard.
            cerr << errmsg << endl;
            continue;
        }

        vector<HostAndPort> members = cs.getServers();
        for ( unsigned memberIdx = 0; memberIdx < members.size(); memberIdx++ ) {
            found = _add( threads , members[memberIdx].toString() ) || found;
        }
    }

    return found;
}
/**
 * Validates the server or replica set described by 'servers' and, when every check passes,
 * records it as a new shard in the config database.
 *
 * Checks performed against the candidate, in order: it is not a sync cluster, it is not a
 * mongos, isMaster succeeds, the set name in 'servers' agrees with the one the host reports, it
 * is not a --configsvr, every host listed in 'servers' belongs to the reported set, and
 * listDatabases succeeds. Afterwards: none of the candidate's databases is already known to the
 * grid, and no existing shard document has the same host string.
 *
 * @param name     in/out shard name; may be NULL. If empty it defaults to the replica set name
 *                 or, failing that, to a generated name.
 * @param servers  connection string of the candidate shard.
 * @param maxSize  stored in the shard document only when greater than zero.
 * @param errMsg   receives the reason whenever false is returned.
 * @return true if the shard document was inserted, false otherwise.
 */
bool Grid::addShard( string* name , const ConnectionString& servers , long long maxSize , string& errMsg ) {
    // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
    string nameInternal;
    if ( ! name ) {
        name = &nameInternal;
    }

    ReplicaSetMonitorPtr rsMonitor;

    // Check whether the host (or set) exists and run several sanity checks on this request.
    // There are two set of sanity checks: making sure adding this particular shard is consistent
    // with the replica set state (if it exists) and making sure this shards databases can be
    // brought into the grid without conflict.

    vector<string> dbNames;
    try {
        // Every early return below calls done() on this connection first.
        ScopedDbConnection newShardConn(servers.toString());
        newShardConn->getLastError();

        if ( newShardConn->type() == ConnectionString::SYNC ) {
            newShardConn.done();
            errMsg = "can't use sync cluster as a shard.  for replica set, have to use <setname>/<server1>,<server2>,...";
            return false;
        }

        BSONObj resIsMongos;
        bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos );

        // should return ok=0, cmd not found if it's a normal mongod
        if ( ok ) {
            errMsg = "can't add a mongos process as a shard";
            newShardConn.done();
            return false;
        }

        BSONObj resIsMaster;
        ok =  newShardConn->runCommand( "admin" , BSON( "isMaster" << 1 ) , resIsMaster );
        if ( !ok ) {
            ostringstream ss;
            ss << "failed running isMaster: " << resIsMaster;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // if the shard has only one host, make sure it is not part of a replica set
        // 'setName' is what the host reports; 'commandSetName' is what the request asked for.
        string setName = resIsMaster["setName"].str();
        string commandSetName = servers.getSetName();
        if ( commandSetName.empty() && ! setName.empty() ) {
            ostringstream ss;
            ss << "host is part of set " << setName << ", use replica set url format <setname>/<server1>,<server2>,....";
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }
        if ( !commandSetName.empty() && setName.empty() ) {
            ostringstream ss;
            ss << "host did not return a set name, is the replica set still initializing? " << resIsMaster;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // if the shard is part of replica set, make sure it is the right one
        if ( ! commandSetName.empty() && ( commandSetName != setName ) ) {
            ostringstream ss;
            ss << "host is part of a different set: " << setName;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        if( setName.empty() ) {
            // check this isn't a --configsvr
            BSONObj res;
            bool ok = newShardConn->runCommand("admin",BSON("replSetGetStatus"<<1),res);
            // NOTE(review): 'ss' is never written to in this scope.
            ostringstream ss;
            if( !ok && res["info"].type() == String && res["info"].String() == "configsvr" ) {
                errMsg = "the specified mongod is a --configsvr and should thus not be a shard server";
                newShardConn.done();
                return false;
            }
        }

        // if the shard is part of a replica set, make sure all the hosts mentioned in 'servers' are part of
        // the set. It is fine if not all members of the set are present in 'servers'.
        bool foundAll = true;
        string offendingHost;
        if ( ! commandSetName.empty() ) {
            // Members the set reports: "hosts" is read unconditionally, "passives" and
            // "arbiters" only when present as objects.
            set<string> hostSet;
            BSONObjIterator iter( resIsMaster["hosts"].Obj() );
            while ( iter.more() ) {
                hostSet.insert( iter.next().String() ); // host:port
            }
            if ( resIsMaster["passives"].isABSONObj() ) {
                BSONObjIterator piter( resIsMaster["passives"].Obj() );
                while ( piter.more() ) {
                    hostSet.insert( piter.next().String() ); // host:port
                }
            }
            if ( resIsMaster["arbiters"].isABSONObj() ) {
                BSONObjIterator piter( resIsMaster["arbiters"].Obj() );
                while ( piter.more() ) {
                    hostSet.insert( piter.next().String() ); // host:port
                }
            }

            // Work on a copy so a missing port can be filled in before comparing.
            vector<HostAndPort> hosts = servers.getServers();
            for ( size_t i = 0 ; i < hosts.size() ; i++ ) {
                if (!hosts[i].hasPort()) {
                    hosts[i].setPort(ServerGlobalParams::DefaultDBPort);
                }
                string host = hosts[i].toString(); // host:port
                if ( hostSet.find( host ) == hostSet.end() ) {
                    offendingHost = host;
                    foundAll = false;
                    break;
                }
            }
        }
        if ( ! foundAll ) {
            ostringstream ss;
            ss << "in seed list " << servers.toString() << ", host " << offendingHost
               << " does not belong to replica set " << setName;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // shard name defaults to the name of the replica set
        if ( name->empty() && ! setName.empty() )
            *name = setName;

        // In order to be accepted as a new shard, that mongod must not have any database name that exists already
        // in any other shards. If that test passes, the new shard's databases are going to be entered as
        // non-sharded db's whose primary is the newly added shard.

        BSONObj resListDB;
        ok = newShardConn->runCommand( "admin" , BSON( "listDatabases" << 1 ) , resListDB );
        if ( !ok ) {
            ostringstream ss;
            ss << "failed listing " << servers.toString() << "'s databases:" << resListDB;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        BSONObjIterator i( resListDB["databases"].Obj() );
        while ( i.more() ) {
            BSONObj dbEntry = i.next().Obj();
            const string& dbName = dbEntry["name"].String();
            if ( _isSpecialLocalDB( dbName ) ) {
                // 'local', 'admin', and 'config' are system DBs and should be excluded here
                continue;
            }
            else {
                dbNames.push_back( dbName );
            }
        }

        // Keep the monitor so the shard document below can use its server address.
        if ( newShardConn->type() == ConnectionString::SET )
            rsMonitor = ReplicaSetMonitor::get( setName );

        newShardConn.done();
    }
    catch ( DBException& e ) {
        // For a set, remove the monitor registered under the requested set name.
        if ( servers.type() == ConnectionString::SET ) {
            ReplicaSetMonitor::remove( servers.getSetName() );
        }
        ostringstream ss;
        ss << "couldn't connect to new shard ";
        ss << e.what();
        errMsg = ss.str();
        return false;
    }

    // check that none of the existing shard candidate's db's exist elsewhere
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , false );
        if ( config.get() != NULL ) {
            ostringstream ss;
            ss << "can't add shard " << servers.toString() << " because a local database '" << *it;
            ss << "' exists in another " << config->getPrimary().toString();
            errMsg = ss.str();
            return false;
        }
    }

    // if a name for a shard wasn't provided, pick one.
    if ( name->empty() && ! _getNewShardName( name ) ) {
        errMsg = "error generating new shard name";
        return false;
    }

    // build the ConfigDB shard document
    BSONObjBuilder b;
    b.append(ShardType::name(), *name);
    b.append(ShardType::host(),
             rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
    if (maxSize > 0) {
        b.append(ShardType::maxSize(), maxSize);
    }
    BSONObj shardDoc = b.obj();

    {
        ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

        // check whether the set of hosts (or single host) is not an already a known shard
        // NOTE(review): this looks up servers.toString(), while the document built above may
        // store rsMonitor->getServerAddress() as the host - confirm the two always agree.
        BSONObj old = conn->findOne(ShardType::ConfigNS,
                                    BSON(ShardType::host(servers.toString())));

        if ( ! old.isEmpty() ) {
            errMsg = "host already used";
            conn.done();
            return false;
        }
        conn.done();
    }

    log() << "going to add shard: " << shardDoc << endl;

    Status result = clusterInsert( ShardType::ConfigNS,
                                   shardDoc,
                                   WriteConcernOptions::AllConfigs,
                                   NULL );

    if ( !result.isOK() ) {
        errMsg = result.reason();
        log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
        return false;
    }

    Shard::reloadShardInfo();

    // add all databases of the new shard
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it , true , *name );
        if ( ! config ) {
            // A database that could not be registered is logged but does not fail the request.
            log() << "adding shard " << servers << " even though could not add database " << *it << endl;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", *name);
    shardDetails.append("host", servers.toString());
    configServer.logChange("addShard", "", shardDetails.obj());

    return true;
}
/**
 * Runs isMaster against the prospective shard behind 'targeter' and checks that it may be added
 * to the cluster.
 *
 * Rejected: hosts that cannot be reached or answer with an error, a mongos, replies without
 * valid 'maxWireVersion' / 'ismaster' fields, hosts without a master, a set name in
 * 'connectionString' that disagrees with the reported one, config servers, seed hosts that are
 * not members of the reported set, and a resulting shard name equal to the config database name.
 *
 * On success returns a ShardType carrying the chosen name ('*shardProposedName' if given,
 * otherwise the replica set name), the targeter's connection string as host, and the
 * kShardAware state.
 */
StatusWith<ShardType> ShardingCatalogManager::_validateHostAsShard(
    OperationContext* opCtx,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {

    auto swIsMasterResponse = _runCommandForAddShard(
        opCtx, targeter.get(), NamespaceString::kAdminDb, BSON("isMaster" << 1));
    if (!swIsMasterResponse.isOK()) {
        if (swIsMasterResponse.getStatus() == ErrorCodes::IncompatibleServerVersion) {
            return swIsMasterResponse.getStatus().withReason(
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard because its binary version is not compatible with "
                                 "the cluster's featureCompatibilityVersion.");
        }
        return swIsMasterResponse.getStatus();
    }

    // Check for a command response error
    auto resIsMasterStatus = std::move(swIsMasterResponse.getValue().commandStatus);
    if (!resIsMasterStatus.isOK()) {
        return resIsMasterStatus.withContext(str::stream()
                                             << "Error running isMaster against "
                                             << targeter->connectionString().toString());
    }

    auto resIsMaster = std::move(swIsMasterResponse.getValue().response);

    // Fail if the node being added is a mongos.
    const std::string msg = resIsMaster.getStringField("msg");
    if (msg == "isdbgrid") {
        return {ErrorCodes::IllegalOperation, "cannot add a mongos as a shard"};
    }

    // Extract the maxWireVersion so we can verify that the node being added has a binary version
    // greater than or equal to the cluster's featureCompatibilityVersion. We expect an incompatible
    // binary node to be unable to communicate, returning an IncompatibleServerVersion error,
    // because of our internal wire version protocol. So we can safely invariant here that the node
    // is compatible.
    long long maxWireVersion;
    Status status = bsonExtractIntegerField(resIsMaster, "maxWireVersion", &maxWireVersion);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'maxWireVersion' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (serverGlobalParams.featureCompatibility.getVersion() >
        ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40) {
        // If the cluster's FCV is 4.2, or upgrading to / downgrading from, the node being added
        // must be a v4.2 binary.
        invariant(maxWireVersion == WireVersion::LATEST_WIRE_VERSION);
    } else {
        // If the cluster's FCV is 4.0, the node being added must be a v4.0 or v4.2 binary.
        invariant(serverGlobalParams.featureCompatibility.getVersion() ==
                  ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo40);
        invariant(maxWireVersion >= WireVersion::LATEST_WIRE_VERSION - 1);
    }

    // Check whether there is a master. If there isn't, the replica set may not have been
    // initiated. If the connection is a standalone, it will return true for isMaster.
    bool isMaster;
    status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'ismaster' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                    << connectionString.toString()
                    << " does not have a master. If this is a replica set, ensure that it has a"
                    << " healthy primary and that the set has been properly initiated."};
    }

    const std::string providedSetName = connectionString.getSetName();
    const std::string foundSetName = resIsMaster["setName"].str();
    const bool setNameProvided = !providedSetName.empty();
    const bool setNameFound = !foundSetName.empty();

    // Make sure the specified replica set name (if any) matches the actual shard's replica set
    if (!setNameProvided && setNameFound) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host is part of set " << foundSetName << "; "
                              << "use replica set url format "
                              << "<setname>/<server1>,<server2>, ..."};
    }

    if (setNameProvided && !setNameFound) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                              << "is the replica set still initializing? "
                              << resIsMaster};
    }

    // Make sure the set name specified in the connection string matches the one where its hosts
    // belong into
    if (setNameProvided && (providedSetName != foundSetName)) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                              << ") does not match the actual set name "
                              << foundSetName};
    }

    // Is it a config server?
    if (resIsMaster.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard since it is a config server"};
    }

    // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
    // string are part of the set. It is fine if not all members of the set are mentioned in the
    // connection string, though.
    if (setNameProvided) {
        std::set<std::string> hostSet;

        // Adds every element of a member list (host:port strings) to 'hostSet'.
        auto addMembers = [&hostSet](const BSONObj& members) {
            BSONObjIterator memberIt(members);
            while (memberIt.more()) {
                hostSet.insert(memberIt.next().String());  // host:port
            }
        };

        // "hosts" is read unconditionally; the other two lists only when present as objects.
        addMembers(resIsMaster["hosts"].Obj());
        if (resIsMaster["passives"].isABSONObj()) {
            addMembers(resIsMaster["passives"].Obj());
        }
        if (resIsMaster["arbiters"].isABSONObj()) {
            addMembers(resIsMaster["arbiters"].Obj());
        }

        for (const auto& seed : connectionString.getServers()) {
            const auto& seedText = seed.toString();  // host:port
            if (hostSet.find(seedText) != hostSet.end()) {
                continue;
            }
            return {ErrorCodes::OperationFailed,
                    str::stream() << "in seed list " << connectionString.toString() << ", host "
                                  << seedText
                                  << " does not belong to replica set "
                                  << foundSetName
                                  << "; found "
                                  << resIsMaster.toString()};
        }
    }

    // An explicit name wins; otherwise default to the name of the replica set (which is empty
    // for a host that reported no set name).
    const std::string actualShardName = shardProposedName ? *shardProposedName : foundSetName;

    // Disallow adding shard replica set with name 'config'
    if (actualShardName == NamespaceString::kConfigDb) {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Retrieve the most up to date connection string that we know from the replica set monitor (if
    // this is a replica set shard, otherwise it will be the same value as connectionString).
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());
    shard.setState(ShardType::ShardState::kShardAware);

    return shard;
}
/**
 * Compares a proposed shard against all shards currently known to the catalog.
 *
 * Returns:
 *  - the existing shard, when it conflicts with the proposal (same replica set name or a shared
 *    host) but has the same name (if one was proposed), connection string type, set name and
 *    max size, so that the caller can treat the request as already satisfied;
 *  - IllegalOperation, when there is such a conflict with differing options, or when only the
 *    proposed name matches an existing shard;
 *  - boost::none, when nothing conflicts;
 *  - the underlying error, when the shard list cannot be loaded or an existing host string
 *    cannot be parsed.
 */
StatusWith<boost::optional<ShardType>> ShardingCatalogManager::_checkIfShardExists(
    OperationContext* opCtx,
    const ConnectionString& proposedShardConnectionString,
    const std::string* proposedShardName,
    long long proposedShardMaxSize) {
    // Check whether any host in the connection is already part of the cluster.
    const auto existingShards = Grid::get(opCtx)->catalogClient()->getAllShards(
        opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!existingShards.isOK()) {
        return existingShards.getStatus().withContext(
            "Failed to load existing shards during addShard");
    }

    const bool proposedIsSet = proposedShardConnectionString.type() == ConnectionString::SET;

    // Now check if this shard already exists - if it already exists *with the same options* then
    // the addShard request can return success early without doing anything more.
    for (const auto& knownShard : existingShards.getValue().value) {
        auto swKnownConnStr = ConnectionString::parse(knownShard.getHost());
        if (!swKnownConnStr.isOK()) {
            return swKnownConnStr.getStatus();
        }
        auto knownConnStr = std::move(swKnownConnStr.getValue());

        // Tells whether the options of the shard being added match those of 'knownShard'.
        auto shardsAreEquivalent = [&]() -> bool {
            const bool sameName =
                !proposedShardName || *proposedShardName == knownShard.getName();
            const bool sameType = proposedShardConnectionString.type() == knownConnStr.type();
            const bool sameSetName = !proposedIsSet ||
                proposedShardConnectionString.getSetName() == knownConnStr.getSetName();
            const bool sameMaxSize = proposedShardMaxSize == knownShard.getMaxSizeMB();
            return sameName && sameType && sameSetName && sameMaxSize;
        };

        const bool sameReplicaSet = knownConnStr.type() == ConnectionString::SET &&
            proposedIsSet &&
            knownConnStr.getSetName() == proposedShardConnectionString.getSetName();
        if (sameReplicaSet) {
            // An existing shard has the same replica set name as the shard being added.
            // If the options aren't the same, then this is an error,
            // but if the options match then the addShard operation should be immediately
            // considered a success and terminated.
            if (!shardsAreEquivalent()) {
                return {ErrorCodes::IllegalOperation,
                        str::stream() << "A shard already exists containing the replica set '"
                                      << knownConnStr.getSetName()
                                      << "'"};
            }
            return {knownShard};
        }

        // Look if any of the hosts in the existing shard are present within the shard trying
        // to be added.
        for (const auto& knownHost : knownConnStr.getServers()) {
            for (const auto& addingHost : proposedShardConnectionString.getServers()) {
                if (!(knownHost == addingHost)) {
                    continue;
                }

                // At least one of the hosts in the shard being added already exists in an
                // existing shard. If the options aren't the same, then this is an error,
                // but if the options match then the addShard operation should be immediately
                // considered a success and terminated.
                if (!shardsAreEquivalent()) {
                    return {ErrorCodes::IllegalOperation,
                            str::stream() << "'" << addingHost.toString() << "' "
                                          << "is already a member of the existing shard '"
                                          << knownShard.getHost()
                                          << "' ("
                                          << knownShard.getName()
                                          << ")."};
                }
                return {knownShard};
            }
        }

        if (proposedShardName && *proposedShardName == knownShard.getName()) {
            // If we get here then we're trying to add a shard with the same name as an existing
            // shard, but there was no overlap in the hosts between the existing shard and the
            // proposed connection string for the new shard.
            return {ErrorCodes::IllegalOperation,
                    str::stream() << "A shard named " << *proposedShardName << " already exists"};
        }
    }

    return {boost::none};
}
/**
 * Runs isMaster against the prospective shard through 'targeter' and validates the reply.
 *
 * The checks, in order:
 *  - the command could be delivered (an RPC protocol negotiation failure gets a dedicated
 *    message) and the command itself succeeded;
 *  - 'maxWireVersion' is at least WireVersion::COMMANDS_ACCEPT_WRITE_CONCERN;
 *  - 'ismaster' is true;
 *  - the set name in 'connectionString' and the 'setName' in the reply are either both empty
 *    or identical;
 *  - the reply has no 'configsvr' field;
 *  - for replica sets, every host in 'connectionString' is listed under 'hosts', 'passives'
 *    or 'arbiters'.
 *
 * On success returns a ShardType named 'shardProposedName' (or the replica set name when no
 * name was proposed), whose host is the targeter's current connection string and whose state
 * is kShardAware. The name 'config' is rejected with BadValue.
 */
StatusWith<ShardType> ShardingCatalogManagerImpl::_validateHostAsShard(
    OperationContext* opCtx,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {
    auto swCommandResponse =
        _runCommandForAddShard(opCtx, targeter.get(), "admin", BSON("isMaster" << 1));
    if (!swCommandResponse.isOK()) {
        const bool protocolMismatch =
            swCommandResponse.getStatus() == ErrorCodes::RPCProtocolNegotiationFailed;
        if (!protocolMismatch) {
            return swCommandResponse.getStatus();
        }

        // A mongos, or a mongod too old to speak the current protocol, fails the negotiation.
        // TODO: If/When mongos ever supports opCommands, this detection will stop working
        // because the command status will be OK.
        return {ErrorCodes::RPCProtocolNegotiationFailed,
                str::stream() << targeter->connectionString().toString()
                              << " does not recognize the RPC protocol being used. This is"
                              << " likely because it contains a node that is a mongos or an old"
                              << " version of mongod."};
    }

    // The command was delivered; now look at its own status.
    auto commandStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!commandStatus.isOK()) {
        return {commandStatus.code(),
                str::stream() << "Error running isMaster against "
                              << targeter->connectionString().toString()
                              << ": "
                              << causedBy(commandStatus)};
    }

    auto isMasterReply = std::move(swCommandResponse.getValue().response);

    // Version gate: only nodes reporting a maxWireVersion of at least
    // COMMANDS_ACCEPT_WRITE_CONCERN may be added.
    // TODO(SERVER-25623): This approach won't work to prevent v3.6 mongoses from adding v3.4
    // shards, so it has to be revisited during the 3.5 development cycle.
    long long maxWireVersion;
    const Status wireVersionStatus =
        bsonExtractIntegerField(isMasterReply, "maxWireVersion", &maxWireVersion);
    if (!wireVersionStatus.isOK()) {
        return Status(wireVersionStatus.code(),
                      str::stream() << "isMaster returned invalid 'maxWireVersion' "
                                    << "field when attempting to add "
                                    << connectionString.toString()
                                    << " as a shard: "
                                    << wireVersionStatus.reason());
    }
    if (maxWireVersion < WireVersion::COMMANDS_ACCEPT_WRITE_CONCERN) {
        return Status(ErrorCodes::IncompatibleServerVersion,
                      str::stream() << "Cannot add " << connectionString.toString()
                                    << " as a shard because we detected a mongod with server "
                                       "version older than 3.4.0. It is invalid to add v3.2 and "
                                       "older shards through a v3.4 mongos.");
    }

    // The node that answered must report itself as master.
    bool isMaster;
    const Status isMasterFieldStatus =
        bsonExtractBooleanField(isMasterReply, "ismaster", &isMaster);
    if (!isMasterFieldStatus.isOK()) {
        return Status(isMasterFieldStatus.code(),
                      str::stream() << "isMaster returned invalid 'ismaster' "
                                    << "field when attempting to add "
                                    << connectionString.toString()
                                    << " as a shard: "
                                    << isMasterFieldStatus.reason());
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                    << connectionString.toString()
                    << " does not have a master. If this is a replica set, ensure that it has a"
                    << " healthy primary and that the set has been properly initiated."};
    }

    const std::string providedSetName = connectionString.getSetName();
    const std::string foundSetName = isMasterReply["setName"].str();

    // The set name supplied by the user and the one reported by the node must agree.
    if (providedSetName.empty()) {
        if (!foundSetName.empty()) {
            return {ErrorCodes::OperationFailed,
                    str::stream() << "host is part of set " << foundSetName << "; "
                                  << "use replica set url format "
                                  << "<setname>/<server1>,<server2>, ..."};
        }
    } else if (foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                              << "is the replica set still initializing? "
                              << isMasterReply};
    } else if (providedSetName != foundSetName) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                              << ") does not match the actual set name "
                              << foundSetName};
    }

    // Config servers cannot double as shards.
    if (isMasterReply.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard since it is a config server"};
    }

    // For a replica set, each seed host must be a reported member. The seed list may be a
    // subset of the members.
    if (!providedSetName.empty()) {
        std::set<std::string> memberHosts;
        const auto collectHosts = [&memberHosts](const BSONObj& hostList) {
            BSONObjIterator it(hostList);
            while (it.more()) {
                memberHosts.insert(it.next().String());  // host:port
            }
        };

        // 'hosts' is read unconditionally; 'passives' and 'arbiters' only when present.
        collectHosts(isMasterReply["hosts"].Obj());
        if (isMasterReply["passives"].isABSONObj()) {
            collectHosts(isMasterReply["passives"].Obj());
        }
        if (isMasterReply["arbiters"].isABSONObj()) {
            collectHosts(isMasterReply["arbiters"].Obj());
        }

        for (const auto& seed : connectionString.getServers()) {
            const std::string seedHost = seed.toString();  // host:port
            if (memberHosts.count(seedHost) == 0) {
                return {ErrorCodes::OperationFailed,
                        str::stream() << "in seed list " << connectionString.toString()
                                      << ", host "
                                      << seedHost
                                      << " does not belong to replica set "
                                      << foundSetName
                                      << "; found "
                                      << isMasterReply.toString()};
            }
        }
    }

    // An explicit name wins; otherwise fall back to the replica set name (empty for a
    // standalone).
    std::string actualShardName = shardProposedName ? *shardProposedName : foundSetName;

    // 'config' is reserved and may not be used as a shard name.
    if (actualShardName == NamespaceString::kConfigDb) {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Record the connection string currently reported by the targeter rather than the one the
    // caller passed in.
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());
    shard.setState(ShardType::ShardState::kShardAware);

    return shard;
}
StatusWith<ShardType> ShardingCatalogManagerImpl::_validateHostAsShard( OperationContext* txn, std::shared_ptr<RemoteCommandTargeter> targeter, const std::string* shardProposedName, const ConnectionString& connectionString) { // Check whether any host in the connection is already part of the cluster. Grid::get(txn)->shardRegistry()->reload(txn); for (const auto& hostAndPort : connectionString.getServers()) { std::shared_ptr<Shard> shard; shard = Grid::get(txn)->shardRegistry()->getShardNoReload(hostAndPort.toString()); if (shard) { return {ErrorCodes::OperationFailed, str::stream() << "'" << hostAndPort.toString() << "' " << "is already a member of the existing shard '" << shard->getConnString().toString() << "' (" << shard->getId() << ")."}; } } // Check for mongos and older version mongod connections, and whether the hosts // can be found for the user specified replset. auto swCommandResponse = _runCommandForAddShard(txn, targeter.get(), "admin", BSON("isMaster" << 1)); if (!swCommandResponse.isOK()) { if (swCommandResponse.getStatus() == ErrorCodes::RPCProtocolNegotiationFailed) { // Mongos to mongos commands are no longer supported in the wire protocol // (because mongos does not support OP_COMMAND), similarly for a new mongos // and an old mongod. So the call will fail in such cases. // TODO: If/When mongos ever supports opCommands, this logic will break because // cmdStatus will be OK. return {ErrorCodes::RPCProtocolNegotiationFailed, str::stream() << targeter->connectionString().toString() << " does not recognize the RPC protocol being used. 
This is" << " likely because it contains a node that is a mongos or an old" << " version of mongod."}; } else { return swCommandResponse.getStatus(); } } // Check for a command response error auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus); if (!resIsMasterStatus.isOK()) { return {resIsMasterStatus.code(), str::stream() << "Error running isMaster against " << targeter->connectionString().toString() << ": " << causedBy(resIsMasterStatus)}; } auto resIsMaster = std::move(swCommandResponse.getValue().response); // Check whether there is a master. If there isn't, the replica set may not have been // initiated. If the connection is a standalone, it will return true for isMaster. bool isMaster; Status status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster); if (!status.isOK()) { return Status(status.code(), str::stream() << "isMaster returned invalid 'ismaster' " << "field when attempting to add " << connectionString.toString() << " as a shard: " << status.reason()); } if (!isMaster) { return {ErrorCodes::NotMaster, str::stream() << connectionString.toString() << " does not have a master. If this is a replica set, ensure that it has a" << " healthy primary and that the set has been properly initiated."}; } const string providedSetName = connectionString.getSetName(); const string foundSetName = resIsMaster["setName"].str(); // Make sure the specified replica set name (if any) matches the actual shard's replica set if (providedSetName.empty() && !foundSetName.empty()) { return {ErrorCodes::OperationFailed, str::stream() << "host is part of set " << foundSetName << "; " << "use replica set url format " << "<setname>/<server1>,<server2>, ..."}; } if (!providedSetName.empty() && foundSetName.empty()) { return {ErrorCodes::OperationFailed, str::stream() << "host did not return a set name; " << "is the replica set still initializing? 
" << resIsMaster}; } // Make sure the set name specified in the connection string matches the one where its hosts // belong into if (!providedSetName.empty() && (providedSetName != foundSetName)) { return {ErrorCodes::OperationFailed, str::stream() << "the provided connection string (" << connectionString.toString() << ") does not match the actual set name " << foundSetName}; } // Is it a config server? if (resIsMaster.hasField("configsvr")) { return {ErrorCodes::OperationFailed, str::stream() << "Cannot add " << connectionString.toString() << " as a shard since it is a config server"}; } // If the shard is part of a replica set, make sure all the hosts mentioned in the connection // string are part of the set. It is fine if not all members of the set are mentioned in the // connection string, though. if (!providedSetName.empty()) { std::set<string> hostSet; BSONObjIterator iter(resIsMaster["hosts"].Obj()); while (iter.more()) { hostSet.insert(iter.next().String()); // host:port } if (resIsMaster["passives"].isABSONObj()) { BSONObjIterator piter(resIsMaster["passives"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } if (resIsMaster["arbiters"].isABSONObj()) { BSONObjIterator piter(resIsMaster["arbiters"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } vector<HostAndPort> hosts = connectionString.getServers(); for (size_t i = 0; i < hosts.size(); i++) { const string host = hosts[i].toString(); // host:port if (hostSet.find(host) == hostSet.end()) { return {ErrorCodes::OperationFailed, str::stream() << "in seed list " << connectionString.toString() << ", host " << host << " does not belong to replica set " << foundSetName << "; found " << resIsMaster.toString()}; } } } string actualShardName; if (shardProposedName) { actualShardName = *shardProposedName; } else if (!foundSetName.empty()) { // Default it to the name of the replica set actualShardName = foundSetName; } // Disallow adding 
shard replica set with name 'config' if (actualShardName == "config") { return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"}; } // Retrieve the most up to date connection string that we know from the replica set monitor (if // this is a replica set shard, otherwise it will be the same value as connectionString). ConnectionString actualShardConnStr = targeter->connectionString(); ShardType shard; shard.setName(actualShardName); shard.setHost(actualShardConnStr.toString()); return shard; }
/** * Performs sanity check on the given connection string on whether the seed list * is consistent with the view of the set using replSetGetStatus. */ bool addReplSetShardCheck( const ConnectionString& servers, string* errMsg ) { bool ok = false; BSONObj replSetStat; try { ScopedDbConnection newShardConn(servers.toString()); ok = newShardConn->runCommand( "admin", BSON( "replSetGetStatus" << 1 ), replSetStat ); newShardConn.done(); } catch ( const DBException& ex ) { *errMsg = str::stream() << "Error encountered while checking status of " << servers.toString() << ": " << causedBy( ex ); } if( !ok ) { if ( replSetStat["info"].str() == "configsvr" ) { *errMsg = "the specified mongod is a --configsvr and " "should thus not be a shard server"; } else { *errMsg = str::stream() << "error encountered calling replSetGetStatus: " << replSetStat; } return false; } // if the shard has only one host, make sure it is not part of a replica set string setName = replSetStat["set"].str(); string commandSetName = servers.getSetName(); if ( commandSetName.empty() && ! setName.empty() ) { *errMsg = str::stream() << "host is part of set: " << setName << " use replica set url format <setname>/<server1>,<server2>,...."; return false; } if ( !commandSetName.empty() && setName.empty() ) { *errMsg = str::stream() << "host did not return a set name, " << "is the replica set still initializing?" << replSetStat; return false; } // if the shard is part of replica set, make sure it is the right one if ( ! commandSetName.empty() && ( commandSetName != setName ) ) { *errMsg = str::stream() << "host is part of a different set: " << setName; return false; } // if the shard is part of a replica set, make sure all the hosts mentioned in // 'servers' are part of the set. It is fine if not all members of the set // are present in 'servers'. bool foundAll = true; string offendingHost; if ( ! 
commandSetName.empty() ) { set<string> hostSet; BSONElement membersElem( replSetStat["members"] ); if ( membersElem.type() == Array ) { BSONArrayIteratorSorted iter( BSONArray( membersElem.Obj() )); while ( iter.more() ) { hostSet.insert( iter.next()["name"].str() ); // host:port } vector<HostAndPort> hosts = servers.getServers(); for ( size_t i = 0 ; i < hosts.size() ; i++ ) { if (!hosts[i].hasPort()) { hosts[i].setPort(CmdLine::DefaultDBPort); } string host = hosts[i].toString(); // host:port if ( hostSet.find( host ) == hostSet.end() ) { offendingHost = host; foundAll = false; break; } } } if ( hostSet.empty() ) { *errMsg = "replSetGetStatus returned an empty set. " " Please wait for the set to initialize and try again."; return false; } } if ( ! foundAll ) { *errMsg = str::stream() << "in seed list " << servers.toString() << ", host " << offendingHost << " does not belong to replica set " << setName; return false; } return true; }
StatusWith<string> isValidShard(const string& name, const ConnectionString& shardConnectionString, ScopedDbConnection& conn) { if (conn->type() == ConnectionString::SYNC) { return Status(ErrorCodes::BadValue, "can't use sync cluster as a shard; for a replica set, " "you have to use <setname>/<server1>,<server2>,..."); } BSONObj resIsMongos; // (ok == 0) implies that it is a mongos if (conn->runCommand("admin", BSON("isdbgrid" << 1), resIsMongos)) { return Status(ErrorCodes::BadValue, "can't add a mongos process as a shard"); } BSONObj resIsMaster; if (!conn->runCommand("admin", BSON("isMaster" << 1), resIsMaster)) { return Status(ErrorCodes::OperationFailed, str::stream() << "failed running isMaster: " << resIsMaster); } // if the shard has only one host, make sure it is not part of a replica set string setName = resIsMaster["setName"].str(); string commandSetName = shardConnectionString.getSetName(); if (commandSetName.empty() && !setName.empty()) { return Status(ErrorCodes::BadValue, str::stream() << "host is part of set " << setName << "; " << "use replica set url format " << "<setname>/<server1>,<server2>, ..."); } if (!commandSetName.empty() && setName.empty()) { return Status(ErrorCodes::OperationFailed, str::stream() << "host did not return a set name; " << "is the replica set still initializing? 
" << resIsMaster); } // if the shard is part of replica set, make sure it is the right one if (!commandSetName.empty() && (commandSetName != setName)) { return Status(ErrorCodes::OperationFailed, str::stream() << "host is part of a different set: " << setName); } if (setName.empty()) { // check this isn't a --configsvr BSONObj res; bool ok = conn->runCommand("admin", BSON("replSetGetStatus" << 1), res); if(!ok && res["info"].type() == String && res["info"].String() == "configsvr") { return Status(ErrorCodes::BadValue, "the specified mongod is a --configsvr and " "should thus not be a shard server"); } } // if the shard is part of a replica set, // make sure all the hosts mentioned in 'shardConnectionString' are part of // the set. It is fine if not all members of the set are present in 'shardConnectionString'. bool foundAll = true; string offendingHost; if (!commandSetName.empty()) { set<string> hostSet; BSONObjIterator iter(resIsMaster["hosts"].Obj()); while (iter.more()) { hostSet.insert(iter.next().String()); // host:port } if (resIsMaster["passives"].isABSONObj()) { BSONObjIterator piter(resIsMaster["passives"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } if (resIsMaster["arbiters"].isABSONObj()) { BSONObjIterator piter(resIsMaster["arbiters"].Obj()); while (piter.more()) { hostSet.insert(piter.next().String()); // host:port } } vector<HostAndPort> hosts = shardConnectionString.getServers(); for (size_t i = 0; i < hosts.size(); i++) { if (!hosts[i].hasPort()) { hosts[i] = HostAndPort(hosts[i].host(), hosts[i].port()); } string host = hosts[i].toString(); // host:port if (hostSet.find(host) == hostSet.end()) { offendingHost = host; foundAll = false; break; } } } if (!foundAll) { return Status(ErrorCodes::OperationFailed, str::stream() << "in seed list " << shardConnectionString.toString() << ", host " << offendingHost << " does not belong to replica set " << setName); } string shardName(name); // shard name defaults to 
the name of the replica set if (name.empty() && !setName.empty()) { shardName = setName; } // disallow adding shard replica set with name 'config' if (shardName == "config") { return Status(ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"); } return shardName; }
/**
 * Verifies that every mongos that pinged recently, every shard host and every config server
 * of the cluster runs at least 'minMongoVersion'.
 *
 * @param configLoc       connection string of the config server(s); used both to read the
 *                        mongos ping and shard collections and as part of the host list whose
 *                        versions are checked.
 * @param minMongoVersion lowest acceptable version string, compared with versionCmp().
 * @return Status::OK() if no version below the minimum was detected;
 *         RemoteValidationError if a recently-pinging mongos or a reachable server is too old;
 *         UnsupportedFormat if a shard document or shard host string is invalid;
 *         UnknownError if a server replied to buildInfo with a failure;
 *         the converted DBException if a config collection could not be read.
 *         Servers on which buildInfo cannot be run because of a DBException are only warned
 *         about and skipped.
 */
Status checkClusterMongoVersions(const ConnectionString& configLoc,
                                 const string& minMongoVersion) {
    scoped_ptr<ScopedDbConnection> connPtr;

    //
    // Find mongos pings in config server
    //

    try {
        connPtr.reset(new ScopedDbConnection(configLoc, 30));
        ScopedDbConnection& conn = *connPtr;
        scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(MongosType::ConfigNS, Query())));

        while (cursor->more()) {
            BSONObj pingDoc = cursor->next();

            MongosType ping;
            string errMsg;
            // NOTE: We don't care if the ping is invalid, legacy stuff will be
            if (!ping.parseBSON(pingDoc, &errMsg)) {
                warning() << "could not parse ping document: " << pingDoc << causedBy(errMsg)
                          << endl;
                continue;
            }

            string mongoVersion = "2.0";
            // Hack to determine older mongos versions from ping format
            if (ping.isWaitingSet())
                mongoVersion = "2.2";
            if (ping.isMongoVersionSet() && ping.getMongoVersion() != "") {
                mongoVersion = ping.getMongoVersion();
            }

            Date_t lastPing = ping.getPing();

            // Time since the last ping; stays 0 if the ping timestamp is in the future.
            long long quietIntervalMillis = 0;
            Date_t currentJsTime = jsTime();
            if (currentJsTime >= lastPing) {
                quietIntervalMillis = static_cast<long long>(currentJsTime - lastPing);
            }
            long long quietIntervalMins = quietIntervalMillis / (60 * 1000);

            // We assume that anything that hasn't pinged in 5 minutes is probably down
            if (quietIntervalMins >= 5) {
                log() << "stale mongos detected " << quietIntervalMins << " minutes ago,"
                      << " network location is " << pingDoc["_id"].String()
                      << ", not checking version" << endl;
            } else {
                if (versionCmp(mongoVersion, minMongoVersion) < 0) {
                    return Status(
                        ErrorCodes::RemoteValidationError,
                        stream() << "version " << mongoVersion << " detected on mongos at "
                                 << ping.getName() << ", but version >= " << minMongoVersion
                                 << " required; you must wait 5 minutes "
                                 << "after shutting down a pre-" << minMongoVersion << " mongos");
                }
            }
        }
    } catch (const DBException& e) {
        return e.toStatus("could not read mongos pings collection");
    }

    //
    // Load shards from config server
    //

    vector<HostAndPort> servers;

    try {
        ScopedDbConnection& conn = *connPtr;
        scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(ShardType::ConfigNS, Query())));

        while (cursor->more()) {
            BSONObj shardDoc = cursor->next();

            ShardType shard;
            string errMsg;
            if (!shard.parseBSON(shardDoc, &errMsg) || !shard.isValid(&errMsg)) {
                connPtr->done();
                return Status(ErrorCodes::UnsupportedFormat,
                              stream() << "invalid shard " << shardDoc
                                       << " read from the config server" << causedBy(errMsg));
            }

            ConnectionString shardLoc = ConnectionString::parse(shard.getHost(), errMsg);
            if (shardLoc.type() == ConnectionString::INVALID) {
                connPtr->done();
                return Status(ErrorCodes::UnsupportedFormat,
                              stream() << "invalid shard host " << shard.getHost()
                                       << " read from the config server" << causedBy(errMsg));
            }

            vector<HostAndPort> shardServers = shardLoc.getServers();
            servers.insert(servers.end(), shardServers.begin(), shardServers.end());
        }
    } catch (const DBException& e) {
        return e.toStatus("could not read shards collection");
    }

    connPtr->done();

    // Add config servers to list of servers to check version against
    vector<HostAndPort> configServers = configLoc.getServers();
    servers.insert(servers.end(), configServers.begin(), configServers.end());

    //
    // We've now got all the shard info from the config server, start contacting the shards
    // and config servers and verifying their versions.
    //

    for (vector<HostAndPort>::iterator serverIt = servers.begin(); serverIt != servers.end();
         ++serverIt) {
        // Note: This will *always* be a single-host connection
        ConnectionString serverLoc(*serverIt);
        dassert(serverLoc.type() == ConnectionString::MASTER ||
                serverLoc.type() == ConnectionString::CUSTOM);  // for dbtests

        log() << "checking that version of host " << serverLoc << " is compatible with "
              << minMongoVersion << endl;

        scoped_ptr<ScopedDbConnection> serverConnPtr;

        bool resultOk;
        BSONObj buildInfo;

        try {
            serverConnPtr.reset(new ScopedDbConnection(serverLoc, 30));
            ScopedDbConnection& serverConn = *serverConnPtr;

            resultOk = serverConn->runCommand("admin", BSON("buildInfo" << 1), buildInfo);
        } catch (const DBException& e) {
            warning() << "could not run buildInfo command on " << serverLoc.toString() << " "
                      << causedBy(e) << ". Please ensure that this server is up and at a "
                                        "version >= " << minMongoVersion;
            continue;
        }

        // The reply has been read in full whether or not the command succeeded, so release the
        // connection before inspecting the result. Previously the failure return below
        // skipped done().
        serverConnPtr->done();

        // TODO: Make running commands saner such that we can consolidate error handling
        if (!resultOk) {
            return Status(ErrorCodes::UnknownError,
                          stream() << DBClientConnection::getLastErrorString(buildInfo)
                                   << causedBy(buildInfo.toString()));
        }

        verify(buildInfo["version"].type() == String);
        string mongoVersion = buildInfo["version"].String();

        if (versionCmp(mongoVersion, minMongoVersion) < 0) {
            return Status(ErrorCodes::RemoteValidationError,
                          stream() << "version " << mongoVersion << " detected on mongo "
                                                                    "server at "
                                   << serverLoc.toString() << ", but version >= "
                                   << minMongoVersion << " required");
        }
    }

    return Status::OK();
}
bool DistributedLock::checkSkew(const ConnectionString& cluster, unsigned skewChecks, unsigned long long maxClockSkew, unsigned long long maxNetSkew) { vector<HostAndPort> servers = cluster.getServers(); if (servers.size() < 1) return true; vector<long long> avgSkews; for (unsigned i = 0; i < skewChecks; i++) { // Find the average skew for each server unsigned s = 0; for (vector<HostAndPort>::iterator si = servers.begin(); si != servers.end(); ++si, s++) { if (i == 0) avgSkews.push_back(0); // Could check if this is self, but shouldn't matter since local network connection // should be fast. ConnectionString server(*si); vector<long long> skew; BSONObj result; Date_t remote = remoteTime(server, maxNetSkew); Date_t local = jsTime(); // Remote time can be delayed by at most MAX_NET_SKEW // Skew is how much time we'd have to add to local to get to remote avgSkews[s] += durationCount<Milliseconds>(remote - local); LOG(logLvl + 1) << "skew from remote server " << server << " found: " << (remote - local); } } // Analyze skews long long serverMaxSkew = 0; long long serverMinSkew = 0; for (unsigned s = 0; s < avgSkews.size(); s++) { long long avgSkew = (avgSkews[s] /= skewChecks); // Keep track of max and min skews if (s == 0) { serverMaxSkew = avgSkew; serverMinSkew = avgSkew; } else { if (avgSkew > serverMaxSkew) serverMaxSkew = avgSkew; if (avgSkew < serverMinSkew) serverMinSkew = avgSkew; } } long long totalSkew = serverMaxSkew - serverMinSkew; // Make sure our max skew is not more than our pre-set limit if (totalSkew > (long long)maxClockSkew) { LOG(logLvl + 1) << "total clock skew of " << totalSkew << "ms for servers " << cluster << " is out of " << maxClockSkew << "ms bounds." << endl; return false; } LOG(logLvl + 1) << "total clock skew of " << totalSkew << "ms for servers " << cluster << " is in " << maxClockSkew << "ms bounds." << endl; return true; }