void ShardRegistry::_updateLookupMapsForShard_inlock(shared_ptr<Shard> shard,
                                                     const ConnectionString& newConnString) {
    auto oldConnString = shard->getConnString();
    for (const auto& host : oldConnString.getServers()) {
        _lookup.erase(host.toString());
    }

    _lookup[shard->getId()] = shard;

    if (newConnString.type() == ConnectionString::SET) {
        _rsLookup[newConnString.getSetName()] = shard;
    } else if (newConnString.type() == ConnectionString::CUSTOM) {
        // CUSTOM connection strings (i.e. "$dummy:10000") become DBDirectClient connections
        // which always return "localhost" as their response to getServerAddress(). This is
        // just for making dbtest work.
        _lookup["localhost"] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[newConnString.toString()] = shard;

    for (const HostAndPort& hostAndPort : newConnString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
    }
}
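// Illustrative sketch only, not MongoDB code: the rebuild idiom above -- erase every
// entry derived from the old connection string, then re-insert under the new one --
// exercised in isolation with standard-library maps. All names here are hypothetical.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct FakeShard {
    std::string id;
    std::vector<std::string> hosts;  // "host:port" strings
};

static void rebuildLookup(std::map<std::string, std::shared_ptr<FakeShard>>& lookup,
                          const std::shared_ptr<FakeShard>& shard,
                          const std::vector<std::string>& newHosts) {
    // Drop stale host entries before inserting the new ones, mirroring the
    // erase-then-insert order used by _updateLookupMapsForShard_inlock.
    for (const auto& host : shard->hosts) {
        lookup.erase(host);
    }
    shard->hosts = newHosts;
    lookup[shard->id] = shard;
    for (const auto& host : newHosts) {
        lookup[host] = shard;
    }
}

int main() {
    auto shard = std::make_shared<FakeShard>(FakeShard{"shard0", {"a:27017"}});
    std::map<std::string, std::shared_ptr<FakeShard>> lookup{{"a:27017", shard}};
    rebuildLookup(lookup, shard, {"b:27017", "c:27017"});
    for (const auto& kv : lookup) {
        std::cout << kv.first << " -> " << kv.second->id << "\n";
    }
    return 0;
}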
Status DBClientShardResolver::findMaster( const std::string connString,
                                          ConnectionString* resolvedHost ) {
    std::string errMsg;

    ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
    dassert( errMsg == "" );
    dassert( rawHost.type() == ConnectionString::SET
             || rawHost.type() == ConnectionString::MASTER );

    if ( rawHost.type() == ConnectionString::MASTER ) {
        *resolvedHost = rawHost;
        return Status::OK();
    }

    //
    // If we need to, then get the particular node we're targeting in the replica set
    //

    // Don't create the monitor unless we need to - fast path
    ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());

    if (!replMonitor) {
        // Slow path
        std::set<HostAndPort> seedServers(rawHost.getServers().begin(),
                                          rawHost.getServers().end());
        ReplicaSetMonitor::createIfNeeded(rawHost.getSetName(), seedServers);
        replMonitor = ReplicaSetMonitor::get(rawHost.getSetName());
    }

    if (!replMonitor) {
        return Status( ErrorCodes::ReplicaSetNotFound,
                       string("unknown replica set ") + rawHost.getSetName() );
    }

    try {
        // This can throw when we don't find a master!
        HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
        *resolvedHost = ConnectionString::parse( masterHostAndPort.toString(), errMsg );
        dassert( errMsg == "" );
        return Status::OK();
    }
    catch ( const DBException& ) {
        return Status( ErrorCodes::HostNotFound,
                       string("could not contact primary for replica set ")
                           + replMonitor->getName() );
    }

    // Unreachable
    dassert( false );
    return Status( ErrorCodes::UnknownError, "" );
}
void ShardRegistryData::_addShard(WithLock lk,
                                  std::shared_ptr<Shard> const& shard,
                                  bool useOriginalCS) {
    const ShardId shardId = shard->getId();

    const ConnectionString connString =
        useOriginalCS ? shard->originalConnString() : shard->getConnString();

    auto currentShard = _findByShardId(lk, shardId);
    if (currentShard) {
        auto oldConnString = currentShard->originalConnString();

        if (oldConnString.toString() != connString.toString()) {
            log() << "Updating ShardRegistry connection string for shard "
                  << currentShard->getId() << " from: " << oldConnString.toString()
                  << " to: " << connString.toString();
        }

        for (const auto& host : oldConnString.getServers()) {
            _lookup.erase(host.toString());
            _hostLookup.erase(host);
        }
        _lookup.erase(oldConnString.toString());
    }

    _lookup[shard->getId()] = shard;

    LOG(3) << "Adding shard " << shard->getId() << ", with CS " << connString.toString();
    if (connString.type() == ConnectionString::SET) {
        _rsLookup[connString.getSetName()] = shard;
    } else if (connString.type() == ConnectionString::CUSTOM) {
        // CUSTOM connection strings (i.e. "$dummy:10000") become DBDirectClient connections
        // which always return "localhost" as their response to getServerAddress(). This is
        // just for making dbtest work.
        _lookup[ShardId("localhost")] = shard;
        _hostLookup[HostAndPort("localhost")] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[connString.toString()] = shard;

    for (const HostAndPort& hostAndPort : connString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
        _hostLookup[hostAndPort] = shard;
    }
}
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);

    // Do not restart the task executor if it is in shutdown
    if (!_taskExecutor && !_isShutdown) {
        // Construct the task executor
        auto net = executor::makeNetworkInterface("ReplicaSetMonitor-TaskExecutor");
        auto netPtr = net.get();
        _taskExecutor = stdx::make_unique<ThreadPoolTaskExecutor>(
            stdx::make_unique<NetworkInterfaceThreadPool>(netPtr), std::move(net));
        LOG(1) << "Starting up task executor for monitoring replica sets in response to request "
                  "to monitor set: "
               << connStr.toString();
        _taskExecutor->startup();
    }

    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        return monitor;
    }

    const std::set<HostAndPort> servers(connStr.getServers().begin(),
                                        connStr.getServers().end());

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers);
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
shared_ptr<Shard> ShardRegistry::find(const string& ident) {
    string errmsg;
    ConnectionString connStr = ConnectionString::parse(ident, errmsg);
    uassert(18642,
            str::stream() << "Error parsing connection string: " << ident,
            errmsg.empty());

    if (connStr.type() == ConnectionString::SET) {
        boost::lock_guard<boost::mutex> lk(_rsMutex);
        ShardMap::iterator iter = _rsLookup.find(connStr.getSetName());
        if (iter == _rsLookup.end()) {
            return nullptr;
        }
        return iter->second;
    } else {
        boost::lock_guard<boost::mutex> lk(_mutex);
        ShardMap::iterator iter = _lookup.find(ident);
        if (iter == _lookup.end()) {
            return nullptr;
        }
        return iter->second;
    }
}
void ShardRegistry::updateReplSetHosts(const ConnectionString& newConnString) {
    invariant(newConnString.type() == ConnectionString::SET ||
              newConnString.type() == ConnectionString::CUSTOM);  // For dbtests

    // Prevents the config shard's connection string from being updated during initialization
    stdx::unique_lock<stdx::mutex> lock(_reloadMutex);
    _data.rebuildShardIfExists(newConnString, _shardFactory.get());
}
/**
 * Returns the currently-set config hosts for a cluster
 */
static vector<ConnectionString> getConfigHosts() {
    vector<ConnectionString> configHosts;
    ConnectionString configHostOrHosts = configServer.getConnectionString();
    if (configHostOrHosts.type() == ConnectionString::MASTER) {
        configHosts.push_back(configHostOrHosts);
    } else if (configHostOrHosts.type() == ConnectionString::SYNC) {
        vector<HostAndPort> configHPs = configHostOrHosts.getServers();
        for (vector<HostAndPort>::iterator it = configHPs.begin(); it != configHPs.end();
             ++it) {
            configHosts.push_back(ConnectionString(*it));
        }
    } else {
        // This is only for tests.
        dassert(configHostOrHosts.type() == ConnectionString::CUSTOM);
        configHosts.push_back(configHostOrHosts);
    }
    return configHosts;
}
Status CatalogManagerReplicaSet::init(const ConnectionString& configCS,
                                      std::unique_ptr<DistLockManager> distLockManager) {
    invariant(configCS.type() == ConnectionString::SET);

    _configServerConnectionString = configCS;
    _distLockManager = std::move(distLockManager);

    return Status::OK();
}
Status initializeGlobalShardingState(OperationContext* txn,
                                     const ConnectionString& configCS,
                                     bool allowNetworking) {
    if (configCS.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Unrecognized connection string."};
    }

    auto network =
        executor::makeNetworkInterface("NetworkInterfaceASIO-ShardRegistry",
                                       stdx::make_unique<ShardingNetworkConnectionHook>(),
                                       stdx::make_unique<ShardingEgressMetadataHook>());
    auto networkPtr = network.get();
    auto shardRegistry(
        stdx::make_unique<ShardRegistry>(stdx::make_unique<RemoteCommandTargeterFactoryImpl>(),
                                         makeTaskExecutorPool(std::move(network)),
                                         networkPtr,
                                         makeTaskExecutor(executor::makeNetworkInterface(
                                             "NetworkInterfaceASIO-ShardRegistry-TaskExecutor")),
                                         configCS));

    auto catalogManager = makeCatalogManager(getGlobalServiceContext(),
                                             shardRegistry.get(),
                                             HostAndPort(getHostName(), serverGlobalParams.port));

    shardRegistry->startup();
    grid.init(
        std::move(catalogManager),
        std::move(shardRegistry),
        stdx::make_unique<ClusterCursorManager>(getGlobalServiceContext()->getClockSource()));

    while (!inShutdown()) {
        try {
            Status status = grid.catalogManager(txn)->startup(txn, allowNetworking);
            uassertStatusOK(status);

            if (serverGlobalParams.configsvrMode == CatalogManager::ConfigServerMode::NONE) {
                grid.shardRegistry()->reload(txn);
            }

            return Status::OK();
        } catch (const DBException& ex) {
            Status status = ex.toStatus();
            if (status == ErrorCodes::ReplicaSetNotFound) {
                // ReplicaSetNotFound most likely means we've been waiting for the config
                // replica set to come up for so long that the ReplicaSetMonitor stopped
                // monitoring the set. Rebuild the config shard to force the monitor to resume
                // monitoring the config servers.
                grid.shardRegistry()->rebuildConfigShard();
            }
            log() << "Error initializing sharding state, sleeping for 2 seconds and trying again"
                  << causedBy(status);
            sleepmillis(2000);
            continue;
        }
    }

    return Status::OK();
}
Status DBClientShardResolver::findMaster( const std::string connString,
                                          ConnectionString* resolvedHost ) {
    std::string errMsg;

    ConnectionString rawHost = ConnectionString::parse( connString, errMsg );
    dassert( errMsg == "" );
    dassert( rawHost.type() == ConnectionString::SET
             || rawHost.type() == ConnectionString::MASTER );

    if ( rawHost.type() == ConnectionString::MASTER ) {
        *resolvedHost = rawHost;
        return Status::OK();
    }

    //
    // If we need to, then get the particular node we're targeting in the replica set
    //

    // Does not reload the monitor if it doesn't currently exist
    ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawHost.getSetName(), false );
    if ( !replMonitor ) {
        return Status( ErrorCodes::ReplicaSetNotFound,
                       string("unknown replica set ") + rawHost.getSetName() );
    }

    try {
        // This can throw when we don't find a master!
        HostAndPort masterHostAndPort = replMonitor->getMasterOrUassert();
        *resolvedHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
        dassert( errMsg == "" );
        return Status::OK();
    }
    catch ( const DBException& ) {
        return Status( ErrorCodes::HostNotFound,
                       string("could not contact primary for replica set ")
                           + replMonitor->getName() );
    }

    // Unreachable
    dassert( false );
    return Status( ErrorCodes::UnknownError, "" );
}
Status initializeGlobalShardingState(OperationContext* txn,
                                     const ConnectionString& configCS,
                                     StringData distLockProcessId,
                                     std::unique_ptr<ShardFactory> shardFactory,
                                     rpc::ShardingEgressMetadataHookBuilder hookBuilder,
                                     ShardingCatalogManagerBuilder catalogManagerBuilder) {
    if (configCS.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Unrecognized connection string."};
    }

    auto network =
        executor::makeNetworkInterface("NetworkInterfaceASIO-ShardRegistry",
                                       stdx::make_unique<ShardingNetworkConnectionHook>(),
                                       hookBuilder());
    auto networkPtr = network.get();
    auto executorPool = makeTaskExecutorPool(std::move(network), hookBuilder());
    executorPool->startup();

    auto shardRegistry(stdx::make_unique<ShardRegistry>(std::move(shardFactory), configCS));

    auto catalogClient =
        makeCatalogClient(txn->getServiceContext(), shardRegistry.get(), distLockProcessId);

    auto rawCatalogClient = catalogClient.get();

    std::unique_ptr<ShardingCatalogManager> catalogManager = catalogManagerBuilder(
        rawCatalogClient,
        makeTaskExecutor(executor::makeNetworkInterface("AddShard-TaskExecutor")));
    auto rawCatalogManager = catalogManager.get();

    grid.init(
        std::move(catalogClient),
        std::move(catalogManager),
        stdx::make_unique<CatalogCache>(),
        std::move(shardRegistry),
        stdx::make_unique<ClusterCursorManager>(
            getGlobalServiceContext()->getPreciseClockSource()),
        stdx::make_unique<BalancerConfiguration>(),
        std::move(executorPool),
        networkPtr);

    auto status = rawCatalogClient->startup();
    if (!status.isOK()) {
        return status;
    }

    if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        // Only config servers get a ShardingCatalogManager.
        status = rawCatalogManager->startup();
        if (!status.isOK()) {
            return status;
        }
    }

    return Status::OK();
}
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);

    shared_ptr<ReplicaSetMonitor>& monitor = _monitors[connStr.getSetName()];
    if (!monitor) {
        const std::set<HostAndPort> servers(connStr.getServers().begin(),
                                            connStr.getServers().end());
        monitor = std::make_shared<ReplicaSetMonitor>(connStr.getSetName(), servers);
    }

    return monitor;
}
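// Illustrative sketch only, not MongoDB code: the get-or-create pattern above, reduced
// to standard-library types. A single mutex guards the registry, and the shared_ptr
// reference returned by operator[] is populated in place when no monitor exists yet.
// All names are hypothetical.
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct FakeMonitor {
    explicit FakeMonitor(std::string name) : setName(std::move(name)) {}
    std::string setName;
};

class MonitorRegistry {
public:
    std::shared_ptr<FakeMonitor> getOrCreate(const std::string& setName) {
        std::lock_guard<std::mutex> lk(_mutex);
        // operator[] default-constructs an empty shared_ptr on first access
        std::shared_ptr<FakeMonitor>& slot = _monitors[setName];
        if (!slot) {
            slot = std::make_shared<FakeMonitor>(setName);
        }
        return slot;
    }

private:
    std::mutex _mutex;
    std::map<std::string, std::shared_ptr<FakeMonitor>> _monitors;
};

int main() {
    MonitorRegistry registry;
    auto a = registry.getOrCreate("rs0");
    auto b = registry.getOrCreate("rs0");
    std::cout << std::boolalpha << (a == b) << "\n";  // true: same instance is reused
    return 0;
}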
std::unique_ptr<RemoteCommandTargeter> RemoteCommandTargeterFactoryImpl::create(
    const ConnectionString& connStr) {
    switch (connStr.type()) {
        case ConnectionString::MASTER:
        case ConnectionString::CUSTOM:
            invariant(connStr.getServers().size() == 1);
            return stdx::make_unique<RemoteCommandTargeterStandalone>(
                connStr.getServers().front());
        case ConnectionString::SET:
            return stdx::make_unique<RemoteCommandTargeterRS>(connStr.getSetName(),
                                                              connStr.getServers());
        case ConnectionString::INVALID:
            // These connections should never be seen
            break;
    }

    MONGO_UNREACHABLE;
}
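// Illustrative sketch only: the factory above switches on the connection-string type and
// returns a different targeter implementation per case. The same shape with a plain enum
// and unique_ptr; everything here is hypothetical standalone code.
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

enum class ConnType { kStandalone, kReplicaSet, kInvalid };

struct Targeter {
    virtual ~Targeter() = default;
    virtual std::string describe() const = 0;
};

struct StandaloneTargeter : Targeter {
    std::string describe() const override { return "standalone"; }
};

struct ReplicaSetTargeter : Targeter {
    std::string describe() const override { return "replica set"; }
};

std::unique_ptr<Targeter> makeTargeter(ConnType type) {
    switch (type) {
        case ConnType::kStandalone:
            return std::make_unique<StandaloneTargeter>();
        case ConnType::kReplicaSet:
            return std::make_unique<ReplicaSetTargeter>();
        case ConnType::kInvalid:
            break;  // fall through to the error below, like the INVALID case above
    }
    throw std::logic_error("unreachable connection-string type");
}

int main() {
    std::cout << makeTargeter(ConnType::kReplicaSet)->describe() << "\n";
    return 0;
}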
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    _setupTaskExecutorInLock(connStr.toString());
    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        uassertNotMixingSSL(monitor->getOriginalUri().getSSLMode(), transport::kGlobalSSLMode);
        return monitor;
    }

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(MongoURI(connStr));
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorManager::getOrCreateMonitor(
    const ConnectionString& connStr) {
    invariant(connStr.type() == ConnectionString::SET);

    stdx::lock_guard<stdx::mutex> lk(_mutex);
    _setupTaskExecutorInLock(connStr.toString());
    auto setName = connStr.getSetName();
    auto monitor = _monitors[setName].lock();
    if (monitor) {
        return monitor;
    }

    const std::set<HostAndPort> servers(connStr.getServers().begin(),
                                        connStr.getServers().end());

    log() << "Starting new replica set monitor for " << connStr.toString();

    auto newMonitor = std::make_shared<ReplicaSetMonitor>(setName, servers);
    _monitors[setName] = newMonitor;
    newMonitor->init();
    return newMonitor;
}
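// Illustrative sketch only: unlike the shared_ptr map in the earlier variant, the later
// getOrCreateMonitor versions store weak_ptr values, so a monitor that every caller has
// released can be destroyed; lock() either revives the cached instance or returns null,
// signaling that a fresh one must be built. All names are hypothetical.
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct FakeMonitor {
    explicit FakeMonitor(std::string name) : setName(std::move(name)) {}
    std::string setName;
};

class WeakMonitorRegistry {
public:
    std::shared_ptr<FakeMonitor> getOrCreate(const std::string& setName) {
        std::lock_guard<std::mutex> lk(_mutex);
        if (auto existing = _monitors[setName].lock()) {
            return existing;  // still alive: hand back the cached monitor
        }
        // Either first request for this set, or all strong references were dropped
        // and the weak_ptr expired; build a replacement and re-cache it.
        auto fresh = std::make_shared<FakeMonitor>(setName);
        _monitors[setName] = fresh;
        return fresh;
    }

private:
    std::mutex _mutex;
    std::map<std::string, std::weak_ptr<FakeMonitor>> _monitors;
};

int main() {
    WeakMonitorRegistry registry;
    auto a = registry.getOrCreate("rs0");
    auto b = registry.getOrCreate("rs0");
    std::cout << std::boolalpha << (a == b) << "\n";  // true while a strong ref is held
    return 0;
}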
Status initializeGlobalShardingState(const ConnectionString& configCS,
                                     uint64_t maxChunkSizeBytes,
                                     std::unique_ptr<ShardFactory> shardFactory,
                                     rpc::ShardingEgressMetadataHookBuilder hookBuilder) {
    if (configCS.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Unrecognized connection string."};
    }

    auto network =
        executor::makeNetworkInterface("NetworkInterfaceASIO-ShardRegistry",
                                       stdx::make_unique<ShardingNetworkConnectionHook>(),
                                       hookBuilder());
    auto networkPtr = network.get();
    auto executorPool = makeTaskExecutorPool(std::move(network), hookBuilder());
    executorPool->startup();

    auto shardRegistry(stdx::make_unique<ShardRegistry>(std::move(shardFactory), configCS));

    auto catalogManager = makeCatalogManager(getGlobalServiceContext(),
                                             shardRegistry.get(),
                                             HostAndPort(getHostName(), serverGlobalParams.port));
    auto rawCatalogManager = catalogManager.get();

    grid.init(
        std::move(catalogManager),
        stdx::make_unique<CatalogCache>(),
        std::move(shardRegistry),
        stdx::make_unique<ClusterCursorManager>(
            getGlobalServiceContext()->getPreciseClockSource()),
        stdx::make_unique<BalancerConfiguration>(maxChunkSizeBytes),
        std::move(executorPool),
        networkPtr);

    auto status = rawCatalogManager->startup();
    if (!status.isOK()) {
        return status;
    }

    return Status::OK();
}
void ShardRegistry::_updateLookupMapsForShard_inlock(shared_ptr<Shard> shard,
                                                     const ConnectionString& newConnString) {
    auto oldConnString = shard->getConnString();
    for (const auto& host : oldConnString.getServers()) {
        _lookup.erase(host.toString());
    }

    _lookup[shard->getId()] = shard;

    if (newConnString.type() == ConnectionString::SET) {
        _rsLookup[newConnString.getSetName()] = shard;
    }

    // TODO: The only reason to have the shard host names in the lookup table is for the
    // setShardVersion call, which resolves the shard id from the shard address. This is
    // error-prone and will go away eventually when we switch all communications to go through
    // the remote command runner and all nodes are sharding aware by default.
    _lookup[newConnString.toString()] = shard;

    for (const HostAndPort& hostAndPort : newConnString.getServers()) {
        _lookup[hostAndPort.toString()] = shard;
    }
}
Status DBClientShardResolver::chooseWriteHost( const string& shardName,
                                               ConnectionString* shardHost ) const {
    // Declare up here for parsing later
    string errMsg;

    // Special-case for config and admin
    if ( shardName == "config" || shardName == "admin" ) {
        *shardHost = ConnectionString::parse( configServer.modelServer(), errMsg );
        dassert( errMsg == "" );
        return Status::OK();
    }

    //
    // First get the information about the shard from the shard cache
    //

    // Internally uses our shard cache, does no reload
    Shard shard = Shard::findIfExists( shardName );
    if ( shard.getName() == "" ) {
        return Status( ErrorCodes::ShardNotFound,
                       string("unknown shard name ") + shardName );
    }

    ConnectionString rawShardHost = ConnectionString::parse( shard.getConnString(), errMsg );
    dassert( errMsg == "" );
    dassert( rawShardHost.type() == ConnectionString::SET
             || rawShardHost.type() == ConnectionString::MASTER );

    if ( rawShardHost.type() == ConnectionString::MASTER ) {
        *shardHost = rawShardHost;
        return Status::OK();
    }

    //
    // If we need to, then get the particular node we're targeting in the replica set
    //

    // Does not reload the monitor if it doesn't currently exist
    ReplicaSetMonitorPtr replMonitor = ReplicaSetMonitor::get( rawShardHost.getSetName(),
                                                              false );
    if ( !replMonitor ) {
        return Status( ErrorCodes::ReplicaSetNotFound,
                       string("unknown replica set ") + rawShardHost.getSetName() );
    }

    try {
        // This can throw when we don't find a master!
        HostAndPort masterHostAndPort = replMonitor->getMaster();
        *shardHost = ConnectionString::parse( masterHostAndPort.toString( true ), errMsg );
        dassert( errMsg == "" );
        return Status::OK();
    }
    catch ( const DBException& ) {
        return Status( ErrorCodes::HostNotFound,
                       string("could not contact primary for replica set ")
                           + replMonitor->getName() );
    }

    // Unreachable
    dassert( false );
    return Status( ErrorCodes::UnknownError, "" );
}
Status checkClusterMongoVersions(const ConnectionString& configLoc,
                                 const string& minMongoVersion) {
    scoped_ptr<ScopedDbConnection> connPtr;

    //
    // Find mongos pings in config server
    //

    try {
        connPtr.reset(new ScopedDbConnection(configLoc, 30));
        ScopedDbConnection& conn = *connPtr;
        scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(MongosType::ConfigNS,
                                                                  Query())));

        while (cursor->more()) {
            BSONObj pingDoc = cursor->next();

            MongosType ping;
            string errMsg;
            // NOTE: We don't care if the ping is invalid, legacy stuff will be
            if (!ping.parseBSON(pingDoc, &errMsg)) {
                warning() << "could not parse ping document: " << pingDoc << causedBy(errMsg)
                          << endl;
                continue;
            }

            string mongoVersion = "2.0";
            // Hack to determine older mongos versions from ping format
            if (ping.isWaitingSet())
                mongoVersion = "2.2";
            if (ping.isMongoVersionSet() && ping.getMongoVersion() != "") {
                mongoVersion = ping.getMongoVersion();
            }

            Date_t lastPing = ping.getPing();

            long long quietIntervalMillis = 0;
            Date_t currentJsTime = jsTime();
            if (currentJsTime >= lastPing) {
                quietIntervalMillis = static_cast<long long>(currentJsTime - lastPing);
            }
            long long quietIntervalMins = quietIntervalMillis / (60 * 1000);

            // We assume that anything that hasn't pinged in 5 minutes is probably down
            if (quietIntervalMins >= 5) {
                log() << "stale mongos detected " << quietIntervalMins << " minutes ago,"
                      << " network location is " << pingDoc["_id"].String()
                      << ", not checking version" << endl;
            } else {
                if (versionCmp(mongoVersion, minMongoVersion) < 0) {
                    return Status(ErrorCodes::RemoteValidationError,
                                  stream() << "version " << mongoVersion << " of mongos at "
                                           << ping.getName()
                                           << " is not compatible with the config update, "
                                           << "you must wait 5 minutes "
                                           << "after shutting down a pre-" << minMongoVersion
                                           << " mongos");
                }
            }
        }
    } catch (const DBException& e) {
        return e.toStatus("could not read mongos pings collection");
    }

    //
    // Load shards from config server
    //

    vector<ConnectionString> shardLocs;

    try {
        ScopedDbConnection& conn = *connPtr;
        scoped_ptr<DBClientCursor> cursor(_safeCursor(conn->query(ShardType::ConfigNS,
                                                                  Query())));

        while (cursor->more()) {
            BSONObj shardDoc = cursor->next();

            ShardType shard;
            string errMsg;
            if (!shard.parseBSON(shardDoc, &errMsg) || !shard.isValid(&errMsg)) {
                connPtr->done();
                return Status(ErrorCodes::UnsupportedFormat,
                              stream() << "invalid shard " << shardDoc
                                       << " read from the config server" << causedBy(errMsg));
            }

            ConnectionString shardLoc = ConnectionString::parse(shard.getHost(), errMsg);
            if (shardLoc.type() == ConnectionString::INVALID) {
                connPtr->done();
                return Status(ErrorCodes::UnsupportedFormat,
                              stream() << "invalid shard host " << shard.getHost()
                                       << " read from the config server" << causedBy(errMsg));
            }

            shardLocs.push_back(shardLoc);
        }
    } catch (const DBException& e) {
        return e.toStatus("could not read shards collection");
    }

    connPtr->done();

    //
    // We've now got all the shard info from the config server, start contacting the shards
    // and verifying their versions.
    //

    for (vector<ConnectionString>::iterator it = shardLocs.begin(); it != shardLocs.end();
         ++it) {
        ConnectionString& shardLoc = *it;

        vector<HostAndPort> servers = shardLoc.getServers();

        for (vector<HostAndPort>::iterator serverIt = servers.begin();
             serverIt != servers.end();
             ++serverIt) {
            // Note: This will *always* be a single-host connection
            ConnectionString serverLoc(*serverIt);

            log() << "checking that version of host " << serverLoc << " is compatible with "
                  << minMongoVersion << endl;

            scoped_ptr<ScopedDbConnection> serverConnPtr;

            bool resultOk;
            BSONObj buildInfo;

            try {
                serverConnPtr.reset(new ScopedDbConnection(serverLoc, 30));
                ScopedDbConnection& serverConn = *serverConnPtr;

                resultOk = serverConn->runCommand("admin", BSON("buildInfo" << 1), buildInfo);
            } catch (const DBException& e) {
                warning() << "could not run buildInfo command on " << serverLoc.toString()
                          << causedBy(e) << ", you must manually verify this mongo server is "
                          << "offline (for at least 5 minutes) or of a version >= 2.2" << endl;
                continue;
            }

            // TODO: Make running commands saner such that we can consolidate error handling
            if (!resultOk) {
                return Status(ErrorCodes::UnknownError,
                              stream() << DBClientConnection::getLastErrorString(buildInfo)
                                       << causedBy(buildInfo.toString()));
            }

            serverConnPtr->done();

            verify(buildInfo["version"].type() == String);
            string mongoVersion = buildInfo["version"].String();

            if (versionCmp(mongoVersion, minMongoVersion) < 0) {
                return Status(ErrorCodes::RemoteValidationError,
                              stream() << "version " << mongoVersion << " of mongo server at "
                                       << serverLoc.toString()
                                       << " is not compatible with the config update");
            }
        }
    }

    return Status::OK();
}
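// Illustrative sketch only: the staleness gate above treats any mongos that pinged within
// the last five minutes as live, and therefore subject to the minimum-version check. The
// same arithmetic in isolation, with hypothetical names and std::chrono in place of Date_t:
#include <chrono>
#include <iostream>

// Returns true if the process should be version-checked, i.e. its last ping is recent
// enough that we cannot assume it is down.
bool requiresVersionCheck(std::chrono::system_clock::time_point now,
                          std::chrono::system_clock::time_point lastPing) {
    using std::chrono::duration_cast;
    using std::chrono::minutes;
    if (now < lastPing) {
        return true;  // clock skew: mirror the quietIntervalMillis = 0 branch and check anyway
    }
    const auto quietInterval = duration_cast<minutes>(now - lastPing);
    return quietInterval < minutes(5);
}

int main() {
    const auto now = std::chrono::system_clock::now();
    std::cout << std::boolalpha
              << requiresVersionCheck(now, now - std::chrono::minutes(2)) << "\n"    // true
              << requiresVersionCheck(now, now - std::chrono::minutes(10)) << "\n";  // false
    return 0;
}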
StatusWith<std::string> ShardingCatalogManagerImpl::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a
    // network error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request. Make sure to set the last optime for the client to the system last
        // optime so that we'll still wait for replication so that this state is visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter
    // instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case where we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica
            // set monitor registry. It needs to be cleaned up so that when a correct replica set
            // is added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient(opCtx)->getDatabase(opCtx, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // If the minimum allowed version for the cluster is 3.4, set the featureCompatibilityVersion
    // to 3.4 on the shard.
    if (serverGlobalParams.featureCompatibility.version.load() ==
        ServerGlobalParams::FeatureCompatibility::Version::k34) {
        auto versionResponse =
            _runCommandForAddShard(opCtx,
                                   targeter.get(),
                                   "admin",
                                   BSON(FeatureCompatibilityVersion::kCommandName
                                        << FeatureCompatibilityVersionCommandParser::kVersion34));
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            if (versionResponse.getStatus().code() == ErrorCodes::CommandNotFound) {
                return {ErrorCodes::OperationFailed,
                        "featureCompatibilityVersion for cluster is 3.4, cannot add a shard "
                        "with version below 3.4. See "
                        "http://dochub.mongodb.org/core/3.4-feature-compatibility."};
            }
            return versionResponse.getValue().commandStatus;
        }
    }

    if (!MONGO_FAIL_POINT(dontUpsertShardIdentityOnNewShards)) {
        auto commandRequest = createShardIdentityUpsertForAddShard(opCtx, shardType.getName());

        LOG(2) << "going to insert shardIdentity document into shard: " << shardType;

        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), "admin", commandRequest);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }

        auto commandResponse = std::move(swCommandResponse.getValue());

        BatchedCommandResponse batchResponse;
        auto batchResponseStatus =
            Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
        if (!batchResponseStatus.isOK()) {
            return batchResponseStatus;
        }
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result = Grid::get(opCtx)->catalogClient(opCtx)->insertConfigDocument(
        opCtx,
        ShardType::ConfigNS,
        shardType.toBSON(),
        ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = Grid::get(opCtx)->catalogClient(opCtx)->updateDatabase(opCtx, dbName, dbt);
        if (!status.isOK()) {
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient(opCtx)->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}
bool Grid::addShard( string* name,
                     const ConnectionString& servers,
                     long long maxSize,
                     string& errMsg ) {
    // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
    string nameInternal;
    if ( ! name ) {
        name = &nameInternal;
    }

    ReplicaSetMonitorPtr rsMonitor;

    // Check whether the host (or set) exists and run several sanity checks on this request.
    // There are two sets of sanity checks: making sure adding this particular shard is
    // consistent with the replica set state (if it exists) and making sure this shard's
    // databases can be brought into the grid without conflict.
    vector<string> dbNames;
    try {
        ScopedDbConnection newShardConn(servers.toString());
        newShardConn->getLastError();

        if ( newShardConn->type() == ConnectionString::SYNC ) {
            newShardConn.done();
            errMsg = "can't use sync cluster as a shard. for replica set, have to use "
                     "<setname>/<server1>,<server2>,...";
            return false;
        }

        BSONObj resIsMongos;
        bool ok = newShardConn->runCommand( "admin", BSON( "isdbgrid" << 1 ), resIsMongos );

        // should return ok=0, cmd not found if it's a normal mongod
        if ( ok ) {
            errMsg = "can't add a mongos process as a shard";
            newShardConn.done();
            return false;
        }

        BSONObj resIsMaster;
        ok = newShardConn->runCommand( "admin", BSON( "isMaster" << 1 ), resIsMaster );
        if ( !ok ) {
            ostringstream ss;
            ss << "failed running isMaster: " << resIsMaster;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // if the shard has only one host, make sure it is not part of a replica set
        string setName = resIsMaster["setName"].str();
        string commandSetName = servers.getSetName();
        if ( commandSetName.empty() && ! setName.empty() ) {
            ostringstream ss;
            ss << "host is part of set " << setName
               << ", use replica set url format <setname>/<server1>,<server2>,....";
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        if ( !commandSetName.empty() && setName.empty() ) {
            ostringstream ss;
            ss << "host did not return a set name, is the replica set still initializing? "
               << resIsMaster;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // if the shard is part of a replica set, make sure it is the right one
        if ( ! commandSetName.empty() && ( commandSetName != setName ) ) {
            ostringstream ss;
            ss << "host is part of a different set: " << setName;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        if ( setName.empty() ) {
            // check this isn't a --configsvr
            BSONObj res;
            bool ok = newShardConn->runCommand( "admin",
                                                BSON( "replSetGetStatus" << 1 ),
                                                res );
            ostringstream ss;
            if ( !ok && res["info"].type() == String && res["info"].String() == "configsvr" ) {
                errMsg = "the specified mongod is a --configsvr and "
                         "should thus not be a shard server";
                newShardConn.done();
                return false;
            }
        }

        // if the shard is part of a replica set, make sure all the hosts mentioned in
        // 'servers' are part of the set. It is fine if not all members of the set are
        // present in 'servers'.
        bool foundAll = true;
        string offendingHost;
        if ( ! commandSetName.empty() ) {
            set<string> hostSet;
            BSONObjIterator iter( resIsMaster["hosts"].Obj() );
            while ( iter.more() ) {
                hostSet.insert( iter.next().String() ); // host:port
            }
            if ( resIsMaster["passives"].isABSONObj() ) {
                BSONObjIterator piter( resIsMaster["passives"].Obj() );
                while ( piter.more() ) {
                    hostSet.insert( piter.next().String() ); // host:port
                }
            }
            if ( resIsMaster["arbiters"].isABSONObj() ) {
                BSONObjIterator piter( resIsMaster["arbiters"].Obj() );
                while ( piter.more() ) {
                    hostSet.insert( piter.next().String() ); // host:port
                }
            }

            vector<HostAndPort> hosts = servers.getServers();
            for ( size_t i = 0; i < hosts.size(); i++ ) {
                if (!hosts[i].hasPort()) {
                    hosts[i].setPort(ServerGlobalParams::DefaultDBPort);
                }
                string host = hosts[i].toString(); // host:port
                if ( hostSet.find( host ) == hostSet.end() ) {
                    offendingHost = host;
                    foundAll = false;
                    break;
                }
            }
        }
        if ( ! foundAll ) {
            ostringstream ss;
            ss << "in seed list " << servers.toString() << ", host " << offendingHost
               << " does not belong to replica set " << setName;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        // shard name defaults to the name of the replica set
        if ( name->empty() && ! setName.empty() )
            *name = setName;

        // In order to be accepted as a new shard, that mongod must not have any database name
        // that exists already in any other shards. If that test passes, the new shard's
        // databases are going to be entered as non-sharded db's whose primary is the newly
        // added shard.
        BSONObj resListDB;
        ok = newShardConn->runCommand( "admin", BSON( "listDatabases" << 1 ), resListDB );
        if ( !ok ) {
            ostringstream ss;
            ss << "failed listing " << servers.toString() << "'s databases:" << resListDB;
            errMsg = ss.str();
            newShardConn.done();
            return false;
        }

        BSONObjIterator i( resListDB["databases"].Obj() );
        while ( i.more() ) {
            BSONObj dbEntry = i.next().Obj();
            const string& dbName = dbEntry["name"].String();
            if ( _isSpecialLocalDB( dbName ) ) {
                // 'local', 'admin', and 'config' are system DBs and should be excluded here
                continue;
            }
            else {
                dbNames.push_back( dbName );
            }
        }

        if ( newShardConn->type() == ConnectionString::SET )
            rsMonitor = ReplicaSetMonitor::get( setName );

        newShardConn.done();
    }
    catch ( DBException& e ) {
        if ( servers.type() == ConnectionString::SET ) {
            ReplicaSetMonitor::remove( servers.getSetName() );
        }
        ostringstream ss;
        ss << "couldn't connect to new shard ";
        ss << e.what();
        errMsg = ss.str();
        return false;
    }

    // check that none of the existing shard candidate's db's exist elsewhere
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it, false );
        if ( config.get() != NULL ) {
            ostringstream ss;
            ss << "can't add shard " << servers.toString()
               << " because a local database '" << *it;
            ss << "' exists in another " << config->getPrimary().toString();
            errMsg = ss.str();
            return false;
        }
    }

    // if a name for a shard wasn't provided, pick one.
    if ( name->empty() && ! _getNewShardName( name ) ) {
        errMsg = "error generating new shard name";
        return false;
    }

    // build the ConfigDB shard document
    BSONObjBuilder b;
    b.append(ShardType::name(), *name);
    b.append(ShardType::host(),
             rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
    if (maxSize > 0) {
        b.append(ShardType::maxSize(), maxSize);
    }
    BSONObj shardDoc = b.obj();

    {
        ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

        // check whether the set of hosts (or single host) is not already a known shard
        BSONObj old = conn->findOne(ShardType::ConfigNS,
                                    BSON(ShardType::host(servers.toString())));
        if ( ! old.isEmpty() ) {
            errMsg = "host already used";
            conn.done();
            return false;
        }
        conn.done();
    }

    log() << "going to add shard: " << shardDoc << endl;

    Status result = clusterInsert( ShardType::ConfigNS,
                                   shardDoc,
                                   WriteConcernOptions::AllConfigs,
                                   NULL );

    if ( !result.isOK() ) {
        errMsg = result.reason();
        log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
        return false;
    }

    Shard::reloadShardInfo();

    // add all databases of the new shard
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it, true, *name );
        if ( ! config ) {
            log() << "adding shard " << servers << " even though could not add database "
                  << *it << endl;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", *name);
    shardDetails.append("host", servers.toString());
    configServer.logChange("addShard", "", shardDetails.obj());

    return true;
}
StatusWith<std::string> ShardingCatalogManager::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a
    // network error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request. Make sure to set the last optime for the client to the system last
        // optime so that we'll still wait for replication so that this state is visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter
    // instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case where we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica
            // set monitor registry. It needs to be cleaned up so that when a correct replica set
            // is added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient()->getDatabase(
            opCtx, dbName, repl::ReadConcernLevel::kLocalReadConcern);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // Check that the shard candidate does not have a local config.system.sessions collection
    auto res = _dropSessionsCollection(opCtx, targeter);
    if (!res.isOK()) {
        return res.withContext(
            "can't add shard with a local copy of config.system.sessions, please drop this "
            "collection from the shard manually and try again.");
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // Helper function that runs a command on the to-be shard and returns the status
    auto runCmdOnNewShard = [this, &opCtx, &targeter](const BSONObj& cmd) -> Status {
        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, cmd);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }
        // Grabs the underlying status from a StatusWith object by taking the first
        // non-OK status, if there is one. This is needed due to the semantics of
        // _runCommandForAddShard.
        auto commandResponse = std::move(swCommandResponse.getValue());
        BatchedCommandResponse batchResponse;
        return Shard::CommandResponse::processBatchWriteResponse(commandResponse,
                                                                 &batchResponse);
    };

    AddShard addShardCmd = add_shard_util::createAddShardCmd(opCtx, shardType.getName());

    auto addShardCmdBSON = [&]() {
        // In 4.2, use the _addShard command to add the shard, which in turn inserts a
        // shardIdentity document into the shard and triggers sharding state initialization.
        // In the unlikely scenario that there's a downgrade to 4.0 between the construction
        // of this command object and the issuing of the command on the receiving shard, the
        // user will receive a rather harmless CommandNotFound error for _addShard, and can
        // simply retry.
        if (serverGlobalParams.featureCompatibility.getVersion() ==
            ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42) {
            // Needed for IDL toBSON method
            BSONObj passthroughFields;
            return addShardCmd.toBSON(passthroughFields);
        } else {
            // To support backwards compatibility with v4.0 shards, insert a shardIdentity
            // document directly.
            return add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
        }
    }();

    auto addShardStatus = runCmdOnNewShard(addShardCmdBSON);
    if (!addShardStatus.isOK()) {
        return addShardStatus;
    }

    {
        // Hold the fcvLock across checking the FCV, sending setFCV to the new shard, and
        // writing the entry for the new shard to config.shards. This ensures the FCV doesn't
        // change after we send setFCV to the new shard, but before we write its entry to
        // config.shards. (Note, we don't use a Global IX lock here, because we don't want to
        // hold the global lock while blocking on the network).
        invariant(!opCtx->lockState()->isLocked());
        Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);

        BSONObj setFCVCmd;
        switch (serverGlobalParams.featureCompatibility.getVersion()) {
            case ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo42:
            case ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo42:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion42
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
            default:
                setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                                 << FeatureCompatibilityVersionParser::kVersion40
                                 << WriteConcernOptions::kWriteConcernField
                                 << opCtx->getWriteConcern().toBSON());
                break;
        }
        auto versionResponse =
            _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, setFCVCmd);
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            return versionResponse.getValue().commandStatus;
        }

        log() << "going to insert new entry for shard into config.shards: "
              << shardType.toString();

        Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            shardType.toBSON(),
            ShardingCatalogClient::kLocalWriteConcern);
        if (!result.isOK()) {
            log() << "error adding shard: " << shardType.toBSON()
                  << " err: " << result.reason();
            return result;
        }
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt(dbName, shardType.getName(), false, databaseVersion::makeNew());

        {
            const auto status = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
                opCtx,
                DatabaseType::ConfigNS,
                BSON(DatabaseType::name(dbName)),
                dbt.toBSON(),
                true,
                ShardingCatalogClient::kLocalWriteConcern);
            if (!status.isOK()) {
                log() << "adding shard " << shardConnectionString.toString()
                      << " even though could not add database " << dbName;
            }
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient()->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}
StatusWith<boost::optional<ShardType>> ShardingCatalogManager::_checkIfShardExists(
    OperationContext* opCtx,
    const ConnectionString& proposedShardConnectionString,
    const std::string* proposedShardName,
    long long proposedShardMaxSize) {
    // Check whether any host in the connection is already part of the cluster.
    const auto existingShards = Grid::get(opCtx)->catalogClient()->getAllShards(
        opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!existingShards.isOK()) {
        return existingShards.getStatus().withContext(
            "Failed to load existing shards during addShard");
    }

    // Now check if this shard already exists - if it already exists *with the same options*
    // then the addShard request can return success early without doing anything more.
    for (const auto& existingShard : existingShards.getValue().value) {
        auto swExistingShardConnStr = ConnectionString::parse(existingShard.getHost());
        if (!swExistingShardConnStr.isOK()) {
            return swExistingShardConnStr.getStatus();
        }
        auto existingShardConnStr = std::move(swExistingShardConnStr.getValue());

        // Function for determining if the options for the shard that is being added match the
        // options of an existing shard that conflicts with it.
        auto shardsAreEquivalent = [&]() {
            if (proposedShardName && *proposedShardName != existingShard.getName()) {
                return false;
            }
            if (proposedShardConnectionString.type() != existingShardConnStr.type()) {
                return false;
            }
            if (proposedShardConnectionString.type() == ConnectionString::SET &&
                proposedShardConnectionString.getSetName() !=
                    existingShardConnStr.getSetName()) {
                return false;
            }
            if (proposedShardMaxSize != existingShard.getMaxSizeMB()) {
                return false;
            }
            return true;
        };

        if (existingShardConnStr.type() == ConnectionString::SET &&
            proposedShardConnectionString.type() == ConnectionString::SET &&
            existingShardConnStr.getSetName() == proposedShardConnectionString.getSetName()) {
            // An existing shard has the same replica set name as the shard being added.
            // If the options aren't the same, then this is an error, but if the options match
            // then the addShard operation should be immediately considered a success and
            // terminated.
            if (shardsAreEquivalent()) {
                return {existingShard};
            } else {
                return {ErrorCodes::IllegalOperation,
                        str::stream() << "A shard already exists containing the replica set '"
                                      << existingShardConnStr.getSetName() << "'"};
            }
        }

        for (const auto& existingHost : existingShardConnStr.getServers()) {
            // Check whether any of the hosts in the existing shard are present within the
            // shard that is being added.
            for (const auto& addingHost : proposedShardConnectionString.getServers()) {
                if (existingHost == addingHost) {
                    // At least one of the hosts in the shard being added already exists in an
                    // existing shard. If the options aren't the same, then this is an error,
                    // but if the options match then the addShard operation should be
                    // immediately considered a success and terminated.
                    if (shardsAreEquivalent()) {
                        return {existingShard};
                    } else {
                        return {ErrorCodes::IllegalOperation,
                                str::stream()
                                    << "'" << addingHost.toString() << "' "
                                    << "is already a member of the existing shard '"
                                    << existingShard.getHost() << "' ("
                                    << existingShard.getName() << ")."};
                    }
                }
            }
        }

        if (proposedShardName && *proposedShardName == existingShard.getName()) {
            // If we get here then we're trying to add a shard with the same name as an
            // existing shard, but there was no overlap in the hosts between the existing
            // shard and the proposed connection string for the new shard.
            return {ErrorCodes::IllegalOperation,
                    str::stream() << "A shard named " << *proposedShardName
                                  << " already exists"};
        }
    }

    return {boost::none};
}
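// Illustrative sketch only: _checkIfShardExists compares the incoming request against each
// existing shard with a small equivalence predicate, returning "already exists" on an exact
// match (an idempotent retry) and an error on a partial one. A reduced, hypothetical version
// of that three-way outcome in standalone C++:
#include <iostream>
#include <string>

enum class AddResult { kNoConflict, kAlreadyExists, kConflict };

struct ShardDesc {
    std::string name;
    std::string setName;
    long long maxSizeMB;
};

AddResult checkAgainstExisting(const ShardDesc& proposed, const ShardDesc& existing) {
    if (proposed.setName != existing.setName) {
        return AddResult::kNoConflict;  // different replica set: no overlap on this shard
    }
    // Same set name: either an idempotent retry (all options equal) or an error.
    const bool equivalent =
        proposed.name == existing.name && proposed.maxSizeMB == existing.maxSizeMB;
    return equivalent ? AddResult::kAlreadyExists : AddResult::kConflict;
}

int main() {
    const ShardDesc existing{"shard0", "rs0", 0};
    std::cout << static_cast<int>(checkAgainstExisting({"shard0", "rs0", 0}, existing)) << "\n"   // 1
              << static_cast<int>(checkAgainstExisting({"shard1", "rs0", 0}, existing)) << "\n"   // 2
              << static_cast<int>(checkAgainstExisting({"shard1", "rs1", 0}, existing)) << "\n";  // 0
    return 0;
}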
void ShardRegistry::updateReplSetHosts(const ConnectionString& newConnString) {
    invariant(newConnString.type() == ConnectionString::SET ||
              newConnString.type() == ConnectionString::CUSTOM);  // For dbtests

    _data.rebuildShardIfExists(newConnString, _shardFactory.get());
}
StatusWith<string> ShardingCatalogManagerImpl::addShard(
    OperationContext* txn,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter
    // instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(txn)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(txn, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        // TODO: This is a workaround for the case where we could have some bad shard being
        // requested to be added and we put that bad connection string on the global replica
        // set monitor registry. It needs to be cleaned up so that when a correct replica set
        // is added, it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return shardStatus.getStatus();
    }

    ShardType& shardType = shardStatus.getValue();

    auto dbNamesStatus = _getDBNamesListFromShard(txn, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : dbNamesStatus.getValue()) {
        auto dbt = _catalogClient->getDatabase(txn, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> result = _generateNewShardName(txn);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    ShardIdentityType shardIdentity;
    shardIdentity.setConfigsvrConnString(
        Grid::get(txn)->shardRegistry()->getConfigServerConnectionString());
    shardIdentity.setShardName(shardType.getName());
    shardIdentity.setClusterId(Grid::get(txn)->shardRegistry()->getClusterId());
    auto validateStatus = shardIdentity.validate();
    if (!validateStatus.isOK()) {
        return validateStatus;
    }

    log() << "going to insert shardIdentity document into shard: " << shardIdentity.toString();

    auto updateRequest = shardIdentity.createUpsertForAddShard();
    BatchedCommandRequest commandRequest(updateRequest.release());
    commandRequest.setNS(NamespaceString::kConfigCollectionNamespace);
    commandRequest.setWriteConcern(kMajorityWriteConcern.toBSON());

    auto swCommandResponse =
        _runCommandForAddShard(txn, targeter.get(), "admin", commandRequest.toBSON());
    if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    auto commandResponse = std::move(swCommandResponse.getValue());

    BatchedCommandResponse batchResponse;
    auto batchResponseStatus =
        Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    if (!batchResponseStatus.isOK()) {
        return batchResponseStatus;
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result =
        _catalogClient->insertConfigDocument(txn, ShardType::ConfigNS, shardType.toBSON());
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        if (result == ErrorCodes::DuplicateKey) {
            // TODO(SERVER-24213): adding a shard that already exists should be considered
            // success, however this approach does no validation that we are adding the shard
            // with the same options. It also does not protect against adding the same shard
            // with a different shard name and slightly different connection string. This is a
            // temporary hack to get the continuous stepdown suite passing.
            warning() << "Received duplicate key error when inserting new shard with name "
                      << shardType.getName() << " and connection string "
                      << shardConnectionString.toString()
                      << " to config.shards collection. This most likely means that there was "
                         "an attempt to add a shard that already exists in the cluster";
            return shardType.getName();
        }
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const string& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = _catalogClient->updateDatabase(txn, dbName, dbt);
        if (!status.isOK()) {
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    _catalogClient->logChange(txn, "addShard", "", shardDetails.obj());

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(txn)->shardRegistry();
    if (!shardRegistry->getShard(txn, shardType.getName())) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    return shardType.getName();
}
bool Grid::addShard( string* name,
                     const ConnectionString& servers,
                     long long maxSize,
                     string& errMsg ) {
    // name can be NULL, so provide a dummy one here to avoid testing it elsewhere
    string nameInternal;
    if ( ! name ) {
        name = &nameInternal;
    }

    ReplicaSetMonitorPtr rsMonitor;

    // Check whether the host (or set) exists and run several sanity checks on this request.
    // There are two sets of sanity checks: making sure adding this particular shard is
    // consistent with the replica set state (if it exists) and making sure this shard's
    // databases can be brought into the grid without conflict.

    if ( servers.type() == ConnectionString::SYNC ) {
        errMsg = "can't use sync cluster as a shard for replica set, "
                 "have to use <setname>/<server1>,<server2>,...";
        return false;
    }

    vector<string> dbNames;
    try {
        bool ok = false;

        {
            ScopedDbConnection newShardConn(servers.toString());

            BSONObj resIsMongos;
            ok = newShardConn->runCommand( "admin", BSON( "isdbgrid" << 1 ), resIsMongos );
            newShardConn.done();
        }

        // should return ok=0, cmd not found if it's a normal mongod
        if ( ok ) {
            errMsg = "can't add a mongos process as a shard";
            return false;
        }

        if ( servers.type() == ConnectionString::SET ) {
            if (!addReplSetShardCheck( servers, &errMsg )) {
                return false;
            }

            // shard name defaults to the name of the replica set
            if ( name->empty() && !servers.getSetName().empty() ) {
                *name = servers.getSetName();
            }
        }

        // In order to be accepted as a new shard, that mongod must not have any database name
        // that exists already in any other shards. If that test passes, the new shard's
        // databases are going to be entered as non-sharded db's whose primary is the newly
        // added shard.
        BSONObj resListDB;
        {
            ScopedDbConnection newShardConn(servers.toString());
            ok = newShardConn->runCommand( "admin", BSON( "listDatabases" << 1 ), resListDB );
            newShardConn.done();
        }

        if ( !ok ) {
            errMsg = str::stream() << "failed listing " << servers.toString()
                                   << "'s databases:" << resListDB;
            return false;
        }

        BSONObjIterator i( resListDB["databases"].Obj() );
        while ( i.more() ) {
            BSONObj dbEntry = i.next().Obj();
            const string& dbName = dbEntry["name"].String();
            if ( _isSpecialLocalDB( dbName ) ) {
                // 'local', 'admin', and 'config' are system DBs and should be excluded here
                continue;
            }
            else {
                dbNames.push_back( dbName );
            }
        }

        if ( servers.type() == ConnectionString::SET ) {
            rsMonitor = ReplicaSetMonitor::get( servers.getSetName() );
        }
    }
    catch ( DBException& e ) {
        if ( servers.type() == ConnectionString::SET ) {
            ReplicaSetMonitor::remove( servers.getSetName() );
        }

        errMsg = str::stream() << "couldn't connect to new shard " << causedBy(e);
        return false;
    }

    // check that none of the existing shard candidate's db's exist elsewhere
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it, false );
        if ( config.get() != NULL ) {
            ostringstream ss;
            ss << "can't add shard " << servers.toString()
               << " because a local database '" << *it;
            ss << "' exists in another " << config->getPrimary().toString();
            errMsg = ss.str();
            return false;
        }
    }

    // if a name for a shard wasn't provided, pick one.
    if ( name->empty() && ! _getNewShardName( name ) ) {
        errMsg = "error generating new shard name";
        return false;
    }

    // build the ConfigDB shard document
    BSONObjBuilder b;
    b.append(ShardType::name(), *name);
    b.append(ShardType::host(),
             rsMonitor ? rsMonitor->getServerAddress() : servers.toString());
    if (maxSize > 0) {
        b.append(ShardType::maxSize(), maxSize);
    }
    BSONObj shardDoc = b.obj();

    {
        ScopedDbConnection conn(configServer.getPrimary().getConnString(), 30);

        // check whether the set of hosts (or single host) is not already a known shard
        BSONObj old = conn->findOne(ShardType::ConfigNS,
                                    BSON(ShardType::host(servers.toString())));
        if ( ! old.isEmpty() ) {
            errMsg = "host already used";
            conn.done();
            return false;
        }

        log() << "going to add shard: " << shardDoc << endl;

        conn->insert(ShardType::ConfigNS, shardDoc);
        errMsg = conn->getLastError();
        if ( ! errMsg.empty() ) {
            log() << "error adding shard: " << shardDoc << " err: " << errMsg << endl;
            conn.done();
            return false;
        }

        conn.done();
    }

    Shard::reloadShardInfo();

    // add all databases of the new shard
    for ( vector<string>::const_iterator it = dbNames.begin(); it != dbNames.end(); ++it ) {
        DBConfigPtr config = getDBConfig( *it, true, *name );
        if ( ! config ) {
            log() << "adding shard " << servers << " even though could not add database "
                  << *it << endl;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", *name);
    shardDetails.append("host", servers.toString());
    configServer.logChange("addShard", "", shardDetails.obj());

    return true;
}