// Adds the host(s) described by 'shardConnectionString' to the cluster as a new shard.
//
// On success returns the name under which the shard is registered; otherwise returns the first
// non-OK status encountered. BadValue is returned up front when the connection string is invalid
// or when 'shardProposedName' is non-null but empty. 'shardProposedName' may be null. 'maxSize' is
// recorded on the shard entry (setMaxSizeMB) only when it is positive.
//
// If the hosts already belong to an existing shard the call is treated as a successful no-op and
// the existing shard's name is returned.
StatusWith<std::string> ShardingCatalogManager::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    using FCVersion = ServerGlobalParams::FeatureCompatibility::Version;

    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName != nullptr && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // addShard operations are serialized: at most one may be in progress at any time.
    Lock::ExclusiveLock shardMembershipLock(opCtx->lockState(), _kShardMembershipLock);

    // A retry (e.g. after a network error) may arrive for a shard that was in fact already added.
    // Detect that case first so the request can be answered as a no-op.
    auto swExistingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!swExistingShard.isOK()) {
        return swExistingShard.getStatus();
    }

    const auto& existingShard = swExistingShard.getValue();
    if (existingShard) {
        // The hosts are already part of a shard: report success. Advance the client's last optime
        // to the system's last optime so the caller still waits for replication and this state is
        // visible in the committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard->getName();
    }

    auto shardRegistry = Grid::get(opCtx)->shardRegistry();

    // Reload the ShardRegistry so that, when re-adding a replica set that was recently removed,
    // the ReplicaSetMonitor for that set name has been detached from the ReplicaSetMonitorManager
    // and a new one gets created when the set is targeted below.
    // Note: as of 3.4 removeShard is performed by mongos (unlike addShard), so the config server's
    // ShardRegistry is not synchronously reloaded when a shard is removed.
    if (!shardRegistry->reload(opCtx)) {
        // The first call joined a reload that was already running; reload once more so the result
        // is guaranteed to be fresh.
        shardRegistry->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> detachedShard{
        shardRegistry->createConnection(shardConnectionString)};
    invariant(detachedShard);
    auto shardTargeter = detachedShard->getTargeter();

    // Workaround: a bad shard may have been requested, leaving its bad connection string in the
    // global replica set monitor registry. Unless dismissed on success, this guard removes that
    // entry so it is recreated when a correct replica set is added later.
    auto replSetMonitorCleanup = MakeGuard([&] {
        if (shardConnectionString.type() != ConnectionString::SET) {
            return;
        }
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
    });

    // Make sure the given connection string can act as a shard at all.
    auto swNewShard =
        _validateHostAsShard(opCtx, shardTargeter, shardProposedName, shardConnectionString);
    if (!swNewShard.isOK()) {
        return swNewShard.getStatus();
    }
    ShardType& newShard = swNewShard.getValue();

    // None of the databases present on the candidate may already be known to the cluster.
    auto swCandidateDbNames = _getDBNamesListFromShard(opCtx, shardTargeter);
    if (!swCandidateDbNames.isOK()) {
        return swCandidateDbNames.getStatus();
    }

    for (const auto& candidateDbName : swCandidateDbNames.getValue()) {
        auto swClusterDb = Grid::get(opCtx)->catalogClient()->getDatabase(
            opCtx, candidateDbName, repl::ReadConcernLevel::kLocalReadConcern);

        if (!swClusterDb.isOK()) {
            // NamespaceNotFound is the expected outcome; anything else is a real failure.
            if (swClusterDb != ErrorCodes::NamespaceNotFound) {
                return swClusterDb.getStatus();
            }
            continue;
        }

        const auto& clusterDbDoc = swClusterDb.getValue().value;
        return Status(ErrorCodes::OperationFailed,
                      str::stream() << "can't add shard "
                                    << "'"
                                    << shardConnectionString.toString()
                                    << "'"
                                    << " because a local database '"
                                    << candidateDbName
                                    << "' exists in another "
                                    << clusterDbDoc.getPrimary());
    }

    // The candidate must not keep a local config.system.sessions collection.
    auto dropSessionsStatus = _dropSessionsCollection(opCtx, shardTargeter);
    if (!dropSessionsStatus.isOK()) {
        return dropSessionsStatus.withContext(
            "can't add shard with a local copy of config.system.sessions, please drop this "
            "collection from the shard manually and try again.");
    }

    // Without a shard name at this point, generate one.
    if (newShard.getName().empty()) {
        auto swGeneratedName = generateNewShardName(opCtx);
        if (!swGeneratedName.isOK()) {
            return swGeneratedName.getStatus();
        }
        newShard.setName(swGeneratedName.getValue());
    }

    if (maxSize > 0) {
        newShard.setMaxSizeMB(maxSize);
    }

    // Runs 'cmd' against the admin database of the to-be shard and collapses the outcome into a
    // single Status.
    const auto runCmdOnNewShard = [this, &opCtx, &shardTargeter](const BSONObj& cmd) -> Status {
        auto swResponse =
            _runCommandForAddShard(opCtx, shardTargeter.get(), NamespaceString::kAdminDb, cmd);
        if (!swResponse.isOK()) {
            return swResponse.getStatus();
        }

        // Due to the semantics of _runCommandForAddShard, the underlying status has to be
        // extracted by taking the first non-OK status, if there is one.
        auto response = std::move(swResponse.getValue());
        BatchedCommandResponse batchResponse;
        return Shard::CommandResponse::processBatchWriteResponse(response, &batchResponse);
    };

    AddShard addShardCmd = add_shard_util::createAddShardCmd(opCtx, newShard.getName());

    BSONObj addShardRequest;
    if (serverGlobalParams.featureCompatibility.getVersion() == FCVersion::kFullyUpgradedTo42) {
        // In 4.2 the shard is added through the _addShard command, which in turn inserts a
        // shardIdentity document into the shard and triggers sharding state initialization.
        // Should a downgrade to 4.0 happen between building this command and the shard receiving
        // it (unlikely), the user gets a rather harmless CommandNotFound error for _addShard and
        // can simply retry.

        // Needed for IDL toBSON method
        BSONObj passthroughFields;
        addShardRequest = addShardCmd.toBSON(passthroughFields);
    } else {
        // For backwards compatibility with v4.0 shards, upsert a shardIdentity document directly.
        addShardRequest = add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
    }

    auto addShardStatus = runCmdOnNewShard(addShardRequest);
    if (!addShardStatus.isOK()) {
        return addShardStatus;
    }

    {
        // The fcvLock is held across reading the FCV, sending setFCV to the new shard, and writing
        // the new shard's entry to config.shards, so the FCV cannot change after setFCV was sent
        // but before the config.shards entry is written.
        // (A Global IX lock is deliberately not used here: the global lock must not be held while
        // blocking on the network.)
        invariant(!opCtx->lockState()->isLocked());
        Lock::SharedLock fcvSharedLock(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);

        const auto currentFcv = serverGlobalParams.featureCompatibility.getVersion();
        const bool send42 = currentFcv == FCVersion::kFullyUpgradedTo42 ||
            currentFcv == FCVersion::kUpgradingTo42;

        BSONObj setFCVCmd;
        if (send42) {
            setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                             << FeatureCompatibilityVersionParser::kVersion42
                             << WriteConcernOptions::kWriteConcernField
                             << opCtx->getWriteConcern().toBSON());
        } else {
            setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
                             << FeatureCompatibilityVersionParser::kVersion40
                             << WriteConcernOptions::kWriteConcernField
                             << opCtx->getWriteConcern().toBSON());
        }

        auto swSetFcvResponse = _runCommandForAddShard(
            opCtx, shardTargeter.get(), NamespaceString::kAdminDb, setFCVCmd);
        if (!swSetFcvResponse.isOK()) {
            return swSetFcvResponse.getStatus();
        }
        if (!swSetFcvResponse.getValue().commandStatus.isOK()) {
            return swSetFcvResponse.getValue().commandStatus;
        }

        log() << "going to insert new entry for shard into config.shards: "
              << newShard.toString();

        Status insertStatus = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            newShard.toBSON(),
            ShardingCatalogClient::kLocalWriteConcern);
        if (!insertStatus.isOK()) {
            log() << "error adding shard: " << newShard.toBSON()
                  << " err: " << insertStatus.reason();
            return insertStatus;
        }
    }

    // Register every database discovered on the new shard. A failure here is only logged; the
    // shard is added regardless.
    for (const auto& candidateDbName : swCandidateDbNames.getValue()) {
        DatabaseType dbEntry(
            candidateDbName, newShard.getName(), false, databaseVersion::makeNew());

        constexpr bool kUpsert = true;
        const auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
            opCtx,
            DatabaseType::ConfigNS,
            BSON(DatabaseType::name(candidateDbName)),
            dbEntry.toBSON(),
            kUpsert,
            ShardingCatalogClient::kLocalWriteConcern);
        if (updateStatus.isOK()) {
            continue;
        }

        log() << "adding shard " << shardConnectionString.toString()
              << " even though could not add database " << candidateDbName;
    }

    // Record the operation in the changelog.
    Grid::get(opCtx)->catalogClient()->logChange(
        opCtx,
        "addShard",
        "",
        BSON("name" << newShard.getName() << "host" << shardConnectionString.toString()),
        ShardingCatalogClient::kMajorityWriteConcern);

    // The freshly added shard has to be visible to this process.
    if (!shardRegistry->getShard(opCtx, newShard.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    // Success: keep the replica set monitor registered.
    replSetMonitorCleanup.Dismiss();

    return newShard.getName();
}
// Adds the host(s) described by 'shardConnectionString' to the cluster as a new shard.
//
// On success returns the name under which the shard is registered; otherwise returns the first
// non-OK status encountered. BadValue is returned up front when the connection string is invalid
// or when 'shardProposedName' is non-null but empty. 'shardProposedName' may be null. 'maxSize' is
// recorded on the shard entry (setMaxSizeMB) only when it is positive.
//
// If the hosts already belong to an existing shard the call is treated as a successful no-op and
// the existing shard's name is returned.
StatusWith<std::string> ShardingCatalogManager::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName != nullptr && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // addShard operations are serialized: at most one may be in progress at any time.
    Lock::ExclusiveLock shardMembershipLock(opCtx->lockState(), _kShardMembershipLock);

    // A retry (e.g. after a network error) may arrive for a shard that was in fact already added.
    // Detect that case first so the request can be answered as a no-op.
    auto swExistingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!swExistingShard.isOK()) {
        return swExistingShard.getStatus();
    }

    const auto& existingShard = swExistingShard.getValue();
    if (existingShard) {
        // The hosts are already part of a shard: report success. Advance the client's last optime
        // to the system's last optime so the caller still waits for replication and this state is
        // visible in the committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard->getName();
    }

    auto shardRegistry = Grid::get(opCtx)->shardRegistry();

    // Reload the ShardRegistry so that, when re-adding a replica set that was recently removed,
    // the ReplicaSetMonitor for that set name has been detached from the ReplicaSetMonitorManager
    // and a new one gets created when the set is targeted below.
    // Note: as of 3.4 removeShard is performed by mongos (unlike addShard), so the config server's
    // ShardRegistry is not synchronously reloaded when a shard is removed.
    if (!shardRegistry->reload(opCtx)) {
        // The first call joined a reload that was already running; reload once more so the result
        // is guaranteed to be fresh.
        shardRegistry->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> detachedShard{
        shardRegistry->createConnection(shardConnectionString)};
    invariant(detachedShard);
    auto shardTargeter = detachedShard->getTargeter();

    // Workaround: a bad shard may have been requested, leaving its bad connection string in the
    // global replica set monitor registry. Unless dismissed on success, this guard removes that
    // entry so it is recreated when a correct replica set is added later.
    auto replSetMonitorCleanup = MakeGuard([&] {
        if (shardConnectionString.type() != ConnectionString::SET) {
            return;
        }
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
    });

    // Make sure the given connection string can act as a shard at all.
    auto swNewShard =
        _validateHostAsShard(opCtx, shardTargeter, shardProposedName, shardConnectionString);
    if (!swNewShard.isOK()) {
        return swNewShard.getStatus();
    }
    ShardType& newShard = swNewShard.getValue();

    // None of the databases present on the candidate may already be known to the cluster.
    auto swCandidateDbNames = _getDBNamesListFromShard(opCtx, shardTargeter);
    if (!swCandidateDbNames.isOK()) {
        return swCandidateDbNames.getStatus();
    }

    for (const auto& candidateDbName : swCandidateDbNames.getValue()) {
        auto swClusterDb = Grid::get(opCtx)->catalogClient()->getDatabase(
            opCtx, candidateDbName, repl::ReadConcernLevel::kLocalReadConcern);

        if (!swClusterDb.isOK()) {
            // NamespaceNotFound is the expected outcome; anything else is a real failure.
            if (swClusterDb != ErrorCodes::NamespaceNotFound) {
                return swClusterDb.getStatus();
            }
            continue;
        }

        const auto& clusterDbDoc = swClusterDb.getValue().value;
        return Status(ErrorCodes::OperationFailed,
                      str::stream() << "can't add shard "
                                    << "'"
                                    << shardConnectionString.toString()
                                    << "'"
                                    << " because a local database '"
                                    << candidateDbName
                                    << "' exists in another "
                                    << clusterDbDoc.getPrimary());
    }

    // The candidate must not keep a local config.system.sessions collection.
    auto dropSessionsStatus = _dropSessionsCollection(opCtx, shardTargeter);
    if (!dropSessionsStatus.isOK()) {
        return dropSessionsStatus.withContext(
            "can't add shard with a local copy of config.system.sessions, please drop this "
            "collection from the shard manually and try again.");
    }

    // Without a shard name at this point, generate one.
    if (newShard.getName().empty()) {
        auto swGeneratedName = generateNewShardName(opCtx);
        if (!swGeneratedName.isOK()) {
            return swGeneratedName.getStatus();
        }
        newShard.setName(swGeneratedName.getValue());
    }

    if (maxSize > 0) {
        newShard.setMaxSizeMB(maxSize);
    }

    // Upsert a shardIdentity document on the shard; doing so also triggers sharding
    // initialization on the shard.
    LOG(2) << "going to insert shardIdentity document into shard: " << newShard;

    auto shardIdentityUpsert = createShardIdentityUpsertForAddShard(opCtx, newShard.getName());
    auto swUpsertResponse =
        _runCommandForAddShard(opCtx, shardTargeter.get(), "admin", shardIdentityUpsert);
    if (!swUpsertResponse.isOK()) {
        return swUpsertResponse.getStatus();
    }

    auto upsertResponse = std::move(swUpsertResponse.getValue());

    BatchedCommandResponse batchResponse;
    auto upsertStatus =
        Shard::CommandResponse::processBatchWriteResponse(upsertResponse, &batchResponse);
    if (!upsertStatus.isOK()) {
        return upsertStatus;
    }

    // The featureCompatibilityVersion is meant to be uniform across the cluster. writeConcern
    // majority is intentionally not sent to the added shard: a 3.4 mongod would reject it (setFCV
    // gained writeConcern support only in 3.6), while a 3.6 mongod defaults to majority
    // writeConcern anyway.
    //
    // TODO SERVER-32045: propagate the user's writeConcern
    const BSONObj setFCVCmd = BSON(FeatureCompatibilityVersion::kCommandName
                                   << FeatureCompatibilityVersion::toString(
                                          serverGlobalParams.featureCompatibility.getVersion()));

    auto swSetFcvResponse =
        _runCommandForAddShard(opCtx, shardTargeter.get(), "admin", setFCVCmd);
    if (!swSetFcvResponse.isOK()) {
        return swSetFcvResponse.getStatus();
    }
    if (!swSetFcvResponse.getValue().commandStatus.isOK()) {
        return swSetFcvResponse.getValue().commandStatus;
    }

    log() << "going to insert new entry for shard into config.shards: " << newShard.toString();

    Status insertStatus = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
        opCtx,
        ShardType::ConfigNS,
        newShard.toBSON(),
        ShardingCatalogClient::kMajorityWriteConcern);
    if (!insertStatus.isOK()) {
        log() << "error adding shard: " << newShard.toBSON()
              << " err: " << insertStatus.reason();
        return insertStatus;
    }

    // Register every database discovered on the new shard. A failure here is only logged; the
    // shard is added regardless.
    for (const auto& candidateDbName : swCandidateDbNames.getValue()) {
        DatabaseType dbEntry(candidateDbName, newShard.getName(), false);

        Status updateStatus =
            Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, candidateDbName, dbEntry);
        if (updateStatus.isOK()) {
            continue;
        }

        log() << "adding shard " << shardConnectionString.toString()
              << " even though could not add database " << candidateDbName;
    }

    // Record the operation in the changelog; the outcome of the logging call is ignored.
    BSONObjBuilder changeDetails;
    changeDetails.append("name", newShard.getName());
    changeDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)
        ->catalogClient()
        ->logChange(opCtx,
                    "addShard",
                    "",
                    changeDetails.obj(),
                    ShardingCatalogClient::kMajorityWriteConcern)
        .transitional_ignore();

    // The freshly added shard has to be visible to this process.
    if (!shardRegistry->getShard(opCtx, newShard.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    // Success: keep the replica set monitor registered.
    replSetMonitorCleanup.Dismiss();

    return newShard.getName();
}