// Creates a new entry for the named database in the sharding catalog, assigning it a primary
// shard. Returns NamespaceExists if the database already exists (including when a concurrent
// creator wins the race) and propagates any dist-lock, lookup or insert failure.
Status CatalogManager::createDatabase(const std::string& dbName) {
    invariant(nsIsDbOnly(dbName));

    // The admin and config databases should never be explicitly created. They "just exist",
    // i.e. getDatabase will always return an entry for them.
    invariant(dbName != "admin");
    invariant(dbName != "config");

    // Lock the database globally to prevent conflicts with simultaneous database creation.
    // Wait up to 5 seconds, retrying every 500ms — the same timeouts used by enableSharding.
    // (The previous Seconds{5000} — over 80 minutes — appears to have been a unit mix-up.)
    auto scopedDistLock =
        getDistLockManager()->lock(dbName, "createDatabase", Seconds{5}, Milliseconds{500});
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    // Check for case sensitivity violations
    Status status = _checkDbDoesNotExist(dbName);
    if (!status.isOK()) {
        return status;
    }

    // Database does not exist, pick a shard and create a new entry
    auto newShardIdStatus = selectShardForNewDatabase(grid.shardRegistry());
    if (!newShardIdStatus.isOK()) {
        return newShardIdStatus.getStatus();
    }

    const ShardId& newShardId = newShardIdStatus.getValue();

    log() << "Placing [" << dbName << "] on: " << newShardId;

    DatabaseType db;
    db.setName(dbName);
    db.setPrimary(newShardId);
    db.setSharded(false);

    BatchedCommandResponse response;
    status = insert(DatabaseType::ConfigNS, db.toBSON(), &response);

    // DuplicateKey from the unique index on the database name means another creator beat us to
    // it; report that with the more descriptive NamespaceExists code.
    if (status.code() == ErrorCodes::DuplicateKey) {
        return Status(ErrorCodes::NamespaceExists, "database " + dbName + " already exists");
    }

    return status;
}
// Looks up the catalog entry for a database, together with the config server opTime at which it
// was read. The config and admin databases are virtual entries hosted on the config server.
StatusWith<OpTimePair<DatabaseType>> CatalogManagerReplicaSet::getDatabase(
    OperationContext* txn, const std::string& dbName) {
    invariant(nsIsDbOnly(dbName));

    // The two databases that are hosted on the config server are config and admin
    if (dbName == "config" || dbName == "admin") {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setSharded(false);
        dbt.setPrimary("config");

        return OpTimePair<DatabaseType>(dbt);
    }

    const auto configShard = grid.shardRegistry()->getShard(txn, "config");

    const auto readHostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!readHostStatus.isOK()) {
        return readHostStatus.getStatus();
    }

    // Fetch the single matching document (and its opTime) from config.databases.
    auto queryStatus = _exhaustiveFindOnConfig(readHostStatus.getValue(),
                                               NamespaceString(DatabaseType::ConfigNS),
                                               BSON(DatabaseType::name(dbName)),
                                               BSONObj(),
                                               1);
    if (!queryStatus.isOK()) {
        return queryStatus.getStatus();
    }

    const auto& queryResult = queryStatus.getValue();
    if (queryResult.value.empty()) {
        return {ErrorCodes::DatabaseNotFound, stream() << "database " << dbName << " not found"};
    }

    invariant(queryResult.value.size() == 1);

    auto parsed = DatabaseType::fromBSON(queryResult.value.front());
    if (!parsed.isOK()) {
        return parsed.getStatus();
    }

    return OpTimePair<DatabaseType>(parsed.getValue(), queryResult.opTime);
}
// Persists the cached database metadata to the catalog. When 'db' is set, the database entry
// itself is written; when 'coll' is set, every collection entry modified since the last save
// (dirty) is written as well. Throws via uassertStatusOK on database update failure.
void DBConfig::_save(OperationContext* txn, bool db, bool coll) {
    if (db) {
        DatabaseType dbt;
        dbt.setName(_name);
        dbt.setPrimary(_primaryId);
        dbt.setSharded(_shardingEnabled);

        uassertStatusOK(grid.catalogClient(txn)->updateDatabase(txn, _name, dbt));
    }

    if (coll) {
        // Only dirty collection entries need to be flushed.
        for (auto& entry : _collections) {
            if (entry.second.isDirty()) {
                entry.second.save(txn, entry.first);
            }
        }
    }
}
// Marks a database as sharding-enabled in the catalog, creating the database entry (with a
// freshly selected primary shard) if it does not exist yet. Holds the distributed lock for the
// database for the duration of the check-and-update.
Status CatalogManagerCommon::enableSharding(OperationContext* txn, const std::string& dbName) {
    invariant(nsIsDbOnly(dbName));

    DatabaseType db;

    // Lock the database globally to prevent conflicts with simultaneous database
    // creation/modification.
    auto scopedDistLock =
        getDistLockManager()->lock(dbName, "enableSharding", Seconds{5}, Milliseconds{500});
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    // Check for case sensitivity violations
    const Status checkStatus = _checkDbDoesNotExist(txn, dbName, &db);
    if (checkStatus.code() == ErrorCodes::NamespaceExists) {
        // Database exists, so just update it
        db.setSharded(true);
    } else if (checkStatus.isOK()) {
        // Database does not exist, create a new entry
        auto shardIdStatus = selectShardForNewDatabase(txn, grid.shardRegistry());
        if (!shardIdStatus.isOK()) {
            return shardIdStatus.getStatus();
        }

        const ShardId& primaryShardId = shardIdStatus.getValue();

        log() << "Placing [" << dbName << "] on: " << primaryShardId;

        db.setName(dbName);
        db.setPrimary(primaryShardId);
        db.setSharded(true);
    } else {
        // Any other outcome (e.g. a case-insensitive name clash) is an error.
        return checkStatus;
    }

    log() << "Enabling sharding for database [" << dbName << "] in config db";

    return updateDatabase(txn, dbName, db);
}
// Looks up the catalog entry for a database. The config and admin databases are virtual
// entries hosted on the config server and are synthesized without a catalog read.
StatusWith<DatabaseType> CatalogManagerReplicaSet::getDatabase(const std::string& dbName) {
    invariant(nsIsDbOnly(dbName));

    // The two databases that are hosted on the config server are config and admin
    if (dbName == "config" || dbName == "admin") {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setSharded(false);
        dbt.setPrimary("config");

        return dbt;
    }

    const auto configShard = grid.shardRegistry()->getShard("config");

    const auto hostStatus = configShard->getTargeter()->findHost(kConfigReadSelector);
    if (!hostStatus.isOK()) {
        return hostStatus.getStatus();
    }

    // Fetch the single matching document from config.databases.
    auto queryStatus = grid.shardRegistry()->exhaustiveFind(hostStatus.getValue(),
                                                            NamespaceString(DatabaseType::ConfigNS),
                                                            BSON(DatabaseType::name(dbName)),
                                                            1);
    if (!queryStatus.isOK()) {
        return queryStatus.getStatus();
    }

    const auto& foundDocs = queryStatus.getValue();
    if (foundDocs.empty()) {
        return {ErrorCodes::NamespaceNotFound, stream() << "database " << dbName << " not found"};
    }

    invariant(foundDocs.size() == 1);

    return DatabaseType::fromBSON(foundDocs.front());
}
// Adds a new shard to the cluster: validates the connection string and proposed name, confirms
// the host can actually serve as a shard, rejects it if any of its local databases already
// exist in the cluster, upserts a shardIdentity document onto the new shard and finally
// registers the shard in config.shards and its databases in the catalog. Returns the (possibly
// generated) shard name on success.
StatusWith<string> ShardingCatalogManagerImpl::addShard(
    OperationContext* txn,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter
    // instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(txn)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(txn, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        // TODO: This is a workaround for the case were we could have some bad shard being
        // requested to be added and we put that bad connection string on the global replica set
        // monitor registry. It needs to be cleaned up so that when a correct replica set is
        // added, it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return shardStatus.getStatus();
    }

    ShardType& shardType = shardStatus.getValue();

    auto dbNamesStatus = _getDBNamesListFromShard(txn, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : dbNamesStatus.getValue()) {
        auto dbt = _catalogClient->getDatabase(txn, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            // "Not found" is the expected outcome; anything else is a real catalog failure.
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> result = _generateNewShardName(txn);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // Build the shardIdentity document that tells the new shard its name, the config server
    // connection string and the cluster id.
    ShardIdentityType shardIdentity;
    shardIdentity.setConfigsvrConnString(
        Grid::get(txn)->shardRegistry()->getConfigServerConnectionString());
    shardIdentity.setShardName(shardType.getName());
    shardIdentity.setClusterId(Grid::get(txn)->shardRegistry()->getClusterId());
    auto validateStatus = shardIdentity.validate();
    if (!validateStatus.isOK()) {
        return validateStatus;
    }

    log() << "going to insert shardIdentity document into shard: " << shardIdentity.toString();

    // Upsert the shardIdentity document onto the new shard with majority write concern.
    auto updateRequest = shardIdentity.createUpsertForAddShard();
    BatchedCommandRequest commandRequest(updateRequest.release());
    commandRequest.setNS(NamespaceString::kConfigCollectionNamespace);
    commandRequest.setWriteConcern(kMajorityWriteConcern.toBSON());

    auto swCommandResponse =
        _runCommandForAddShard(txn, targeter.get(), "admin", commandRequest.toBSON());
    if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    auto commandResponse = std::move(swCommandResponse.getValue());

    BatchedCommandResponse batchResponse;
    auto batchResponseStatus =
        Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    if (!batchResponseStatus.isOK()) {
        return batchResponseStatus;
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result =
        _catalogClient->insertConfigDocument(txn, ShardType::ConfigNS, shardType.toBSON());
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();

        if (result == ErrorCodes::DuplicateKey) {
            // TODO(SERVER-24213): adding a shard that already exists should be considered success,
            // however this approach does no validation that we are adding the shard with the same
            // options. It also does not protect against adding the same shard with a different
            // shard name and slightly different connection string. This is a temporary hack to
            // get the continuous stepdown suite passing.
            warning() << "Received duplicate key error when inserting new shard with name "
                      << shardType.getName() << " and connection string "
                      << shardConnectionString.toString()
                      << " to config.shards collection. This most likely means that there was an "
                         "attempt to add a shard that already exists in the cluster";
            return shardType.getName();
        }

        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const string& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = _catalogClient->updateDatabase(txn, dbName, dbt);
        if (!status.isOK()) {
            // Best effort: the shard itself is already registered, so only log the failure.
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    _catalogClient->logChange(txn, "addShard", "", shardDetails.obj());

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(txn)->shardRegistry();
    if (!shardRegistry->getShard(txn, shardType.getName())) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    return shardType.getName();
}
// Adds a new shard to the cluster (common implementation): validates the host, checks the
// candidate's local databases for clashes with existing cluster databases, registers the shard
// in config.shards, registers its databases in the catalog and records the operation in the
// changelog. Returns the (possibly generated) shard name on success.
StatusWith<string> CatalogManagerCommon::addShard(OperationContext* txn,
                                                  const std::string* shardProposedName,
                                                  const ConnectionString& shardConnectionString,
                                                  const long long maxSize) {
    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        validateHostAsShard(txn, grid.shardRegistry(), shardConnectionString, shardProposedName);
    if (!shardStatus.isOK()) {
        // TODO: This is a workaround for the case were we could have some bad shard being
        // requested to be added and we put that bad connection string on the global replica set
        // monitor registry. It needs to be cleaned up so that when a correct replica set is
        // added, it will be recreated.
        ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        return shardStatus.getStatus();
    }

    ShardType& shardType = shardStatus.getValue();

    auto dbNamesStatus = getDBNamesListFromShard(txn, grid.shardRegistry(), shardConnectionString);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    // Check that none of the existing shard candidate's dbs exist already
    for (const string& dbName : dbNamesStatus.getValue()) {
        auto dbt = getDatabase(txn, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            // "Not found" is the expected outcome; anything else is a real catalog failure.
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        StatusWith<string> result = _generateNewShardName(txn);
        if (!result.isOK()) {
            // NOTE(review): the underlying error from _generateNewShardName is discarded here;
            // the sibling addShard implementation propagates result.getStatus() instead —
            // consider doing the same so the root cause is not lost.
            return Status(ErrorCodes::OperationFailed, "error generating new shard name");
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    log() << "going to add shard: " << shardType.toString();

    Status result = insert(txn, ShardType::ConfigNS, shardType.toBSON(), NULL);
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        return result;
    }

    // Make sure the new shard is visible
    grid.shardRegistry()->reload(txn);

    // Add all databases which were discovered on the new shard
    for (const string& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = updateDatabase(txn, dbName, dbt);
        if (!status.isOK()) {
            // Best effort: the shard itself is already registered, so only log the failure.
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    logChange(txn, txn->getClient()->clientAddress(true), "addShard", "", shardDetails.obj());

    return shardType.getName();
}
// Adds a new shard to the cluster. The operation is serialized via _kShardMembershipLock and is
// idempotent: if the given hosts are already registered as a shard, the existing shard's name
// is returned. Otherwise the host is validated, its local databases are checked for clashes
// with existing cluster databases, featureCompatibilityVersion is set when required, a
// shardIdentity document is upserted onto the new shard and the shard is registered in
// config.shards. Returns the (possibly generated) shard name on success.
StatusWith<std::string> ShardingCatalogManagerImpl::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a
    // network error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request. Make sure to set the last optime for the client to the system last
        // optime so that we'll still wait for replication so that this state is visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter
    // instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case were we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica
            // set monitor registry. It needs to be cleaned up so that when a correct replica set
            // is added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }

    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient(opCtx)->getDatabase(opCtx, dbName);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'" << shardConnectionString.toString() << "'"
                                        << " because a local database '" << dbName
                                        << "' exists in another " << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            // "Not found" is the expected outcome; anything else is a real catalog failure.
            return dbt.getStatus();
        }
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // If the minimum allowed version for the cluster is 3.4, set the featureCompatibilityVersion
    // to 3.4 on the shard.
    if (serverGlobalParams.featureCompatibility.version.load() ==
        ServerGlobalParams::FeatureCompatibility::Version::k34) {
        auto versionResponse =
            _runCommandForAddShard(opCtx,
                                   targeter.get(),
                                   "admin",
                                   BSON(FeatureCompatibilityVersion::kCommandName
                                        << FeatureCompatibilityVersionCommandParser::kVersion34));
        if (!versionResponse.isOK()) {
            return versionResponse.getStatus();
        }

        if (!versionResponse.getValue().commandStatus.isOK()) {
            // A pre-3.4 shard does not implement setFeatureCompatibilityVersion and replies with
            // CommandNotFound. The check must inspect the *command* status: the previous code
            // tested versionResponse.getStatus(), which was already verified OK above, making
            // this branch unreachable.
            if (versionResponse.getValue().commandStatus.code() == ErrorCodes::CommandNotFound) {
                return {ErrorCodes::OperationFailed,
                        "featureCompatibilityVersion for cluster is 3.4, cannot add a shard with "
                        "version below 3.4. See "
                        "http://dochub.mongodb.org/core/3.4-feature-compatibility."};
            }
            return versionResponse.getValue().commandStatus;
        }
    }

    if (!MONGO_FAIL_POINT(dontUpsertShardIdentityOnNewShards)) {
        // Upsert the shardIdentity document onto the new shard so it knows its name, the config
        // server connection string and the cluster id.
        auto commandRequest = createShardIdentityUpsertForAddShard(opCtx, shardType.getName());

        LOG(2) << "going to insert shardIdentity document into shard: " << shardType;

        auto swCommandResponse =
            _runCommandForAddShard(opCtx, targeter.get(), "admin", commandRequest);
        if (!swCommandResponse.isOK()) {
            return swCommandResponse.getStatus();
        }

        auto commandResponse = std::move(swCommandResponse.getValue());

        BatchedCommandResponse batchResponse;
        auto batchResponseStatus =
            Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
        if (!batchResponseStatus.isOK()) {
            return batchResponseStatus;
        }
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result = Grid::get(opCtx)->catalogClient(opCtx)->insertConfigDocument(
        opCtx,
        ShardType::ConfigNS,
        shardType.toBSON(),
        ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt;
        dbt.setName(dbName);
        dbt.setPrimary(shardType.getName());
        dbt.setSharded(false);

        Status status = Grid::get(opCtx)->catalogClient(opCtx)->updateDatabase(opCtx, dbName, dbt);
        if (!status.isOK()) {
            // Best effort: the shard itself is already registered, so only log the failure.
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)->catalogClient(opCtx)->logChange(
        opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern);

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }

    // The shard is durably registered; do not remove its ReplicaSetMonitor on exit.
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}