void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       ChunkVersion chunkVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    chunkVersion.appendToCommand(builder);

    // 3.4 shard compatibility
    builder->append(kEpoch, chunkVersion.epoch());

    // config connection string is included for 3.4 shard compatibility
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());

    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, false);
}
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       const ChunkVersion& shardVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete,
                                       bool takeDistLock) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    shardVersion.appendForCommands(builder);
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, takeDistLock);
}
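// Usage sketch (hypothetical, not from the source tree): how a caller might
// assemble the moveChunk command document with the overload above. The
// function name exampleBuildMoveChunkCommand and all concrete values
// (namespace, shard ids, chunk bounds, size limit) are placeholders.
void exampleBuildMoveChunkCommand(const ChunkVersion& shardVersion,
                                  const ConnectionString& configConnStr,
                                  const MigrationSecondaryThrottleOptions& throttle) {
    BSONObjBuilder builder;
    MoveChunkRequest::appendAsCommand(&builder,
                                      NamespaceString("test.coll"),
                                      shardVersion,
                                      configConnStr,
                                      ShardId("shard0000"),
                                      ShardId("shard0001"),
                                      ChunkRange(BSON("x" << 0), BSON("x" << 100)),
                                      64 * 1024 * 1024,  // maxChunkSizeBytes
                                      throttle,
                                      true,    // waitForDelete
                                      false);  // takeDistLock
    BSONObj cmdObj = builder.obj();  // ready to send to the donor shard
}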
void StartChunkCloneRequest::appendAsCommand(
    BSONObjBuilder* builder,
    const NamespaceString& nss,
    const MigrationSessionId& sessionId,
    const ConnectionString& configServerConnectionString,
    const ConnectionString& fromShardConnectionString,
    const ShardId& fromShardId,
    const ShardId& toShardId,
    const BSONObj& chunkMinKey,
    const BSONObj& chunkMaxKey,
    const BSONObj& shardKeyPattern,
    const MigrationSecondaryThrottleOptions& secondaryThrottle) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());
    invariant(fromShardConnectionString.isValid());

    builder->append(kRecvChunkStart, nss.ns());
    sessionId.append(builder);
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardConnectionString, fromShardConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    builder->append(kChunkMinKey, chunkMinKey);
    builder->append(kChunkMaxKey, chunkMaxKey);
    builder->append(kShardKeyPattern, shardKeyPattern);
    secondaryThrottle.append(builder);
}
StatusWith<std::vector<std::string>> ShardingCatalogManager::getDatabasesForShard(
    OperationContext* opCtx, const ShardId& shardId) {
    auto findStatus = Grid::get(opCtx)->catalogClient()->_exhaustiveFindOnConfig(
        opCtx,
        kConfigReadSelector,
        repl::ReadConcernLevel::kLocalReadConcern,
        DatabaseType::ConfigNS,
        BSON(DatabaseType::primary(shardId.toString())),
        BSONObj(),
        boost::none);  // no limit
    if (!findStatus.isOK())
        return findStatus.getStatus();

    std::vector<std::string> dbs;
    for (const BSONObj& obj : findStatus.getValue().value) {
        std::string dbName;
        Status status = bsonExtractStringField(obj, DatabaseType::name(), &dbName);
        if (!status.isOK()) {
            return status;
        }

        dbs.push_back(dbName);
    }

    return dbs;
}
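// Illustrative shape (an assumption; only the "primary" field name is taken
// directly from the query above) of a config.databases document this find
// matches, i.e. a database whose primary shard is the one being queried:
//
//   { _id: "test", primary: "shard0000", partitioned: true }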
void CommitChunkMigrationRequest::appendAsCommand(
    BSONObjBuilder* builder,
    const NamespaceString& nss,
    const ShardId& fromShard,
    const ShardId& toShard,
    const ChunkType& migratedChunkType,
    const boost::optional<ChunkType>& controlChunkType) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kConfigSvrCommitChunkMigration, nss.ns());
    builder->append(kFromShard, fromShard.toString());
    builder->append(kToShard, toShard.toString());
    builder->append(kMigratedChunk, migratedChunkType.toBSON());

    if (controlChunkType) {
        builder->append(kControlChunk, controlChunkType->toBSON());
    }
}
void appendWriteConcernErrorToCmdResponse(const ShardId& shardId,
                                          const BSONElement& wcErrorElem,
                                          BSONObjBuilder& responseBuilder) {
    WriteConcernErrorDetail wcError;
    std::string errMsg;
    auto wcErrorObj = wcErrorElem.Obj();
    if (!wcError.parseBSON(wcErrorObj, &errMsg)) {
        wcError.clear();
        wcError.setStatus({ErrorCodes::FailedToParse,
                           "Failed to parse writeConcernError: " + wcErrorObj.toString() +
                               ", Received error: " + errMsg});
    }

    auto status = wcError.toStatus();
    wcError.setStatus(
        status.withReason(str::stream() << status.reason() << " at " << shardId.toString()));

    responseBuilder.append("writeConcernError", wcError.toBSON());
}
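// Usage sketch (hypothetical): forwarding a write concern error from a raw
// shard response into an aggregated command reply. The "writeConcernError"
// field name matches what appendWriteConcernErrorToCmdResponse itself emits;
// exampleForwardWcError is a placeholder name.
void exampleForwardWcError(const ShardId& shardId,
                           const BSONObj& shardResponse,
                           BSONObjBuilder& responseBuilder) {
    BSONElement wcErrorElem = shardResponse["writeConcernError"];
    if (!wcErrorElem.eoo()) {
        appendWriteConcernErrorToCmdResponse(shardId, wcErrorElem, responseBuilder);
    }
}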
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const BSONObj& minKey,
    const BSONObj& maxKey,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());
    invariant(minKey.woCompare(maxKey) < 0);

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                              << " parts at a time."};
    }

    // Assemble the splitChunk command to be sent to the shard that owns the chunk
    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    cmd.append(kMinKey, minKey);
    cmd.append(kMaxKey, maxKey);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shard = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shard) {
        status =
            Status(ErrorCodes::ShardNotFound, str::stream() << "shard " << shardId << " not found");
    } else {
        auto cmdStatus = shard->runCommand(txn,
                                           ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                           "admin",
                                           cmdObj,
                                           Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    // The shard may flag one of the resulting chunks as a migration candidate via the
    // optional "shouldMigrate" response field; surface that range to the caller
    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
            << "Chunk migration will be skipped because splitChunk returned invalid response: "
            << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
            << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const ChunkRange& chunkRange,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                              << " parts at a time."};
    }

    // Sanity check that we are not attempting to split at the boundaries of the chunk. This check
    // is already performed at chunk split commit time, but we are performing it here for parity
    // with old auto-split code, which might rely on it.
    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMin() == splitPoints.front())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point " << splitPoints.front()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMax() == splitPoints.back())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point " << splitPoints.back()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    chunkRange.append(&cmd);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shardStatus.isOK()) {
        status = shardStatus.getStatus();
    } else {
        auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts(
            txn,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            cmdObj,
            Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
            << "Chunk migration will be skipped because splitChunk returned invalid response: "
            << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
            << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
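// Caller sketch (hypothetical): issue the split, then act on the optional
// range the shard flags for migration via the "shouldMigrate" response field.
// exampleSplitAndMaybeMigrate and scheduleMigration are placeholder names for
// caller-specific logic, not functions from the source tree.
Status exampleSplitAndMaybeMigrate(OperationContext* txn,
                                   const ShardId& shardId,
                                   const NamespaceString& nss,
                                   const ShardKeyPattern& shardKeyPattern,
                                   ChunkVersion collectionVersion,
                                   const ChunkRange& chunkRange,
                                   const std::vector<BSONObj>& splitPoints) {
    auto swRange = splitChunkAtMultiplePoints(
        txn, shardId, nss, shardKeyPattern, collectionVersion, chunkRange, splitPoints);
    if (!swRange.isOK()) {
        return swRange.getStatus();
    }

    if (auto rangeToMigrate = std::move(swRange.getValue())) {
        // The shard flagged one of the resulting chunks as a migration candidate
        scheduleMigration(*rangeToMigrate);  // hypothetical follow-up hook
    }

    return Status::OK();
}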
StatusWith<ShardDrainingStatus> ShardingCatalogManager::removeShard(OperationContext* opCtx,
                                                                    const ShardId& shardId) {
    // Check preconditions for removing the shard
    std::string name = shardId.toString();
    auto countStatus = _runCountCommandOnConfig(
        opCtx,
        ShardType::ConfigNS,
        BSON(ShardType::name() << NE << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() > 0) {
        return Status(ErrorCodes::ConflictingOperationInProgress,
                      "Can't have more than one draining shard at a time");
    }

    countStatus =
        _runCountCommandOnConfig(opCtx, ShardType::ConfigNS, BSON(ShardType::name() << NE << name));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() == 0) {
        return Status(ErrorCodes::IllegalOperation, "Can't remove last shard");
    }

    // Figure out if shard is already draining
    countStatus = _runCountCommandOnConfig(
        opCtx, ShardType::ConfigNS, BSON(ShardType::name() << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }

    auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();

    if (countStatus.getValue() == 0) {
        log() << "going to start draining shard: " << name;

        // Record start in changelog
        const Status logStatus = Grid::get(opCtx)->catalogClient()->logChangeChecked(
            opCtx,
            "removeShard.start",
            "",
            BSON("shard" << name),
            ShardingCatalogClient::kLocalWriteConcern);
        if (!logStatus.isOK()) {
            return logStatus;
        }

        auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            BSON(ShardType::name() << name),
            BSON("$set" << BSON(ShardType::draining(true))),
            false,
            ShardingCatalogClient::kLocalWriteConcern);
        if (!updateStatus.isOK()) {
            log() << "error starting removeShard: " << name
                  << causedBy(redact(updateStatus.getStatus()));
            return updateStatus.getStatus();
        }

        shardRegistry->reload(opCtx);

        return ShardDrainingStatus::STARTED;
    }

    // Draining has already started, now figure out how many chunks and databases are still on the
    // shard.
    countStatus =
        _runCountCommandOnConfig(opCtx, ChunkType::ConfigNS, BSON(ChunkType::shard(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long chunkCount = countStatus.getValue();

    countStatus =
        _runCountCommandOnConfig(opCtx, DatabaseType::ConfigNS, BSON(DatabaseType::primary(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long databaseCount = countStatus.getValue();

    if (chunkCount > 0 || databaseCount > 0) {
        // Still more draining to do
        LOG(0) << "chunkCount: " << chunkCount;
        LOG(0) << "databaseCount: " << databaseCount;

        return ShardDrainingStatus::ONGOING;
    }

    // Draining is done, now finish removing the shard.
    log() << "going to remove shard: " << name;
    audit::logRemoveShard(opCtx->getClient(), name);

    Status status = Grid::get(opCtx)->catalogClient()->removeConfigDocuments(
        opCtx,
        ShardType::ConfigNS,
        BSON(ShardType::name() << name),
        ShardingCatalogClient::kLocalWriteConcern);
    if (!status.isOK()) {
        log() << "Error concluding removeShard operation on: " << name
              << "; err: " << status.reason();
        return status;
    }

    shardConnectionPool.removeHost(name);
    ReplicaSetMonitor::remove(name);

    shardRegistry->reload(opCtx);

    // Record finish in changelog
    Grid::get(opCtx)->catalogClient()->logChange(
        opCtx, "removeShard", "", BSON("shard" << name), ShardingCatalogClient::kLocalWriteConcern);

    return ShardDrainingStatus::COMPLETED;
}
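// Driver sketch (hypothetical): a single removeShard call only reports the
// current draining state, so callers re-invoke it until COMPLETED is returned.
// exampleDrainShard and the ten-second backoff are illustrative, not from the
// source tree.
Status exampleDrainShard(OperationContext* opCtx,
                         ShardingCatalogManager* catalogManager,
                         const ShardId& shardId) {
    while (true) {
        auto swDraining = catalogManager->removeShard(opCtx, shardId);
        if (!swDraining.isOK()) {
            return swDraining.getStatus();
        }
        if (swDraining.getValue() == ShardDrainingStatus::COMPLETED) {
            return Status::OK();
        }

        // STARTED or ONGOING: wait for the balancer to move chunks off, then retry
        sleepsecs(10);
    }
}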