/**
 * Builds the command, if any, that must be sent to 'remoteHost' right after connecting to it.
 *
 * Returns:
 *  - boost::none when this process is a config server, or when 'remoteHost' belongs to the
 *    config shard (no sharding metadata initialization is needed in either case);
 *  - ErrorCodes::ShardNotFound when the registry knows no shard for 'remoteHost';
 *  - otherwise a setShardVersion "init" request targeting 'remoteHost' on the "admin" database
 *    with a 30 second timeout.
 */
StatusWith<boost::optional<executor::RemoteCommandRequest>>
ShardingNetworkConnectionHook::makeRequest(const HostAndPort& remoteHost) {
    // TODO: SERVER-23973 Temporary crutch until we decide where to get the config server
    // connection string.
    if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
        return {boost::none};
    }

    auto* const registry = grid.shardRegistry();

    const auto targetShard = registry->getShardForHostNoReload(remoteHost);
    if (!targetShard) {
        return {ErrorCodes::ShardNotFound,
                str::stream() << "No shard found for host: " << remoteHost.toString()};
    }

    // No need to initialize sharding metadata if talking to a config server
    if (targetShard->isConfig()) {
        return {boost::none};
    }

    const SetShardVersionRequest initVersion =
        SetShardVersionRequest::makeForInitNoPersist(registry->getConfigServerConnectionString(),
                                                     targetShard->getId(),
                                                     targetShard->getConnString());

    executor::RemoteCommandRequest initRequest;
    initRequest.dbname = "admin";
    initRequest.target = remoteHost;
    initRequest.timeout = stdx::chrono::seconds{30};
    initRequest.cmdObj = initVersion.toBSON();

    return {initRequest};
}
/**
 * Builds the command, if any, that must be sent to 'remoteHost' right after connecting to it.
 *
 * Returns:
 *  - ErrorCodes::ShardNotFound when the registry knows no shard for 'remoteHost';
 *  - boost::none when 'remoteHost' belongs to the config shard;
 *  - otherwise a setShardVersion "init" request targeting 'remoteHost' on the "admin" database
 *    with a 30 second timeout.
 */
StatusWith<boost::optional<executor::RemoteCommandRequest>>
ShardingNetworkConnectionHook::makeRequest(const HostAndPort& remoteHost) {
    auto* const registry = grid.shardRegistry();

    const auto targetShard = registry->getShardForHostNoReload(remoteHost);
    if (!targetShard) {
        return {ErrorCodes::ShardNotFound,
                str::stream() << "No shard found for host: " << remoteHost.toString()};
    }

    // No need to initialize sharding metadata if talking to a config server
    if (targetShard->isConfig()) {
        return {boost::none};
    }

    const SetShardVersionRequest initVersion =
        SetShardVersionRequest::makeForInitNoPersist(registry->getConfigServerConnectionString(),
                                                     targetShard->getId(),
                                                     targetShard->getConnString());

    executor::RemoteCommandRequest initRequest;
    initRequest.dbname = "admin";
    initRequest.target = remoteHost;
    initRequest.timeout = Seconds{30};
    initRequest.cmdObj = initVersion.toBSON();

    return {initRequest};
}
void ShardingCatalogManager::shardCollection(OperationContext* opCtx, const NamespaceString& nss, const boost::optional<UUID> uuid, const ShardKeyPattern& fieldsAndOrder, const BSONObj& defaultCollation, bool unique, const vector<BSONObj>& splitPoints, bool isFromMapReduce, const ShardId& dbPrimaryShardId) { const auto shardRegistry = Grid::get(opCtx)->shardRegistry(); const auto primaryShard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId)); // Fail if there are partially written chunks from a previous failed shardCollection. checkForExistingChunks(opCtx, nss); // Prior to 4.0.5, zones cannot be taken into account at collection sharding time, so ignore // them and let the balancer apply them later const std::vector<TagsType> treatAsNoZonesDefined; // Map/reduce with output to sharded collection ignores consistency checks and requires the // initial chunks to be spread across shards unconditionally const bool treatAsEmpty = isFromMapReduce; // Record start in changelog { BSONObjBuilder collectionDetail; collectionDetail.append("shardKey", fieldsAndOrder.toBSON()); collectionDetail.append("collection", nss.ns()); if (uuid) uuid->appendToBuilder(&collectionDetail, "uuid"); collectionDetail.append("empty", treatAsEmpty); collectionDetail.append("fromMapReduce", isFromMapReduce); collectionDetail.append("primary", primaryShard->toString()); collectionDetail.append("numChunks", static_cast<int>(splitPoints.size() + 1)); uassertStatusOK(ShardingLogging::get(opCtx)->logChangeChecked( opCtx, "shardCollection.start", nss.ns(), collectionDetail.obj(), ShardingCatalogClient::kMajorityWriteConcern)); } // Construct the collection default collator. 
std::unique_ptr<CollatorInterface> defaultCollator; if (!defaultCollation.isEmpty()) { defaultCollator = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext()) ->makeFromBSON(defaultCollation)); } const auto initialChunks = InitialSplitPolicy::createFirstChunks(opCtx, nss, fieldsAndOrder, dbPrimaryShardId, splitPoints, treatAsNoZonesDefined, treatAsEmpty); InitialSplitPolicy::writeFirstChunksToConfig(opCtx, initialChunks); { CollectionType coll; coll.setNs(nss); if (uuid) coll.setUUID(*uuid); coll.setEpoch(initialChunks.collVersion().epoch()); coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(initialChunks.collVersion().toLong())); coll.setKeyPattern(fieldsAndOrder.toBSON()); coll.setDefaultCollation(defaultCollator ? defaultCollator->getSpec().toBSON() : BSONObj()); coll.setUnique(unique); uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection( opCtx, nss, coll, true /*upsert*/)); } auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId)); invariant(!shard->isConfig()); // Tell the primary mongod to refresh its data SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist( shardRegistry->getConfigServerConnectionString(), dbPrimaryShardId, primaryShard->getConnString(), nss, initialChunks.collVersion(), true /* isAuthoritative */, true /* forceRefresh */); auto ssvResponse = shard->runCommandWithFixedRetryAttempts(opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, "admin", ssv.toBSON(), Shard::RetryPolicy::kIdempotent); auto status = ssvResponse.isOK() ? 
std::move(ssvResponse.getValue().commandStatus) : std::move(ssvResponse.getStatus()); if (!status.isOK()) { warning() << "could not update initial version of " << nss.ns() << " on shard primary " << dbPrimaryShardId << causedBy(redact(status)); } ShardingLogging::get(opCtx)->logChange( opCtx, "shardCollection.end", nss.ns(), BSON("version" << initialChunks.collVersion().toString()), ShardingCatalogClient::kMajorityWriteConcern); }
Status ShardingCatalogManager::dropCollection(OperationContext* opCtx, const NamespaceString& nss) { const Status logStatus = ShardingLogging::get(opCtx)->logChangeChecked(opCtx, "dropCollection.start", nss.ns(), BSONObj(), ShardingCatalogClient::kMajorityWriteConcern); if (!logStatus.isOK()) { return logStatus; } const auto catalogClient = Grid::get(opCtx)->catalogClient(); const auto shardsStatus = catalogClient->getAllShards(opCtx, repl::ReadConcernLevel::kLocalReadConcern); if (!shardsStatus.isOK()) { return shardsStatus.getStatus(); } vector<ShardType> allShards = std::move(shardsStatus.getValue().value); LOG(1) << "dropCollection " << nss.ns() << " started"; const auto dropCommandBSON = [opCtx, &nss] { BSONObjBuilder builder; builder.append("drop", nss.coll()); if (!opCtx->getWriteConcern().usedDefault) { builder.append(WriteConcernOptions::kWriteConcernField, opCtx->getWriteConcern().toBSON()); } return builder.obj(); }(); std::map<std::string, BSONObj> errors; auto* const shardRegistry = Grid::get(opCtx)->shardRegistry(); for (const auto& shardEntry : allShards) { auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName()); if (!swShard.isOK()) { return swShard.getStatus(); } const auto& shard = swShard.getValue(); auto swDropResult = shard->runCommandWithFixedRetryAttempts( opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, nss.db().toString(), dropCommandBSON, Shard::RetryPolicy::kIdempotent); if (!swDropResult.isOK()) { return swDropResult.getStatus().withContext( str::stream() << "Error dropping collection on shard " << shardEntry.getName()); } auto& dropResult = swDropResult.getValue(); auto dropStatus = std::move(dropResult.commandStatus); auto wcStatus = std::move(dropResult.writeConcernStatus); if (!dropStatus.isOK() || !wcStatus.isOK()) { if (dropStatus.code() == ErrorCodes::NamespaceNotFound && wcStatus.isOK()) { // Generally getting NamespaceNotFound is okay to ignore as it simply means that // the collection has already been 
dropped or doesn't exist on this shard. // If, however, we get NamespaceNotFound but also have a write concern error then we // can't confirm whether the fact that the namespace doesn't exist is actually // committed. Thus we must still fail on NamespaceNotFound if there is also a write // concern error. This can happen if we call drop, it succeeds but with a write // concern error, then we retry the drop. continue; } errors.emplace(shardEntry.getHost(), std::move(dropResult.response)); } } if (!errors.empty()) { StringBuilder sb; sb << "Dropping collection failed on the following hosts: "; for (auto it = errors.cbegin(); it != errors.cend(); ++it) { if (it != errors.cbegin()) { sb << ", "; } sb << it->first << ": " << it->second; } return {ErrorCodes::OperationFailed, sb.str()}; } LOG(1) << "dropCollection " << nss.ns() << " shard data deleted"; // Remove chunk data Status result = catalogClient->removeConfigDocuments(opCtx, ChunkType::ConfigNS, BSON(ChunkType::ns(nss.ns())), ShardingCatalogClient::kMajorityWriteConcern); if (!result.isOK()) { return result; } LOG(1) << "dropCollection " << nss.ns() << " chunk data deleted"; // Remove tag data result = catalogClient->removeConfigDocuments(opCtx, TagsType::ConfigNS, BSON(TagsType::ns(nss.ns())), ShardingCatalogClient::kMajorityWriteConcern); if (!result.isOK()) { return result; } LOG(1) << "dropCollection " << nss.ns() << " tag data deleted"; // Mark the collection as dropped CollectionType coll; coll.setNs(nss); coll.setDropped(true); coll.setEpoch(ChunkVersion::DROPPED().epoch()); coll.setUpdatedAt(Grid::get(opCtx)->getNetwork()->now()); const bool upsert = false; result = ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection( opCtx, nss, coll, upsert); if (!result.isOK()) { return result; } LOG(1) << "dropCollection " << nss.ns() << " collection marked as dropped"; for (const auto& shardEntry : allShards) { auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName()); if (!swShard.isOK()) { 
return swShard.getStatus(); } const auto& shard = swShard.getValue(); SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist( shardRegistry->getConfigServerConnectionString(), shardEntry.getName(), fassert(28781, ConnectionString::parse(shardEntry.getHost())), nss, ChunkVersion::DROPPED(), true /* isAuthoritative */, true /* forceRefresh */); auto ssvResult = shard->runCommandWithFixedRetryAttempts( opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, "admin", ssv.toBSON(), Shard::RetryPolicy::kIdempotent); if (!ssvResult.isOK()) { return ssvResult.getStatus(); } auto ssvStatus = std::move(ssvResult.getValue().commandStatus); if (!ssvStatus.isOK()) { return ssvStatus; } auto unsetShardingStatus = shard->runCommandWithFixedRetryAttempts( opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, "admin", BSON("unsetSharding" << 1), Shard::RetryPolicy::kIdempotent); if (!unsetShardingStatus.isOK()) { return unsetShardingStatus.getStatus(); } auto unsetShardingResult = std::move(unsetShardingStatus.getValue().commandStatus); if (!unsetShardingResult.isOK()) { return unsetShardingResult; } } LOG(1) << "dropCollection " << nss.ns() << " completed"; ShardingLogging::get(opCtx)->logChange( opCtx, "dropCollection", nss.ns(), BSONObj(), ShardingCatalogClient::kMajorityWriteConcern); return Status::OK(); }
/**
 * Drops the sharded collection 'ns' from every shard and from the config metadata.
 *
 * Steps, in order:
 *  1. Log "dropCollection.start" and fetch the list of all shards.
 *  2. Take the "drop" distributed lock on the collection. The wait time defaults to 2 seconds
 *     and can be overridden through the setDropCollDistLockWait fail point ("waitForSecs").
 *  3. Run "drop" on every shard, ignoring NamespaceNotFound; other command failures are
 *     collected per host and reported as a single OperationFailed error.
 *  4. Remove the collection's chunk documents and mark the collection entry as dropped.
 *  5. Send setShardVersion with the DROPPED version followed by "unsetSharding" to every shard.
 *  6. Log "dropCollection".
 *
 * Returns the first non-OK status encountered, or Status::OK() when every step succeeded.
 */
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> shards;
    const Status listStatus = getAllShards(txn, &shards);
    if (!listStatus.isOK()) {
        return listStatus;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run
    stdx::chrono::seconds lockWait(2);
    MONGO_FAIL_POINT_BLOCK(setDropCollDistLockWait, customWait) {
        const BSONObj& failPointData = customWait.getData();
        lockWait = stdx::chrono::seconds(failPointData["waitForSecs"].numberInt());
    }
    const stdx::chrono::milliseconds lockRetryInterval(500);

    // Kept as a named local so the lock handle lives until this function returns.
    auto distLock = getDistLockManager()->lock(ns.ns(), "drop", lockWait, lockRetryInterval);
    if (!distLock.isOK()) {
        return distLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    // Host -> raw command response for every shard on which the drop failed.
    std::map<string, BSONObj> failuresByHost;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : shards) {
        auto swDrop = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));
        if (!swDrop.isOK()) {
            return swDrop.getStatus();
        }

        const auto dropStatus = getStatusFromCommandResult(swDrop.getValue());

        // A missing namespace on a shard is as good as a successful drop.
        if (dropStatus.isOK() || dropStatus.code() == ErrorCodes::NamespaceNotFound) {
            continue;
        }

        failuresByHost.emplace(shardEntry.getHost(), swDrop.getValue());
    }

    if (!failuresByHost.empty()) {
        StringBuilder message;
        message << "Dropping collection failed on the following hosts: ";

        bool isFirst = true;
        for (const auto& failure : failuresByHost) {
            if (!isFirst) {
                message << ", ";
            }
            isFirst = false;

            message << failure.first << ": " << failure.second;
        }

        return {ErrorCodes::OperationFailed, message.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status metadataStatus =
        remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!metadataStatus.isOK()) {
        return metadataStatus;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType droppedEntry;
    droppedEntry.setNs(ns);
    droppedEntry.setDropped(true);
    droppedEntry.setEpoch(ChunkVersion::DROPPED().epoch());
    droppedEntry.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    metadataStatus = updateCollection(txn, ns.ns(), droppedEntry);
    if (!metadataStatus.isOK()) {
        return metadataStatus;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    // Tell every shard that the collection is now at the DROPPED version, then send it
    // "unsetSharding".
    for (const auto& shardEntry : shards) {
        SetShardVersionRequest droppedVersionRequest =
            SetShardVersionRequest::makeForVersioningNoPersist(
                grid.shardRegistry()->getConfigServerConnectionString(),
                shardEntry.getName(),
                fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
                ns,
                ChunkVersion::DROPPED(),
                true);

        auto swSetVersion = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", droppedVersionRequest.toBSON());
        if (!swSetVersion.isOK()) {
            return swSetVersion.getStatus();
        }

        auto setVersionStatus = getStatusFromCommandResult(swSetVersion.getValue());
        if (!setVersionStatus.isOK()) {
            return setVersionStatus;
        }

        auto swUnset = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));
        if (!swUnset.isOK()) {
            return swUnset.getStatus();
        }

        auto unsetStatus = getStatusFromCommandResult(swUnset.getValue());
        if (!unsetStatus.isOK()) {
            return unsetStatus;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}
/**
 * Shards the collection 'ns' on the key pattern 'fieldsAndOrder'.
 *
 * Steps, in order:
 *  1. Take the "shardCollection" distributed lock on the collection.
 *  2. Resolve the primary shard of the collection's database. ShardNotFound is returned if the
 *     registry has no such shard.
 *  3. Refuse with AlreadyInitialized if the config metadata already holds chunks for 'ns'.
 *  4. Log "shardCollection.start" to the changelog.
 *  5. Create the initial chunks from 'initPoints' / 'initShardIds', save the collection info
 *     and reload the chunk manager.
 *  6. Send setShardVersion to the primary shard. A failure to run the command is only logged
 *     as a warning.
 *  7. Log "shardCollection" to the changelog.
 *
 * Returns the first non-OK status from steps 1-3, otherwise Status::OK().
 */
Status CatalogManagerReplicaSet::shardCollection(OperationContext* txn,
                                                 const string& ns,
                                                 const ShardKeyPattern& fieldsAndOrder,
                                                 bool unique,
                                                 const vector<BSONObj>& initPoints,
                                                 const set<ShardId>& initShardIds) {
    // Lock the collection globally so that no other mongos can try to shard or drop the collection
    // at the same time.
    auto scopedDistLock = getDistLockManager()->lock(ns, "shardCollection");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    auto status = getDatabase(txn, nsToDatabase(ns));
    if (!status.isOK()) {
        return status.getStatus();
    }

    ShardId dbPrimaryShardId = status.getValue().value.getPrimary();
    const auto primaryShard = grid.shardRegistry()->getShard(txn, dbPrimaryShardId);

    // The lookup result is dereferenced several times below; bail out cleanly instead of
    // dereferencing a null shard if the registry does not know the database's primary.
    if (!primaryShard) {
        return Status(ErrorCodes::ShardNotFound,
                      str::stream() << "could not find the primary shard for collection " << ns);
    }

    {
        // In 3.0 and prior we include this extra safety check that the collection is not getting
        // sharded concurrently by two different mongos instances. It is not 100%-proof, but it
        // reduces the chance that two invocations of shard collection will step on each other's
        // toes.  Now we take the distributed lock so going forward this check won't be necessary
        // but we leave it around for compatibility with other mongoses from 3.0.
        // TODO(spencer): Remove this after 3.2 ships.
        auto countStatus = _runCountCommandOnConfig(
            txn, NamespaceString(ChunkType::ConfigNS), BSON(ChunkType::ns(ns)));
        if (!countStatus.isOK()) {
            return countStatus.getStatus();
        }
        if (countStatus.getValue() > 0) {
            return Status(ErrorCodes::AlreadyInitialized,
                          str::stream() << "collection " << ns << " already sharded with "
                                        << countStatus.getValue()
                                        << " chunks.");
        }
    }

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", ns);
        collectionDetail.append("primary", primaryShard->toString());

        {
            // Scoped so the sub-array builder is destroyed before obj() is called below.
            BSONArrayBuilder initialShards(collectionDetail.subarrayStart("initShards"));
            for (const ShardId& shardId : initShardIds) {
                initialShards.append(shardId);
            }
        }

        collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1));

        logChange(txn,
                  txn->getClient()->clientAddress(true),
                  "shardCollection.start",
                  ns,
                  collectionDetail.obj());
    }

    shared_ptr<ChunkManager> manager(new ChunkManager(ns, fieldsAndOrder, unique));
    manager->createFirstChunks(txn, dbPrimaryShardId, &initPoints, &initShardIds);
    manager->loadExistingRanges(txn, nullptr);

    CollectionInfo collInfo;
    collInfo.useChunkManager(manager);
    collInfo.save(txn, ns);
    manager->reload(txn, true);

    // Tell the primary mongod to refresh its data
    // TODO:  Think the real fix here is for mongos to just
    //        assume that all collections are sharded, when we get there
    SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
        grid.shardRegistry()->getConfigServerConnectionString(),
        dbPrimaryShardId,
        primaryShard->getConnString(),
        NamespaceString(ns),
        manager->getVersion(),
        true);

    auto ssvStatus = grid.shardRegistry()->runCommandWithNotMasterRetries(
        txn, dbPrimaryShardId, "admin", ssv.toBSON());
    if (!ssvStatus.isOK()) {
        // Best effort only: the collection is already sharded in the config metadata, so a
        // failed refresh is reported but does not fail the operation.
        warning() << "could not update initial version of " << ns << " on shard primary "
                  << dbPrimaryShardId << ": " << ssvStatus.getStatus();
    }

    logChange(txn,
              txn->getClient()->clientAddress(true),
              "shardCollection",
              ns,
              BSON("version" << manager->getVersion().toString()));

    return Status::OK();
}
/**
 * Drops the sharded collection 'ns' from every shard and from the config metadata.
 *
 * Steps, in order:
 *  1. Log "dropCollection.start" and fetch the list of all shards.
 *  2. Take the "drop" distributed lock on the collection.
 *  3. Run "drop" on every shard, ignoring NamespaceNotFound; other command failures are
 *     collected per host and reported as a single OperationFailed error.
 *  4. Remove the collection's chunk documents and mark the collection entry as dropped.
 *  5. Send setShardVersion with the DROPPED version (paired with the registry's current config
 *     op time) followed by "unsetSharding" to every shard.
 *  6. Log "dropCollection".
 *
 * Returns the first non-OK status encountered, or Status::OK() when every step succeeded.
 */
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> shards;
    const Status listStatus = getAllShards(txn, &shards);
    if (!listStatus.isOK()) {
        return listStatus;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run.
    // Kept as a named local so the lock handle lives until this function returns.
    auto distLock = getDistLockManager()->lock(ns.ns(), "drop");
    if (!distLock.isOK()) {
        return distLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    // Host -> raw command response for every shard on which the drop failed.
    std::map<string, BSONObj> failuresByHost;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : shards) {
        auto swDrop = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));
        if (!swDrop.isOK()) {
            return swDrop.getStatus();
        }

        const auto dropStatus = getStatusFromCommandResult(swDrop.getValue());

        // A missing namespace on a shard is as good as a successful drop.
        if (dropStatus.isOK() || dropStatus.code() == ErrorCodes::NamespaceNotFound) {
            continue;
        }

        failuresByHost.emplace(shardEntry.getHost(), swDrop.getValue());
    }

    if (!failuresByHost.empty()) {
        StringBuilder message;
        message << "Dropping collection failed on the following hosts: ";

        bool isFirst = true;
        for (const auto& failure : failuresByHost) {
            if (!isFirst) {
                message << ", ";
            }
            isFirst = false;

            message << failure.first << ": " << failure.second;
        }

        return {ErrorCodes::OperationFailed, message.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status metadataStatus =
        remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!metadataStatus.isOK()) {
        return metadataStatus;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType droppedEntry;
    droppedEntry.setNs(ns);
    droppedEntry.setDropped(true);
    droppedEntry.setEpoch(ChunkVersion::DROPPED().epoch());
    droppedEntry.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    metadataStatus = updateCollection(txn, ns.ns(), droppedEntry);
    if (!metadataStatus.isOK()) {
        return metadataStatus;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    // We just called updateCollection above and this would have advanced the config op time, so use
    // the latest value. On the MongoD side, we need to load the latest config metadata, which
    // indicates that the collection was dropped.
    const ChunkVersionAndOpTime droppedVersion(ChunkVersion::DROPPED(),
                                               grid.shardRegistry()->getConfigOpTime());

    // Tell every shard about the dropped version, then send it "unsetSharding".
    for (const auto& shardEntry : shards) {
        SetShardVersionRequest droppedVersionRequest =
            SetShardVersionRequest::makeForVersioningNoPersist(
                grid.shardRegistry()->getConfigServerConnectionString(),
                shardEntry.getName(),
                fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
                ns,
                droppedVersion,
                true);

        auto swSetVersion = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", droppedVersionRequest.toBSON());
        if (!swSetVersion.isOK()) {
            return swSetVersion.getStatus();
        }

        auto setVersionStatus = getStatusFromCommandResult(swSetVersion.getValue());
        if (!setVersionStatus.isOK()) {
            return setVersionStatus;
        }

        auto swUnset = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));
        if (!swUnset.isOK()) {
            return swUnset.getStatus();
        }

        auto unsetStatus = getStatusFromCommandResult(swUnset.getValue());
        if (!unsetStatus.isOK()) {
            return unsetStatus;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}