/**
 * Stores ranges for a particular collection and shard starting from some version
 */
void storeCollectionRanges(const NamespaceString& nss,
                           const string& shardName,
                           const vector<KeyRange>& ranges,
                           const ChunkVersion& startVersion) {
    // Get key pattern from first range
    ASSERT_GREATER_THAN(ranges.size(), 0u);

    CollectionType coll;
    coll.setNs(nss);
    coll.setKeyPattern(ranges.begin()->keyPattern);
    coll.setEpoch(startVersion.epoch());
    coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(1));
    ASSERT_OK(coll.validate());

    DBDirectClient client(&_txn);

    client.update(CollectionType::ConfigNS,
                  BSON(CollectionType::fullNs(coll.getNs().ns())),
                  coll.toBSON(),
                  true,
                  false);

    ChunkVersion nextVersion = startVersion;
    for (vector<KeyRange>::const_iterator it = ranges.begin(); it != ranges.end(); ++it) {
        ChunkType chunk;

        // TODO: We should not rely on the serialized ns, minkey being unique in the future,
        // causes problems since it links string serialization to correctness.
        chunk.setName(Chunk::genID(nss.ns(), it->minKey));
        chunk.setShard(shardName);
        chunk.setNS(nss.ns());
        chunk.setVersion(nextVersion);
        chunk.setMin(it->minKey);
        chunk.setMax(it->maxKey);
        nextVersion.incMajor();

        client.insert(ChunkType::ConfigNS, chunk.toBSON());
    }
}
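// Illustrative sketch (not part of the original source): one way the helper above might be
// called from a test body to seed two adjacent chunks on a single shard. Assumes the test
// fixture provides _txn, and that KeyRange takes (ns, minKey, maxKey, keyPattern) as in the
// range-arithmetic helpers of this era; the namespace and shard name are hypothetical.
void exampleSeedTwoChunks() {
    const NamespaceString nss("foo.bar");
    const BSONObj keyPattern = BSON("_id" << 1);

    vector<KeyRange> ranges;
    ranges.push_back(KeyRange(nss.ns(), BSON("_id" << MINKEY), BSON("_id" << 0), keyPattern));
    ranges.push_back(KeyRange(nss.ns(), BSON("_id" << 0), BSON("_id" << MAXKEY), keyPattern));

    // Chunk versions start at 1|0 with a fresh epoch and increment per range.
    storeCollectionRanges(nss, "shard0000", ranges, ChunkVersion(1, 0, OID::gen()));
}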
void CollectionInfo::save(OperationContext* txn, const string& ns) {
    CollectionType coll;
    coll.setNs(NamespaceString{ns});

    if (_cm) {
        invariant(!_dropped);
        coll.setEpoch(_cm->getVersion().epoch());

        // TODO(schwerin): The following isn't really a date, but is stored as one in-memory and
        // in config.collections, as a historical oddity.
        coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(_cm->getVersion().toLong()));

        coll.setKeyPattern(_cm->getShardKeyPattern().toBSON());
        coll.setDefaultCollation(_cm->getDefaultCollation());
        coll.setUnique(_cm->isUnique());
    } else {
        invariant(_dropped);
        coll.setDropped(true);
        coll.setEpoch(ChunkVersion::DROPPED().epoch());
        coll.setUpdatedAt(Date_t::now());
    }

    uassertStatusOK(grid.catalogClient(txn)->updateCollection(txn, ns, coll));
    _dirty = false;
}
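// Illustrative sketch (not part of the original source): what the "historical oddity" noted
// above means in practice. The stored Date_t is not a timestamp; it is the chunk version's
// 64-bit toLong() value reinterpreted as milliseconds since the epoch (assumed packing:
// major version in the high 32 bits, minor version in the low 32 bits).
void exampleUpdatedAtEncoding() {
    ChunkVersion v(3, 7, OID::gen());
    Date_t asDate = Date_t::fromMillisSinceEpoch(v.toLong());  // raw value ~ (3LL << 32) | 7
    (void)asDate;
}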
void ShardingCatalogManager::shardCollection(OperationContext* opCtx,
                                             const NamespaceString& nss,
                                             const boost::optional<UUID> uuid,
                                             const ShardKeyPattern& fieldsAndOrder,
                                             const BSONObj& defaultCollation,
                                             bool unique,
                                             const vector<BSONObj>& splitPoints,
                                             bool isFromMapReduce,
                                             const ShardId& dbPrimaryShardId) {
    const auto shardRegistry = Grid::get(opCtx)->shardRegistry();

    const auto primaryShard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));

    // Fail if there are partially written chunks from a previous failed shardCollection.
    checkForExistingChunks(opCtx, nss);

    // Prior to 4.0.5, zones cannot be taken into account at collection sharding time, so ignore
    // them and let the balancer apply them later
    const std::vector<TagsType> treatAsNoZonesDefined;

    // Map/reduce with output to sharded collection ignores consistency checks and requires the
    // initial chunks to be spread across shards unconditionally
    const bool treatAsEmpty = isFromMapReduce;

    // Record start in changelog
    {
        BSONObjBuilder collectionDetail;
        collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
        collectionDetail.append("collection", nss.ns());
        if (uuid)
            uuid->appendToBuilder(&collectionDetail, "uuid");
        collectionDetail.append("empty", treatAsEmpty);
        collectionDetail.append("fromMapReduce", isFromMapReduce);
        collectionDetail.append("primary", primaryShard->toString());
        collectionDetail.append("numChunks", static_cast<int>(splitPoints.size() + 1));
        uassertStatusOK(ShardingLogging::get(opCtx)->logChangeChecked(
            opCtx,
            "shardCollection.start",
            nss.ns(),
            collectionDetail.obj(),
            ShardingCatalogClient::kMajorityWriteConcern));
    }

    // Construct the collection default collator.
    std::unique_ptr<CollatorInterface> defaultCollator;
    if (!defaultCollation.isEmpty()) {
        defaultCollator = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext())
                                              ->makeFromBSON(defaultCollation));
    }

    const auto initialChunks = InitialSplitPolicy::createFirstChunks(opCtx,
                                                                     nss,
                                                                     fieldsAndOrder,
                                                                     dbPrimaryShardId,
                                                                     splitPoints,
                                                                     treatAsNoZonesDefined,
                                                                     treatAsEmpty);

    InitialSplitPolicy::writeFirstChunksToConfig(opCtx, initialChunks);

    {
        CollectionType coll;
        coll.setNs(nss);
        if (uuid)
            coll.setUUID(*uuid);
        coll.setEpoch(initialChunks.collVersion().epoch());
        coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(initialChunks.collVersion().toLong()));
        coll.setKeyPattern(fieldsAndOrder.toBSON());
        coll.setDefaultCollation(defaultCollator ? defaultCollator->getSpec().toBSON() : BSONObj());
        coll.setUnique(unique);

        uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
            opCtx, nss, coll, true /*upsert*/));
    }

    auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));
    invariant(!shard->isConfig());

    // Tell the primary mongod to refresh its data
    SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
        shardRegistry->getConfigServerConnectionString(),
        dbPrimaryShardId,
        primaryShard->getConnString(),
        nss,
        initialChunks.collVersion(),
        true /* isAuthoritative */,
        true /* forceRefresh */);

    auto ssvResponse =
        shard->runCommandWithFixedRetryAttempts(opCtx,
                                                ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                                "admin",
                                                ssv.toBSON(),
                                                Shard::RetryPolicy::kIdempotent);
    auto status = ssvResponse.isOK() ? std::move(ssvResponse.getValue().commandStatus)
                                     : std::move(ssvResponse.getStatus());
    if (!status.isOK()) {
        warning() << "could not update initial version of " << nss.ns() << " on shard primary "
                  << dbPrimaryShardId << causedBy(redact(status));
    }

    ShardingLogging::get(opCtx)->logChange(
        opCtx,
        "shardCollection.end",
        nss.ns(),
        BSON("version" << initialChunks.collVersion().toString()),
        ShardingCatalogClient::kMajorityWriteConcern);
}
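// Illustrative sketch (not part of the original source): the "numChunks" value recorded in the
// changelog above is splitPoints.size() + 1, because N split points partition the shard-key
// space into N + 1 contiguous ranges. For a shard key {x: 1} and two hypothetical split points:
void exampleInitialChunkCount() {
    const std::vector<BSONObj> exampleSplitPoints = {BSON("x" << 10), BSON("x" << 20)};
    const int exampleNumChunks = static_cast<int>(exampleSplitPoints.size() + 1);  // == 3
    // Resulting initial chunk ranges: [MinKey, 10), [10, 20), [20, MaxKey)
    (void)exampleNumChunks;
}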
Status ShardingCatalogManager::dropCollection(OperationContext* opCtx, const NamespaceString& nss) {
    const Status logStatus = ShardingLogging::get(opCtx)->logChangeChecked(
        opCtx,
        "dropCollection.start",
        nss.ns(),
        BSONObj(),
        ShardingCatalogClient::kMajorityWriteConcern);
    if (!logStatus.isOK()) {
        return logStatus;
    }

    const auto catalogClient = Grid::get(opCtx)->catalogClient();
    const auto shardsStatus =
        catalogClient->getAllShards(opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!shardsStatus.isOK()) {
        return shardsStatus.getStatus();
    }
    vector<ShardType> allShards = std::move(shardsStatus.getValue().value);

    LOG(1) << "dropCollection " << nss.ns() << " started";

    const auto dropCommandBSON = [opCtx, &nss] {
        BSONObjBuilder builder;
        builder.append("drop", nss.coll());

        if (!opCtx->getWriteConcern().usedDefault) {
            builder.append(WriteConcernOptions::kWriteConcernField,
                           opCtx->getWriteConcern().toBSON());
        }

        return builder.obj();
    }();

    std::map<std::string, BSONObj> errors;
    auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
        if (!swShard.isOK()) {
            return swShard.getStatus();
        }

        const auto& shard = swShard.getValue();

        auto swDropResult = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            nss.db().toString(),
            dropCommandBSON,
            Shard::RetryPolicy::kIdempotent);

        if (!swDropResult.isOK()) {
            return swDropResult.getStatus().withContext(
                str::stream() << "Error dropping collection on shard " << shardEntry.getName());
        }

        auto& dropResult = swDropResult.getValue();

        auto dropStatus = std::move(dropResult.commandStatus);
        auto wcStatus = std::move(dropResult.writeConcernStatus);
        if (!dropStatus.isOK() || !wcStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound && wcStatus.isOK()) {
                // Generally getting NamespaceNotFound is okay to ignore as it simply means that
                // the collection has already been dropped or doesn't exist on this shard.
                // If, however, we get NamespaceNotFound but also have a write concern error then
                // we can't confirm whether the fact that the namespace doesn't exist is actually
                // committed. Thus we must still fail on NamespaceNotFound if there is also a
                // write concern error. This can happen if we call drop, it succeeds but with a
                // write concern error, then we retry the drop.
                continue;
            }

            errors.emplace(shardEntry.getHost(), std::move(dropResult.response));
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << nss.ns() << " shard data deleted";

    // Remove chunk data
    Status result =
        catalogClient->removeConfigDocuments(opCtx,
                                             ChunkType::ConfigNS,
                                             BSON(ChunkType::ns(nss.ns())),
                                             ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " chunk data deleted";

    // Remove tag data
    result = catalogClient->removeConfigDocuments(opCtx,
                                                  TagsType::ConfigNS,
                                                  BSON(TagsType::ns(nss.ns())),
                                                  ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " tag data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(nss);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(Grid::get(opCtx)->getNetwork()->now());

    const bool upsert = false;
    result = ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
        opCtx, nss, coll, upsert);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << nss.ns() << " collection marked as dropped";

    for (const auto& shardEntry : allShards) {
        auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
        if (!swShard.isOK()) {
            return swShard.getStatus();
        }

        const auto& shard = swShard.getValue();

        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            shardRegistry->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassert(28781, ConnectionString::parse(shardEntry.getHost())),
            nss,
            ChunkVersion::DROPPED(),
            true /* isAuthoritative */,
            true /* forceRefresh */);

        auto ssvResult = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            ssv.toBSON(),
            Shard::RetryPolicy::kIdempotent);

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = std::move(ssvResult.getValue().commandStatus);
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shard->runCommandWithFixedRetryAttempts(
            opCtx,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            BSON("unsetSharding" << 1),
            Shard::RetryPolicy::kIdempotent);

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = std::move(unsetShardingStatus.getValue().commandStatus);
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << nss.ns() << " completed";

    ShardingLogging::get(opCtx)->logChange(opCtx,
                                           "dropCollection",
                                           nss.ns(),
                                           BSONObj(),
                                           ShardingCatalogClient::kMajorityWriteConcern);

    return Status::OK();
}
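// Illustrative sketch (not part of the original source): the shape of dropCommandBSON built by
// the lambda at the top of the function above, for a collection "coll" and a caller-supplied
// write concern. The concrete writeConcern sub-document depends on WriteConcernOptions::toBSON();
// {w: "majority"} is shown only as an assumed example.
void exampleDropCommandShape() {
    BSONObjBuilder builder;
    builder.append("drop", "coll");
    builder.append(WriteConcernOptions::kWriteConcernField, BSON("w" << "majority"));
    const BSONObj exampleDropCmd = builder.obj();  // ~ { drop: "coll", writeConcern: { w: "majority" } }
    (void)exampleDropCmd;
}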
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> allShards;
    Status status = getAllShards(txn, &allShards);
    if (!status.isOK()) {
        return status;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run
    stdx::chrono::seconds waitFor(2);
    MONGO_FAIL_POINT_BLOCK(setDropCollDistLockWait, customWait) {
        const BSONObj& data = customWait.getData();
        waitFor = stdx::chrono::seconds(data["waitForSecs"].numberInt());
    }
    const stdx::chrono::milliseconds lockTryInterval(500);
    auto scopedDistLock = getDistLockManager()->lock(ns.ns(), "drop", waitFor, lockTryInterval);
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    std::map<string, BSONObj> errors;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto dropResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));

        if (!dropResult.isOK()) {
            return dropResult.getStatus();
        }

        auto dropStatus = getStatusFromCommandResult(dropResult.getValue());
        if (!dropStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound) {
                continue;
            }

            errors.emplace(shardEntry.getHost(), dropResult.getValue());
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status result = remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(ns);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    result = updateCollection(txn, ns.ns(), coll);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    for (const auto& shardEntry : allShards) {
        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            grid.shardRegistry()->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
            ns,
            ChunkVersion::DROPPED(),
            true);

        auto ssvResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", ssv.toBSON());

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = getStatusFromCommandResult(ssvResult.getValue());
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = getStatusFromCommandResult(unsetShardingStatus.getValue());
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}
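// Illustrative sketch (not part of the original source): the data document the
// setDropCollDistLockWait fail point above reads. Enabling the fail point with a payload like
// this stretches the dist-lock acquisition window to 60 seconds instead of the default 2
// (retrying every 500 ms); the mechanism used to enable the fail point is outside this excerpt.
void exampleDropCollDistLockWaitData() {
    const BSONObj failPointData = BSON("waitForSecs" << 60);
    (void)failPointData;
}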
Status CatalogManagerReplicaSet::dropCollection(OperationContext* txn, const NamespaceString& ns) {
    logChange(
        txn, txn->getClient()->clientAddress(true), "dropCollection.start", ns.ns(), BSONObj());

    vector<ShardType> allShards;
    Status status = getAllShards(txn, &allShards);
    if (!status.isOK()) {
        return status;
    }

    LOG(1) << "dropCollection " << ns << " started";

    // Lock the collection globally so that split/migrate cannot run
    auto scopedDistLock = getDistLockManager()->lock(ns.ns(), "drop");
    if (!scopedDistLock.isOK()) {
        return scopedDistLock.getStatus();
    }

    LOG(1) << "dropCollection " << ns << " locked";

    std::map<string, BSONObj> errors;
    auto* shardRegistry = grid.shardRegistry();

    for (const auto& shardEntry : allShards) {
        auto dropResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), ns.db().toString(), BSON("drop" << ns.coll()));

        if (!dropResult.isOK()) {
            return dropResult.getStatus();
        }

        auto dropStatus = getStatusFromCommandResult(dropResult.getValue());
        if (!dropStatus.isOK()) {
            if (dropStatus.code() == ErrorCodes::NamespaceNotFound) {
                continue;
            }

            errors.emplace(shardEntry.getHost(), dropResult.getValue());
        }
    }

    if (!errors.empty()) {
        StringBuilder sb;
        sb << "Dropping collection failed on the following hosts: ";

        for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
            if (it != errors.cbegin()) {
                sb << ", ";
            }

            sb << it->first << ": " << it->second;
        }

        return {ErrorCodes::OperationFailed, sb.str()};
    }

    LOG(1) << "dropCollection " << ns << " shard data deleted";

    // Remove chunk data
    Status result = remove(txn, ChunkType::ConfigNS, BSON(ChunkType::ns(ns.ns())), 0, nullptr);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " chunk data deleted";

    // Mark the collection as dropped
    CollectionType coll;
    coll.setNs(ns);
    coll.setDropped(true);
    coll.setEpoch(ChunkVersion::DROPPED().epoch());
    coll.setUpdatedAt(grid.shardRegistry()->getNetwork()->now());

    result = updateCollection(txn, ns.ns(), coll);
    if (!result.isOK()) {
        return result;
    }

    LOG(1) << "dropCollection " << ns << " collection marked as dropped";

    // We just called updateCollection above and this would have advanced the config op time, so
    // use the latest value. On the MongoD side, we need to load the latest config metadata, which
    // indicates that the collection was dropped.
    const ChunkVersionAndOpTime droppedVersion(ChunkVersion::DROPPED(),
                                               grid.shardRegistry()->getConfigOpTime());

    for (const auto& shardEntry : allShards) {
        SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
            grid.shardRegistry()->getConfigServerConnectionString(),
            shardEntry.getName(),
            fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
            ns,
            droppedVersion,
            true);

        auto ssvResult = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", ssv.toBSON());

        if (!ssvResult.isOK()) {
            return ssvResult.getStatus();
        }

        auto ssvStatus = getStatusFromCommandResult(ssvResult.getValue());
        if (!ssvStatus.isOK()) {
            return ssvStatus;
        }

        auto unsetShardingStatus = shardRegistry->runCommandWithNotMasterRetries(
            txn, shardEntry.getName(), "admin", BSON("unsetSharding" << 1));

        if (!unsetShardingStatus.isOK()) {
            return unsetShardingStatus.getStatus();
        }

        auto unsetShardingResult = getStatusFromCommandResult(unsetShardingStatus.getValue());
        if (!unsetShardingResult.isOK()) {
            return unsetShardingResult;
        }
    }

    LOG(1) << "dropCollection " << ns << " completed";

    logChange(txn, txn->getClient()->clientAddress(true), "dropCollection", ns.ns(), BSONObj());

    return Status::OK();
}