void MetadataManager::beginReceive(const ChunkRange& range) { stdx::lock_guard<stdx::mutex> scopedLock(_managerLock); // Collection is not known to be sharded if the active metadata tracker is null invariant(_activeMetadataTracker); // If range is contained within pending chunks, this means a previous migration must have failed // and we need to clean all overlaps RangeVector overlappedChunks; getRangeMapOverlap(_receivingChunks, range.getMin(), range.getMax(), &overlappedChunks); for (const auto& overlapChunkMin : overlappedChunks) { auto itRecv = _receivingChunks.find(overlapChunkMin.first); invariant(itRecv != _receivingChunks.end()); const ChunkRange receivingRange(itRecv->first, itRecv->second); _receivingChunks.erase(itRecv); // Make sure any potentially partially copied chunks are scheduled to be cleaned up _addRangeToClean_inlock(receivingRange); } // Need to ensure that the background range deleter task won't delete the range we are about to // receive _removeRangeToClean_inlock(range); _receivingChunks.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned())); // For compatibility with the current range deleter, update the pending chunks on the collection // metadata to include the chunk being received ChunkType chunk; chunk.setMin(range.getMin()); chunk.setMax(range.getMax()); _setActiveMetadata_inlock(_activeMetadataTracker->metadata->clonePlusPending(chunk)); }
void MetadataManager::_removeRangeToClean_inlock(const ChunkRange& range) { auto it = _rangesToClean.upper_bound(range.getMin()); // We want our iterator to point at the greatest value // that is still less than or equal to range. if (it != _rangesToClean.begin()) { --it; } for (; it != _rangesToClean.end() && it->first < range.getMax();) { if (it->second <= range.getMin()) { ++it; continue; } // There's overlap between *it and range so we remove *it // and then replace with new ranges. BSONObj oldMin = it->first, oldMax = it->second; _rangesToClean.erase(it++); if (oldMin < range.getMin()) { _addRangeToClean_inlock(ChunkRange(oldMin, range.getMin())); } if (oldMax > range.getMax()) { _addRangeToClean_inlock(ChunkRange(range.getMax(), oldMax)); } } }
/**
 * Constructs a range that starts at the min key of 'min' and ends at the max key of 'max'.
 *
 * The manager and shard id are copied from 'min'. The invariants in the body require that both
 * inputs report equal shard ids and equal managers, and that the max key of 'min' equals the
 * min key of 'max'.
 */
ChunkRange::ChunkRange(const ChunkRange& min, const ChunkRange& max)
    : _manager(min.getManager()),
      _shardId(min.getShardId()),
      _min(min.getMin()),
      _max(max.getMax()) {
    // Both inputs must agree on shard and manager.
    invariant(min.getShardId() == max.getShardId());
    invariant(min.getManager() == max.getManager());
    // 'min' must end exactly where 'max' begins.
    invariant(min.getMax() == max.getMin());
}
void MetadataManager::_addRangeToClean_inlock(const ChunkRange& range) { invariant(!rangeMapOverlaps(_rangesToClean, range.getMin(), range.getMax())); invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax())); _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned())); // If _rangesToClean was previously empty, we need to start the collection range deleter if (_rangesToClean.size() == 1UL) { ShardingState::get(_serviceContext)->scheduleCleanup(_nss); } }
/**
 * Adds 'range' to '_rangesToClean', keyed by its min key and described by a
 * RangeToCleanDescriptor built from its max key. The range must overlap neither an existing
 * range to clean nor a chunk currently being received (both are enforced with invariants).
 *
 * When the map holds exactly one entry after the insertion, cleanup is scheduled for '_nss'
 * through the ShardingState of '_serviceContext'.
 *
 * Returns the notification obtained from the descriptor created for 'range'.
 */
std::shared_ptr<Notification<Status>> MetadataManager::_addRangeToClean_inlock(
    const ChunkRange& range) {
    // The first check copies the ranges to clean solely so that the rangeMapOverlaps helper can
    // be reused; the copy is not otherwise needed.
    invariant(!rangeMapOverlaps(_getCopyOfRangesToClean_inlock(), range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));

    RangeToCleanDescriptor descriptor(range.getMax().getOwned());
    auto ownedMin = range.getMin().getOwned();
    _rangesToClean.insert(std::make_pair(std::move(ownedMin), descriptor));

    // Exactly one entry means _rangesToClean was previously empty, so the collection range
    // deleter has to be started.
    const bool holdsSingleRange = (_rangesToClean.size() == 1UL);
    if (holdsSingleRange) {
        ShardingState::get(_serviceContext)->scheduleCleanup(_nss);
    }

    return descriptor.getNotification();
}
void MetadataManager::forgetReceive(const ChunkRange& range) { stdx::lock_guard<stdx::mutex> scopedLock(_managerLock); { auto it = _receivingChunks.find(range.getMin()); invariant(it != _receivingChunks.end()); // Verify entire ChunkRange is identical, not just the min key. invariant(it->second == range.getMax()); _receivingChunks.erase(it); } // This is potentially a partially received data, which needs to be cleaned up _addRangeToClean_inlock(range); // For compatibility with the current range deleter, update the pending chunks on the collection // metadata to exclude the chunk being received, which was added in beginReceive ChunkType chunk; chunk.setMin(range.getMin()); chunk.setMax(range.getMax()); _setActiveMetadata_inlock(_activeMetadataTracker->metadata->cloneMinusPending(chunk)); }
/**
 * Adds 'range' to '_rangesToClean', keyed by its min key. The range must overlap neither an
 * existing range to clean nor a chunk currently being received (both are enforced with
 * invariants).
 */
void MetadataManager::_addRangeToClean_inlock(const ChunkRange& range) {
    invariant(!rangeMapOverlaps(_rangesToClean, range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));

    // The map stores the results of getOwned() on both bounds rather than the bounds themselves.
    auto ownedMin = range.getMin().getOwned();
    auto ownedMax = range.getMax().getOwned();
    _rangesToClean.insert(std::make_pair(std::move(ownedMin), std::move(ownedMax)));
}
/**
 * Constructs a chunk entry for namespace 'nss' whose bounds are the min and max keys of
 * 'range', with version 'version' and shard 'shardId'.
 *
 * 'nss' and 'shardId' are received by value and moved into their members, so a caller passing
 * temporaries pays for no extra copy.
 */
ChunkType::ChunkType(NamespaceString nss, ChunkRange range, ChunkVersion version, ShardId shardId)
    : _nss(std::move(nss)),
      _min(range.getMin()),
      _max(range.getMax()),
      _version(version),
      _shard(std::move(shardId)) {}
/**
 * Sends a 'splitChunk' command to shard 'shardId' asking it to split 'chunkRange' of 'nss' at
 * 'splitPoints'.
 *
 * 'splitPoints' must not be empty (invariant). Fails with BadValue when more than 8192 split
 * points are supplied, and with CannotSplit when the first split point equals the chunk's min
 * key or the last one equals its max key.
 *
 * If the shard cannot be obtained or the command fails, the failure is logged and an error
 * carrying the same code is returned. On success the response's kShouldMigrate field, when it
 * is present and parses as a ChunkRange, is returned; if it is present but does not parse, the
 * parse error is returned. When the field is missing, or cannot be extracted as an object
 * (logged as a warning), an empty optional is returned.
 */
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const ChunkRange& chunkRange,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                              << " parts at a time."};
    }

    // Sanity check that no split is attempted at the boundaries of the chunk. Chunk split commit
    // performs this check as well; it is repeated here for parity with old auto-split code,
    // which might rely on it.
    const BSONObj& firstPoint = splitPoints.front();
    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMin() == firstPoint)) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << firstPoint
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    const BSONObj& lastPoint = splitPoints.back();
    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMax() == lastPoint)) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << lastPoint
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    // Assemble the splitChunk command.
    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    chunkRange.append(&cmd);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    // Overwritten on every path below.
    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (shardStatus.isOK()) {
        auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts(
            txn,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            cmdObj,
            Shard::RetryPolicy::kNotIdempotent);

        if (cmdStatus.isOK()) {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        } else {
            status = std::move(cmdStatus.getStatus());
        }
    } else {
        status = shardStatus.getStatus();
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    // The split went through; check whether the response names a range to migrate.
    BSONElement shouldMigrateElement;
    const Status extractStatus =
        bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);

    if (!extractStatus.isOK()) {
        // A missing field is not reported; any other extraction failure is logged.
        if (extractStatus != ErrorCodes::NoSuchKey) {
            warning()
                << "Chunk migration will be skipped because splitChunk returned invalid response: "
                << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
                << causedBy(redact(extractStatus));
        }

        return boost::optional<ChunkRange>();
    }

    auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
    if (!chunkRangeStatus.isOK()) {
        return chunkRangeStatus.getStatus();
    }

    return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
}
/**
 * Constructs a tag entry named 'tag' for namespace 'nss', bounded by the min and max keys of
 * 'range'.
 *
 * 'nss' and 'tag' are moved into their members. The bounds are stored as the results of
 * getOwned() on the range's keys (presumably so they do not depend on the lifetime of 'range';
 * confirm against BSONObj::getOwned).
 */
TagsType::TagsType(NamespaceString nss, std::string tag, ChunkRange range)
    : _ns(std::move(nss)),
      _tag(std::move(tag)),
      _minKey(range.getMin().getOwned()),
      _maxKey(range.getMax().getOwned()) {}
/**
 * Runs a delete-with-index-scan plan over the documents of 'collection' whose shard key lies in
 * 'range' (start key included, end key excluded).
 *
 * 'collection' must be non-null and this deleter must not be empty (invariants). 'keyPattern'
 * selects the shard-key-prefixed index to scan.
 *
 * Returns InternalError if no shard-key-prefixed index exists or if the index cannot be found
 * again by name. Otherwise returns the number of times the plan advanced. The plan is always
 * asked for at least one result, and the loop stops on end-of-data, on a FAILURE state (logged
 * as a warning) or once the count reaches 'maxToDelete'.
 */
StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
                                                    Collection* collection,
                                                    BSONObj const& keyPattern,
                                                    ChunkRange const& range,
                                                    int maxToDelete) {
    invariant(collection != nullptr);
    invariant(!isEmpty());

    auto const& nss = collection->ns();

    // The key pattern may apply to more than one index, so pick the index here and use its full
    // key pattern from now on.
    auto catalog = collection->getIndexCatalog();
    const IndexDescriptor* shardKeyIdx =
        catalog->findShardKeyPrefixedIndex(opCtx, keyPattern, false);
    if (shardKeyIdx == nullptr) {
        std::string msg = str::stream() << "Unable to find shard key index for "
                                        << keyPattern.toString() << " in " << nss.ns();
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    // Extend both bounds of the range so that they match the chosen index.
    const KeyPattern indexKeyPattern(shardKeyIdx->keyPattern());
    const auto toIndexBound = [&](const auto& key) {
        return Helpers::toKeyFormat(indexKeyPattern.extendRangeBound(key, false));
    };

    const auto min = toIndexBound(range.getMin());
    const auto max = toIndexBound(range.getMax());

    LOG(1) << "begin removal of " << min << " to " << max << " in " << nss.ns();

    // Fetch the descriptor again, this time by index name.
    const auto indexName = shardKeyIdx->indexName();
    const IndexDescriptor* descriptor =
        collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
    if (descriptor == nullptr) {
        std::string msg = str::stream() << "shard key index with name " << indexName << " on '"
                                        << nss.ns() << "' was dropped";
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    auto deleteStageParams = std::make_unique<DeleteStageParams>();
    deleteStageParams->fromMigrate = true;
    deleteStageParams->isMulti = true;
    deleteStageParams->returnDeleted = true;

    // With moveParanoia set, a RemoveSaver is attached to the delete stage.
    if (serverGlobalParams.moveParanoia) {
        deleteStageParams->removeSaver =
            std::make_unique<RemoveSaver>("moveChunk", nss.ns(), "cleaning");
    }

    auto exec = InternalPlanner::deleteWithIndexScan(opCtx,
                                                     collection,
                                                     std::move(deleteStageParams),
                                                     descriptor,
                                                     min,
                                                     max,
                                                     BoundInclusion::kIncludeStartKeyOnly,
                                                     PlanExecutor::YIELD_MANUAL,
                                                     InternalPlanner::FORWARD);

    // NOTE(review): this object is not referenced again below; it is kept because constructing
    // it may have effects that cannot be seen from here - confirm.
    PlanYieldPolicy planYieldPolicy(exec.get(), PlanExecutor::YIELD_MANUAL);

    int numDeleted = 0;
    for (;;) {
        BSONObj deletedObj;
        PlanExecutor::ExecState state = exec->getNext(&deletedObj, nullptr);

        if (state == PlanExecutor::IS_EOF) {
            break;
        }

        if (state == PlanExecutor::FAILURE) {
            warning() << PlanExecutor::statestr(state) << " - cursor error while trying to delete "
                      << redact(min) << " to " << redact(max) << " in " << nss
                      << ": FAILURE, stats: " << Explain::getWinningPlanStats(exec.get());
            break;
        }

        invariant(PlanExecutor::ADVANCED == state);
        ShardingStatistics::get(opCtx).countDocsDeletedOnDonor.addAndFetch(1);

        // Stop once the requested number of deletions has been reached.
        if (++numDeleted >= maxToDelete) {
            break;
        }
    }

    return numDeleted;
}
bool MetadataManager::isInRangesToClean(const ChunkRange& range) { stdx::lock_guard<stdx::mutex> scopedLock(_managerLock); // For convenience, this line makes an unnecessary copy, to reuse the // rangeMapContains helper function. return rangeMapContains(_getCopyOfRangesToClean_inlock(), range.getMin(), range.getMax()); }
/**
 * Scans the shard key index over 'range' (start key included, end key excluded) and deletes the
 * documents found, one write unit of work per document.
 *
 * 'collection' must be non-null and this deleter must not be empty (invariants). 'keyPattern'
 * selects the shard-key-prefixed index to scan.
 *
 * Returns InternalError if no shard-key-prefixed index exists or if the index cannot be found
 * again by name. Otherwise returns the number of documents deleted by this call. The scan is
 * always asked for at least one result, and the loop stops on end-of-data, on a FAILURE or DEAD
 * state, when the cursor state cannot be restored after a delete (both logged as warnings), or
 * once the count reaches 'maxToDelete'.
 */
StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
                                                    Collection* collection,
                                                    BSONObj const& keyPattern,
                                                    ChunkRange const& range,
                                                    int maxToDelete) {
    invariant(collection != nullptr);
    invariant(!isEmpty());

    auto const& nss = collection->ns();

    // The key pattern may apply to more than one index, so pick the index here and use its full
    // key pattern from now on.
    auto catalog = collection->getIndexCatalog();
    const IndexDescriptor* shardKeyIdx =
        catalog->findShardKeyPrefixedIndex(opCtx, keyPattern, false);
    if (shardKeyIdx == nullptr) {
        std::string msg = str::stream() << "Unable to find shard key index for "
                                        << keyPattern.toString() << " in " << nss.ns();
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    // Extend both bounds of the range so that they match the chosen index.
    const KeyPattern indexKeyPattern(shardKeyIdx->keyPattern());
    const auto toIndexBound = [&](const auto& key) {
        return Helpers::toKeyFormat(indexKeyPattern.extendRangeBound(key, false));
    };

    const auto min = toIndexBound(range.getMin());
    const auto max = toIndexBound(range.getMax());

    LOG(1) << "begin removal of " << min << " to " << max << " in " << nss.ns();

    // Fetch the descriptor again, this time by index name.
    const auto indexName = shardKeyIdx->indexName();
    IndexDescriptor* descriptor = collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
    if (descriptor == nullptr) {
        std::string msg = str::stream() << "shard key index with name " << indexName << " on '"
                                        << nss.ns() << "' was dropped";
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    // With moveParanoia set, every document is handed to a RemoveSaver before it is deleted.
    boost::optional<Helpers::RemoveSaver> saver;
    if (serverGlobalParams.moveParanoia) {
        saver.emplace("moveChunk", nss.ns(), "cleaning");
    }

    auto exec = InternalPlanner::indexScan(opCtx,
                                           collection,
                                           descriptor,
                                           min,
                                           max,
                                           BoundInclusion::kIncludeStartKeyOnly,
                                           PlanExecutor::YIELD_MANUAL,
                                           InternalPlanner::FORWARD,
                                           InternalPlanner::IXSCAN_FETCH);

    int numDeleted = 0;
    for (;;) {
        RecordId rloc;
        BSONObj obj;
        PlanExecutor::ExecState state = exec->getNext(&obj, &rloc);

        if (state == PlanExecutor::IS_EOF) {
            break;
        }

        if (state == PlanExecutor::FAILURE || state == PlanExecutor::DEAD) {
            warning() << PlanExecutor::statestr(state) << " - cursor error while trying to delete "
                      << redact(min) << " to " << redact(max) << " in " << nss << ": "
                      << redact(WorkingSetCommon::toStatusString(obj))
                      << ", stats: " << Explain::getWinningPlanStats(exec.get());
            break;
        }

        invariant(PlanExecutor::ADVANCED == state);

        // The executor state is saved before the write and restored after it.
        exec->saveState();

        writeConflictRetry(opCtx, "delete range", nss.ns(), [&] {
            WriteUnitOfWork wuow(opCtx);
            if (saver) {
                uassertStatusOK(saver->goingToDelete(obj));
            }
            collection->deleteDocument(opCtx, kUninitializedStmtId, rloc, nullptr, true);
            wuow.commit();
        });

        try {
            exec->restoreState();
        } catch (const DBException& ex) {
            // The document above was deleted but is not counted, because the loop is left
            // before the counter is advanced.
            warning() << "error restoring cursor state while trying to delete " << redact(min)
                      << " to " << redact(max) << " in " << nss
                      << ", stats: " << Explain::getWinningPlanStats(exec.get()) << ": "
                      << redact(ex.toStatus());
            break;
        }

        ShardingStatistics::get(opCtx).countDocsDeletedOnDonor.addAndFetch(1);

        // Stop once the requested number of deletions has been reached.
        if (++numDeleted >= maxToDelete) {
            break;
        }
    }

    return numDeleted;
}