/**
 * Proposes a migration for one specific chunk.
 *
 * The destination is the least loaded shard that may receive chunks carrying this chunk's tag
 * (no shards are excluded from consideration). Returns an empty optional when no valid
 * destination exists or when the best destination is the shard that already owns the chunk.
 */
boost::optional<MigrateInfo> BalancerPolicy::balanceSingleChunk(
    const ChunkType& chunk,
    const ShardStatisticsVector& shardStats,
    const DistributionStatus& distribution) {
    const string chunkTag = distribution.getTagForChunk(chunk);
    const ShardId destination =
        _getLeastLoadedReceiverShard(shardStats, distribution, chunkTag, {});

    // Only a valid destination that differs from the current owner is worth a migration
    const bool worthMoving = destination.isValid() && !(destination == chunk.getShard());
    if (worthMoving) {
        return MigrateInfo(distribution.nss().ns(), destination, chunk);
    }

    return boost::optional<MigrateInfo>();
}
/**
 * Returns the shard with the fewest chunks (as reported by
 * DistributionStatus::numberOfChunksInShard) among those that are not in 'excludedShards' and
 * for which isShardSuitableReceiver(stat, tag) returns OK.
 *
 * When several shards share the lowest count, the one appearing first in 'shardStats' wins.
 * If no shard qualifies, a default-constructed ShardId is returned.
 */
ShardId BalancerPolicy::_getLeastLoadedReceiverShard(const ShardStatisticsVector& shardStats,
                                                     const DistributionStatus& distribution,
                                                     const string& tag,
                                                     const set<ShardId>& excludedShards) {
    ShardId leastLoaded;
    unsigned fewestChunks = numeric_limits<unsigned>::max();

    for (const auto& shardStat : shardStats) {
        const bool isExcluded = excludedShards.count(shardStat.shardId) != 0;

        // Suitability is only evaluated for shards which have not been excluded
        if (isExcluded || !isShardSuitableReceiver(shardStat, tag).isOK()) {
            continue;
        }

        const unsigned chunkCount = distribution.numberOfChunksInShard(shardStat.shardId);
        if (chunkCount < fewestChunks) {
            leastLoaded = shardStat.shardId;
            fewestChunks = chunkCount;
        }
    }

    return leastLoaded;
}
/**
 * Computes the set of chunk migrations that the balancer policy proposes for collection 'nss'.
 *
 * Any failure to obtain the chunk manager or to build the collection's distribution information
 * is returned as-is. If some tag range begins at a key which is not the minimum of an existing
 * chunk, IllegalOperation is returned and the collection is not balanced in this round.
 */
StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::_getMigrateCandidatesForCollection(
    OperationContext* txn,
    const NamespaceString& nss,
    const ShardStatisticsVector& shardStats,
    bool aggressiveBalanceHint) {
    auto swScopedCM = ScopedChunkManager::getExisting(txn, nss);
    if (!swScopedCM.isOK()) {
        return swScopedCM.getStatus();
    }

    auto scopedCM = std::move(swScopedCM.getValue());
    ChunkManager* const chunkManager = scopedCM.cm();

    auto swCollInfo = createCollectionDistributionInfo(txn, shardStats, chunkManager);
    if (!swCollInfo.isOK()) {
        return swCollInfo.getStatus();
    }

    auto collInfo = std::move(swCollInfo.getValue());
    DistributionStatus distribution = std::move(std::get<0>(collInfo));
    ChunkMinimumsSet allChunkMinimums = std::move(std::get<1>(collInfo));

    for (const auto& tagRangeEntry : distribution.tagRanges()) {
        const auto& tagRange = tagRangeEntry.second;

        const bool startsOnChunkBoundary = allChunkMinimums.count(tagRange.min) != 0;
        if (startsOnChunkBoundary) {
            continue;
        }

        // The tag range starts somewhere in the middle of a chunk, so balancing of this
        // collection is skipped until the chunk has been split at a later iteration.
        //
        // TODO: It should be possible to skip only the chunks which straddle tags and still make
        // some balancing progress.
        return {ErrorCodes::IllegalOperation,
                str::stream()
                    << "Tag boundaries "
                    << tagRange.toString()
                    << " fall in the middle of an existing chunk. Balancing for collection "
                    << nss.ns()
                    << " will be postponed until the chunk is split appropriately."};
    }

    return BalancerPolicy::balance(shardStats, distribution, aggressiveBalanceHint);
}
/**
 * Checks whether 'chunk' may be moved to shard 'newShardId'.
 *
 * Errors from fetching cluster statistics, obtaining the chunk manager or building the
 * collection's distribution information are returned unchanged. ShardNotFound is returned when
 * the statistics contain no entry for 'newShardId'. Otherwise the result is whatever
 * BalancerPolicy::isShardSuitableReceiver reports for that shard and the chunk's tag.
 */
Status BalancerChunkSelectionPolicyImpl::checkMoveAllowed(OperationContext* txn,
                                                          const ChunkType& chunk,
                                                          const ShardId& newShardId) {
    auto swShardStats = _clusterStats->getStats(txn);
    if (!swShardStats.isOK()) {
        return swShardStats.getStatus();
    }
    auto shardStats = std::move(swShardStats.getValue());

    const NamespaceString nss(chunk.getNS());

    auto swScopedCM = ScopedChunkManager::getExisting(txn, nss);
    if (!swScopedCM.isOK()) {
        return swScopedCM.getStatus();
    }
    auto scopedCM = std::move(swScopedCM.getValue());
    ChunkManager* const chunkManager = scopedCM.cm();

    auto swCollInfo = createCollectionDistributionInfo(txn, shardStats, chunkManager);
    if (!swCollInfo.isOK()) {
        return swCollInfo.getStatus();
    }
    auto collInfo = std::move(swCollInfo.getValue());
    DistributionStatus distribution = std::move(std::get<0>(collInfo));

    // The first statistics entry matching the requested shard decides the outcome
    for (const auto& shardStat : shardStats) {
        if (shardStat.shardId == newShardId) {
            return BalancerPolicy::isShardSuitableReceiver(shardStat,
                                                           distribution.getTagForChunk(chunk));
        }
    }

    return {ErrorCodes::ShardNotFound,
            str::stream() << "Unable to find constraints information for shard " << newShardId
                          << ". Move to this shard will be disallowed."};
}
/**
 * Returns the shard, not contained in 'excludedShards', which holds the largest number of chunks
 * for 'chunkTag' (as reported by DistributionStatus::numberOfChunksInShardWithTag).
 *
 * Shards holding zero such chunks are never selected. On ties, the shard appearing first in
 * 'shardStats' wins. If nothing qualifies, a default-constructed ShardId is returned.
 */
ShardId BalancerPolicy::_getMostOverloadedShard(const ShardStatisticsVector& shardStats,
                                                const DistributionStatus& distribution,
                                                const string& chunkTag,
                                                const set<ShardId>& excludedShards) {
    ShardId mostLoaded;
    unsigned highestCount = 0;

    for (const auto& shardStat : shardStats) {
        if (excludedShards.count(shardStat.shardId) != 0) {
            continue;
        }

        const unsigned taggedChunkCount =
            distribution.numberOfChunksInShardWithTag(shardStat.shardId, chunkTag);
        if (taggedChunkCount > highestCount) {
            mostLoaded = shardStat.shardId;
            highestCount = taggedChunkCount;
        }
    }

    return mostLoaded;
}
bool BalancerPolicy::_singleZoneBalance(const ShardStatisticsVector& shardStats, const DistributionStatus& distribution, const string& tag, size_t imbalanceThreshold, vector<MigrateInfo>* migrations, set<ShardId>* usedShards) { const ShardId from = _getMostOverloadedShard(shardStats, distribution, tag, *usedShards); if (!from.isValid()) return false; const size_t max = distribution.numberOfChunksInShardWithTag(from, tag); if (max == 0) return false; const ShardId to = _getLeastLoadedReceiverShard(shardStats, distribution, tag, *usedShards); if (!to.isValid()) { if (migrations->empty()) { log() << "No available shards to take chunks for tag [" << tag << "]"; } return false; } const size_t min = distribution.numberOfChunksInShardWithTag(to, tag); if (min >= max) return false; const size_t totalNumberOfChunksWithTag = (tag.empty() ? distribution.totalChunks() : distribution.totalChunksWithTag(tag)); size_t totalNumberOfShardsWithTag = 0; for (const auto& stat : shardStats) { if (tag.empty() || stat.shardTags.count(tag)) { totalNumberOfShardsWithTag++; } } // totalNumberOfShardsWithTag cannot be zero if the to shard is valid invariant(totalNumberOfShardsWithTag); invariant(totalNumberOfChunksWithTag >= max); // The ideal should be at least one per shard const size_t idealNumberOfChunksPerShardWithTag = (totalNumberOfChunksWithTag < totalNumberOfShardsWithTag) ? 
1 : (totalNumberOfChunksWithTag / totalNumberOfShardsWithTag); const size_t imbalance = max - idealNumberOfChunksPerShardWithTag; LOG(1) << "collection : " << distribution.nss().ns(); LOG(1) << "zone : " << tag; LOG(1) << "donor : " << from << " chunks on " << max; LOG(1) << "receiver : " << to << " chunks on " << min; LOG(1) << "ideal : " << idealNumberOfChunksPerShardWithTag; LOG(1) << "threshold : " << imbalanceThreshold; // Check whether it is necessary to balance within this zone if (imbalance < imbalanceThreshold) return false; const vector<ChunkType>& chunks = distribution.getChunks(from); unsigned numJumboChunks = 0; for (const auto& chunk : chunks) { if (distribution.getTagForChunk(chunk) != tag) continue; if (chunk.getJumbo()) { numJumboChunks++; continue; } migrations->emplace_back(distribution.nss().ns(), to, chunk); invariant(usedShards->insert(chunk.getShard()).second); invariant(usedShards->insert(to).second); return true; } if (numJumboChunks) { warning() << "Shard: " << from << ", collection: " << distribution.nss().ns() << " has only jumbo chunks for zone \'" << tag << "\' and cannot be balanced. Jumbo chunks count: " << numJumboChunks; } return false; }
/**
 * Produces the list of chunk migrations proposed for one collection, described by
 * 'distribution', given the per-shard statistics in 'shardStats'.
 *
 * Migrations are chosen in three passes, and a shard takes part in at most one migration
 * (either as donor or as receiver) across all of them:
 *   1) move one non-jumbo chunk off each draining shard;
 *   2) move one chunk off each shard that holds a chunk whose tag the shard does not have;
 *   3) repeatedly even out chunk counts within every tag, and among untagged chunks.
 *
 * 'shouldAggressivelyBalance' selects the aggressive imbalance threshold for pass 3; it is also
 * used whenever the collection has fewer than 20 chunks.
 */
vector<MigrateInfo> BalancerPolicy::balance(const ShardStatisticsVector& shardStats,
                                            const DistributionStatus& distribution,
                                            bool shouldAggressivelyBalance) {
    vector<MigrateInfo> migrations;

    // Set of shards, which have already been used for migrations. Used so we don't return multiple
    // migrations for the same shard.
    set<ShardId> usedShards;

    // 1) Check for shards, which are in draining mode and must have chunks moved off of them
    {
        for (const auto& stat : shardStats) {
            if (!stat.isDraining)
                continue;

            // A shard which already takes part in a migration must not be picked again, since
            // the insertion into usedShards below is asserted to succeed
            if (usedShards.count(stat.shardId))
                continue;

            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);

            if (chunks.empty())
                continue;

            // Now we know we need to move to chunks off this shard, but only if permitted by the
            // tags policy
            unsigned numJumboChunks = 0;

            // Since we have to move all chunks, lets just do in order
            for (const auto& chunk : chunks) {
                if (chunk.getJumbo()) {
                    numJumboChunks++;
                    continue;
                }

                const string tag = distribution.getTagForChunk(chunk);

                const ShardId to =
                    _getLeastLoadedReceiverShard(shardStats, distribution, tag, usedShards);
                if (!to.isValid()) {
                    if (migrations.empty()) {
                        warning() << "Chunk " << chunk
                                  << " is on a draining shard, but no appropriate recipient found";
                    }
                    continue;
                }

                invariant(to != stat.shardId);
                migrations.emplace_back(distribution.nss().ns(), to, chunk);
                invariant(usedShards.insert(stat.shardId).second);
                invariant(usedShards.insert(to).second);
                break;
            }

            if (migrations.empty()) {
                warning() << "Unable to find any chunk to move from draining shard "
                          << stat.shardId << ". numJumboChunks: " << numJumboChunks;
            }
        }
    }

    // 2) Check for chunks, which are on the wrong shard and must be moved off of it
    if (!distribution.tags().empty()) {
        for (const auto& stat : shardStats) {
            // Shards used by step 1 or by an earlier iteration of this step (as donor or as
            // receiver) are skipped. Without this, scheduling a second migration from such a
            // shard would trip the usedShards insertion invariant below.
            if (usedShards.count(stat.shardId))
                continue;

            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);

            for (const auto& chunk : chunks) {
                const string tag = distribution.getTagForChunk(chunk);
                if (tag.empty())
                    continue;

                if (stat.shardTags.count(tag))
                    continue;

                if (chunk.getJumbo()) {
                    warning() << "chunk " << chunk << " violates tag " << tag
                              << ", but it is jumbo and cannot be moved";
                    continue;
                }

                const ShardId to =
                    _getLeastLoadedReceiverShard(shardStats, distribution, tag, usedShards);
                if (!to.isValid()) {
                    if (migrations.empty()) {
                        warning() << "chunk " << chunk << " violates tag " << tag
                                  << ", but no appropriate recipient found";
                    }
                    continue;
                }

                invariant(to != stat.shardId);
                migrations.emplace_back(distribution.nss().ns(), to, chunk);
                invariant(usedShards.insert(stat.shardId).second);
                invariant(usedShards.insert(to).second);
                break;
            }
        }
    }

    // 3) for each tag balance
    const size_t imbalanceThreshold = (shouldAggressivelyBalance || distribution.totalChunks() < 20)
        ? kAggressiveImbalanceThreshold
        : kDefaultImbalanceThreshold;

    // The empty tag stands for the chunks which do not belong to any tag
    vector<string> tagsPlusEmpty(distribution.tags().begin(), distribution.tags().end());
    tagsPlusEmpty.push_back("");

    for (const auto& tag : tagsPlusEmpty) {
        // Keep scheduling migrations for this tag until no further one can be found
        while (_singleZoneBalance(
            shardStats, distribution, tag, imbalanceThreshold, &migrations, &usedShards))
            ;
    }

    return migrations;
}
/**
 * Computes the chunk splits needed so that every tag range of collection 'nss' begins on a chunk
 * boundary.
 *
 * For each tag range whose minimum key is not the minimum of an existing chunk, the chunk
 * containing that key is looked up and the key is recorded as a split point for it. Consecutive
 * split points which fall in the same chunk are grouped into a single SplitInfo entry.
 *
 * Failures to obtain the chunk manager or to build the collection's distribution information are
 * returned unchanged.
 */
StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidatesForCollection(
    OperationContext* txn, const NamespaceString& nss, const ShardStatisticsVector& shardStats) {
    auto scopedCMStatus = ScopedChunkManager::getExisting(txn, nss);
    if (!scopedCMStatus.isOK()) {
        return scopedCMStatus.getStatus();
    }

    auto scopedCM = std::move(scopedCMStatus.getValue());
    ChunkManager* const cm = scopedCM.cm();

    auto collInfoStatus = createCollectionDistributionInfo(txn, shardStats, cm);
    if (!collInfoStatus.isOK()) {
        return collInfoStatus.getStatus();
    }

    auto collInfo = std::move(collInfoStatus.getValue());

    DistributionStatus distribution = std::move(std::get<0>(collInfo));
    ChunkMinimumsSet allChunkMinimums = std::move(std::get<1>(collInfo));

    SplitInfoVector splitCandidates;

    // Accumulate split points for the same chunk together
    shared_ptr<Chunk> currentChunk;
    vector<BSONObj> currentSplitVector;

    for (const auto& tagRangeEntry : distribution.tagRanges()) {
        const auto& tagRange = tagRangeEntry.second;

        // Tag ranges which already start on a chunk boundary need no split
        if (allChunkMinimums.count(tagRange.min)) {
            continue;
        }

        shared_ptr<Chunk> chunk = cm->findIntersectingChunkWithSimpleCollation(txn, tagRange.min);

        if (!currentChunk) {
            currentChunk = chunk;
        }

        invariant(currentChunk);

        if (chunk == currentChunk) {
            currentSplitVector.push_back(tagRange.min);
        } else {
            // A different chunk has been reached: emit the split points gathered so far for the
            // previous chunk and start accumulating for the new one
            splitCandidates.emplace_back(currentChunk->getShardId(),
                                         nss,
                                         cm->getVersion(),
                                         currentChunk->getLastmod(),
                                         currentChunk->getMin(),
                                         currentChunk->getMax(),
                                         std::move(currentSplitVector));

            // Do not rely on the state of the vector after it has been handed off above; reset
            // it explicitly so that split points of the previous chunk can never leak into the
            // entry for the next one
            currentSplitVector.clear();

            currentChunk = chunk;
            currentSplitVector.push_back(tagRange.min);
        }
    }

    // Drain the current split vector if there are any entries left
    if (currentChunk) {
        invariant(!currentSplitVector.empty());

        splitCandidates.emplace_back(currentChunk->getShardId(),
                                     nss,
                                     cm->getVersion(),
                                     currentChunk->getLastmod(),
                                     currentChunk->getMin(),
                                     currentChunk->getMax(),
                                     std::move(currentSplitVector));
    }

    return splitCandidates;
}