Example #1
0
/**
 * Picks a destination for relocating a single chunk. The candidate is the least loaded shard
 * which is able to receive chunks carrying this chunk's tag; no shards are excluded from the
 * search.
 *
 * Returns an empty optional if no valid receiver was found or if the selected receiver is the
 * shard the chunk already resides on. Otherwise returns the migration to the selected shard.
 */
boost::optional<MigrateInfo> BalancerPolicy::balanceSingleChunk(
    const ChunkType& chunk,
    const ShardStatisticsVector& shardStats,
    const DistributionStatus& distribution) {
    const string chunkTag = distribution.getTagForChunk(chunk);
    const ShardId receiver = _getLeastLoadedReceiverShard(shardStats, distribution, chunkTag, {});

    // A move only makes sense towards a valid shard other than the current owner
    const bool hasUsableReceiver = receiver.isValid() && !(receiver == chunk.getShard());
    if (hasUsableReceiver) {
        return MigrateInfo(distribution.nss().ns(), receiver, chunk);
    }

    return boost::optional<MigrateInfo>();
}
Example #2
0
/**
 * Scans 'shardStats' for the shard holding the fewest chunks of the collection, considering only
 * shards which are not listed in 'excludedShards' and for which isShardSuitableReceiver returns
 * an OK status for 'tag'.
 *
 * On ties the shard which appears first in 'shardStats' wins. Returns a default-constructed
 * ShardId if no shard qualifies.
 */
ShardId BalancerPolicy::_getLeastLoadedReceiverShard(const ShardStatisticsVector& shardStats,
                                                     const DistributionStatus& distribution,
                                                     const string& tag,
                                                     const set<ShardId>& excludedShards) {
    ShardId leastLoaded;
    unsigned fewestChunks = numeric_limits<unsigned>::max();

    for (const auto& candidate : shardStats) {
        const bool isExcluded = excludedShards.count(candidate.shardId) > 0;

        // The suitability check is only evaluated for shards which are not excluded
        if (isExcluded || !isShardSuitableReceiver(candidate, tag).isOK()) {
            continue;
        }

        const unsigned candidateChunks = distribution.numberOfChunksInShard(candidate.shardId);

        // Strict comparison so that the earliest shard is kept when counts are equal
        if (candidateChunks < fewestChunks) {
            leastLoaded = candidate.shardId;
            fewestChunks = candidateChunks;
        }
    }

    return leastLoaded;
}
/**
 * Computes the migrations which the balancer policy proposes for the collection 'nss'.
 *
 * Fails with the underlying status if the chunk manager or the collection distribution
 * information cannot be obtained. Fails with IllegalOperation if any tag range begins at a key
 * which is not the minimum of an existing chunk. Otherwise returns the result of
 * BalancerPolicy::balance for the collection's distribution.
 */
StatusWith<MigrateInfoVector> BalancerChunkSelectionPolicyImpl::_getMigrateCandidatesForCollection(
    OperationContext* txn,
    const NamespaceString& nss,
    const ShardStatisticsVector& shardStats,
    bool aggressiveBalanceHint) {
    auto swScopedCM = ScopedChunkManager::getExisting(txn, nss);
    if (!swScopedCM.isOK()) {
        return swScopedCM.getStatus();
    }

    auto scopedChunkManager = std::move(swScopedCM.getValue());
    ChunkManager* const chunkManager = scopedChunkManager.cm();

    auto swCollInfo = createCollectionDistributionInfo(txn, shardStats, chunkManager);
    if (!swCollInfo.isOK()) {
        return swCollInfo.getStatus();
    }

    auto collectionInfo = std::move(swCollInfo.getValue());

    DistributionStatus distributionStatus = std::move(std::get<0>(collectionInfo));
    ChunkMinimumsSet chunkMinimums = std::move(std::get<1>(collectionInfo));

    for (const auto& entry : distributionStatus.tagRanges()) {
        const auto& range = entry.second;

        // Ranges which start exactly on a chunk boundary are fine
        if (chunkMinimums.count(range.min)) {
            continue;
        }

        // This tag falls somewhere at the middle of a chunk. Therefore we must skip balancing
        // this collection until it is split at the next iteration.
        //
        // TODO: We should be able to just skip chunks, which straddle tags and still make some
        // progress balancing.
        return {ErrorCodes::IllegalOperation,
                str::stream()
                    << "Tag boundaries "
                    << range.toString()
                    << " fall in the middle of an existing chunk. Balancing for collection "
                    << nss.ns()
                    << " will be postponed until the chunk is split appropriately."};
    }

    return BalancerPolicy::balance(shardStats, distributionStatus, aggressiveBalanceHint);
}
/**
 * Checks whether 'chunk' may be moved to the shard 'newShardId'.
 *
 * Returns the underlying error if the cluster statistics, the chunk manager or the collection
 * distribution information cannot be obtained, and ShardNotFound if the statistics contain no
 * entry for 'newShardId'. Otherwise returns the verdict of BalancerPolicy::isShardSuitableReceiver
 * for the destination shard and the tag of the chunk.
 */
Status BalancerChunkSelectionPolicyImpl::checkMoveAllowed(OperationContext* txn,
                                                          const ChunkType& chunk,
                                                          const ShardId& newShardId) {
    auto swShardStats = _clusterStats->getStats(txn);
    if (!swShardStats.isOK()) {
        return swShardStats.getStatus();
    }

    auto allShardStats = std::move(swShardStats.getValue());

    const NamespaceString collectionNss(chunk.getNS());

    auto swScopedCM = ScopedChunkManager::getExisting(txn, collectionNss);
    if (!swScopedCM.isOK()) {
        return swScopedCM.getStatus();
    }

    auto scopedChunkManager = std::move(swScopedCM.getValue());
    ChunkManager* const chunkManager = scopedChunkManager.cm();

    auto swCollInfo = createCollectionDistributionInfo(txn, allShardStats, chunkManager);
    if (!swCollInfo.isOK()) {
        return swCollInfo.getStatus();
    }

    auto collectionInfo = std::move(swCollInfo.getValue());

    DistributionStatus distributionStatus = std::move(std::get<0>(collectionInfo));

    // Locate the first statistics entry which describes the destination shard
    const ClusterStatistics::ShardStatistics* destinationStats = nullptr;
    for (const auto& stat : allShardStats) {
        if (stat.shardId == newShardId) {
            destinationStats = &stat;
            break;
        }
    }

    if (!destinationStats) {
        return {ErrorCodes::ShardNotFound,
                str::stream() << "Unable to find constraints information for shard " << newShardId
                              << ". Move to this shard will be disallowed."};
    }

    return BalancerPolicy::isShardSuitableReceiver(*destinationStats,
                                                   distributionStatus.getTagForChunk(chunk));
}
Example #5
0
/**
 * Scans 'shardStats' for the shard which holds the largest number of chunks tagged with
 * 'chunkTag', ignoring every shard listed in 'excludedShards'.
 *
 * Shards with zero matching chunks are never selected and on ties the shard which appears first
 * in 'shardStats' wins. Returns a default-constructed ShardId if no shard qualifies.
 */
ShardId BalancerPolicy::_getMostOverloadedShard(const ShardStatisticsVector& shardStats,
                                                const DistributionStatus& distribution,
                                                const string& chunkTag,
                                                const set<ShardId>& excludedShards) {
    ShardId mostLoaded;
    unsigned highestCount = 0;

    for (const auto& candidate : shardStats) {
        if (excludedShards.count(candidate.shardId) == 0) {
            const unsigned taggedChunks =
                distribution.numberOfChunksInShardWithTag(candidate.shardId, chunkTag);

            // Strict comparison so that the earliest shard is kept when counts are equal
            if (taggedChunks > highestCount) {
                mostLoaded = candidate.shardId;
                highestCount = taggedChunks;
            }
        }
    }

    return mostLoaded;
}
Example #6
0
/**
 * Attempts to schedule a single migration within the zone identified by 'tag' (the empty string
 * stands for chunks without a tag).
 *
 * The donor is the shard with the most chunks for the tag and the recipient is the least loaded
 * suitable receiver; shards listed in 'usedShards' are considered for neither role. A migration
 * is only scheduled when the donor exceeds the ideal per-shard chunk count for the zone by at
 * least 'imbalanceThreshold'.
 *
 * On success the migration of the first non-jumbo chunk of the donor belonging to the zone is
 * appended to 'migrations', both shards are added to 'usedShards' and true is returned. In every
 * other case the output arguments are left unchanged and false is returned.
 */
bool BalancerPolicy::_singleZoneBalance(const ShardStatisticsVector& shardStats,
                                        const DistributionStatus& distribution,
                                        const string& tag,
                                        size_t imbalanceThreshold,
                                        vector<MigrateInfo>* migrations,
                                        set<ShardId>* usedShards) {
    const ShardId from = _getMostOverloadedShard(shardStats, distribution, tag, *usedShards);
    if (!from.isValid())
        return false;

    const size_t max = distribution.numberOfChunksInShardWithTag(from, tag);
    if (max == 0)
        return false;

    const ShardId to = _getLeastLoadedReceiverShard(shardStats, distribution, tag, *usedShards);
    if (!to.isValid()) {
        if (migrations->empty()) {
            log() << "No available shards to take chunks for tag [" << tag << "]";
        }
        return false;
    }

    const size_t min = distribution.numberOfChunksInShardWithTag(to, tag);
    if (min >= max)
        return false;

    const size_t totalNumberOfChunksWithTag =
        (tag.empty() ? distribution.totalChunks() : distribution.totalChunksWithTag(tag));

    size_t totalNumberOfShardsWithTag = 0;

    for (const auto& stat : shardStats) {
        if (tag.empty() || stat.shardTags.count(tag)) {
            totalNumberOfShardsWithTag++;
        }
    }

    // totalNumberOfShardsWithTag cannot be zero if the to shard is valid
    invariant(totalNumberOfShardsWithTag);
    invariant(totalNumberOfChunksWithTag >= max);

    // The ideal should be at least one per shard
    const size_t idealNumberOfChunksPerShardWithTag =
        (totalNumberOfChunksWithTag < totalNumberOfShardsWithTag)
        ? 1
        : (totalNumberOfChunksWithTag / totalNumberOfShardsWithTag);

    // Because shards in 'usedShards' are skipped, the most loaded remaining donor may hold fewer
    // chunks than the ideal. The subtraction is unsigned, so it must be clamped at zero; otherwise
    // it wraps around to a huge value, which would pass the threshold check below and schedule a
    // migration off a shard which is already below its ideal load.
    const size_t imbalance = (max > idealNumberOfChunksPerShardWithTag)
        ? (max - idealNumberOfChunksPerShardWithTag)
        : 0;

    LOG(1) << "collection : " << distribution.nss().ns();
    LOG(1) << "zone       : " << tag;
    LOG(1) << "donor      : " << from << " chunks on " << max;
    LOG(1) << "receiver   : " << to << " chunks on " << min;
    LOG(1) << "ideal      : " << idealNumberOfChunksPerShardWithTag;
    LOG(1) << "threshold  : " << imbalanceThreshold;

    // Check whether it is necessary to balance within this zone
    if (imbalance < imbalanceThreshold)
        return false;

    const vector<ChunkType>& chunks = distribution.getChunks(from);

    unsigned numJumboChunks = 0;

    for (const auto& chunk : chunks) {
        // Only chunks which belong to the zone being balanced are candidates
        if (distribution.getTagForChunk(chunk) != tag)
            continue;

        // Jumbo chunks are never selected, only counted for the diagnostic below
        if (chunk.getJumbo()) {
            numJumboChunks++;
            continue;
        }

        // At most one migration per call: record it and mark both shards as used
        migrations->emplace_back(distribution.nss().ns(), to, chunk);
        invariant(usedShards->insert(chunk.getShard()).second);
        invariant(usedShards->insert(to).second);
        return true;
    }

    if (numJumboChunks) {
        warning() << "Shard: " << from << ", collection: " << distribution.nss().ns()
                  << " has only jumbo chunks for zone \'" << tag
                  << "\' and cannot be balanced. Jumbo chunks count: " << numJumboChunks;
    }

    return false;
}
Example #7
0
/**
 * Computes a set of chunk migrations for the collection described by 'distribution', using each
 * shard at most once (either as a donor or as a recipient). Migrations are selected in three
 * phases:
 *
 *  1) Move one non-jumbo chunk off each draining shard which still holds chunks.
 *  2) Move one non-jumbo chunk off each shard holding a chunk whose tag the shard does not have.
 *  3) Repeatedly balance each zone (every tag plus the empty tag) through _singleZoneBalance
 *     until it reports that no further migration can be scheduled.
 *
 * 'shouldAggressivelyBalance', or a collection with fewer than 20 chunks, selects the aggressive
 * imbalance threshold for phase 3.
 */
vector<MigrateInfo> BalancerPolicy::balance(const ShardStatisticsVector& shardStats,
                                            const DistributionStatus& distribution,
                                            bool shouldAggressivelyBalance) {
    vector<MigrateInfo> migrations;

    // Set of shards, which have already been used for migrations. Used so we don't return multiple
    // migrations for the same shard.
    set<ShardId> usedShards;

    // 1) Check for shards, which are in draining mode and must have chunks moved off of them
    {
        for (const auto& stat : shardStats) {
            if (!stat.isDraining)
                continue;

            // A shard which already takes part in a migration must not be selected again,
            // otherwise the insertion invariants below would fail
            if (usedShards.count(stat.shardId))
                continue;

            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);

            if (chunks.empty())
                continue;

            // Now we know we need to move to chunks off this shard, but only if permitted by the
            // tags policy
            unsigned numJumboChunks = 0;

            // Since we have to move all chunks, lets just do in order
            for (const auto& chunk : chunks) {
                if (chunk.getJumbo()) {
                    numJumboChunks++;
                    continue;
                }

                const string tag = distribution.getTagForChunk(chunk);

                const ShardId to =
                    _getLeastLoadedReceiverShard(shardStats, distribution, tag, usedShards);
                if (!to.isValid()) {
                    if (migrations.empty()) {
                        warning() << "Chunk " << chunk
                                  << " is on a draining shard, but no appropriate recipient found";
                    }
                    continue;
                }

                invariant(to != stat.shardId);
                migrations.emplace_back(distribution.nss().ns(), to, chunk);
                invariant(usedShards.insert(stat.shardId).second);
                invariant(usedShards.insert(to).second);
                break;
            }

            if (migrations.empty()) {
                warning() << "Unable to find any chunk to move from draining shard " << stat.shardId
                          << ". numJumboChunks: " << numJumboChunks;
            }
        }
    }

    // 2) Check for chunks, which are on the wrong shard and must be moved off of it
    if (!distribution.tags().empty()) {
        for (const auto& stat : shardStats) {
            // Shards picked as donor or recipient in an earlier step (for example a draining
            // shard which also holds a chunk violating its tag) are skipped, so that the
            // insertion invariants below cannot fail
            if (usedShards.count(stat.shardId))
                continue;

            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);
            for (const auto& chunk : chunks) {
                const string tag = distribution.getTagForChunk(chunk);
                if (tag.empty())
                    continue;

                // The chunk is already on a shard which carries its tag
                if (stat.shardTags.count(tag))
                    continue;

                if (chunk.getJumbo()) {
                    warning() << "chunk " << chunk << " violates tag " << tag
                              << ", but it is jumbo and cannot be moved";
                    continue;
                }

                const ShardId to =
                    _getLeastLoadedReceiverShard(shardStats, distribution, tag, usedShards);
                if (!to.isValid()) {
                    if (migrations.empty()) {
                        warning() << "chunk " << chunk << " violates tag " << tag
                                  << ", but no appropriate recipient found";
                    }
                    continue;
                }

                invariant(to != stat.shardId);
                migrations.emplace_back(distribution.nss().ns(), to, chunk);
                invariant(usedShards.insert(stat.shardId).second);
                invariant(usedShards.insert(to).second);
                break;
            }
        }
    }

    // 3) for each tag balance
    const size_t imbalanceThreshold = (shouldAggressivelyBalance || distribution.totalChunks() < 20)
        ? kAggressiveImbalanceThreshold
        : kDefaultImbalanceThreshold;

    // The empty tag is appended so that chunks without a tag get balanced as well
    vector<string> tagsPlusEmpty(distribution.tags().begin(), distribution.tags().end());
    tagsPlusEmpty.push_back("");

    for (const auto& tag : tagsPlusEmpty) {
        // Each successful call schedules exactly one migration; keep going until none is possible
        while (_singleZoneBalance(
            shardStats, distribution, tag, imbalanceThreshold, &migrations, &usedShards))
            ;
    }

    return migrations;
}
/**
 * Computes the chunk splits required so that every tag range of the collection 'nss' begins on a
 * chunk boundary.
 *
 * For each tag range whose minimum is not the minimum of an existing chunk, the chunk containing
 * that key is looked up and the key is recorded as a split point for it. Consecutive split
 * points which fall into the same chunk are accumulated into a single entry.
 *
 * Fails with the underlying status if the chunk manager or the collection distribution
 * information cannot be obtained.
 */
StatusWith<SplitInfoVector> BalancerChunkSelectionPolicyImpl::_getSplitCandidatesForCollection(
    OperationContext* txn, const NamespaceString& nss, const ShardStatisticsVector& shardStats) {
    auto scopedCMStatus = ScopedChunkManager::getExisting(txn, nss);
    if (!scopedCMStatus.isOK()) {
        return scopedCMStatus.getStatus();
    }

    auto scopedCM = std::move(scopedCMStatus.getValue());
    ChunkManager* const cm = scopedCM.cm();

    auto collInfoStatus = createCollectionDistributionInfo(txn, shardStats, cm);
    if (!collInfoStatus.isOK()) {
        return collInfoStatus.getStatus();
    }

    auto collInfo = std::move(collInfoStatus.getValue());

    DistributionStatus distribution = std::move(std::get<0>(collInfo));
    ChunkMinimumsSet allChunkMinimums = std::move(std::get<1>(collInfo));

    SplitInfoVector splitCandidates;

    // Accumulate split points for the same chunk together
    shared_ptr<Chunk> currentChunk;
    vector<BSONObj> currentSplitVector;

    for (const auto& tagRangeEntry : distribution.tagRanges()) {
        const auto& tagRange = tagRangeEntry.second;

        // The range already starts on a chunk boundary, so no split is needed
        if (allChunkMinimums.count(tagRange.min)) {
            continue;
        }

        shared_ptr<Chunk> chunk = cm->findIntersectingChunkWithSimpleCollation(txn, tagRange.min);

        if (!currentChunk) {
            currentChunk = chunk;
        }

        invariant(currentChunk);

        if (chunk == currentChunk) {
            currentSplitVector.push_back(tagRange.min);
        } else {
            // A different chunk was reached: emit the split points gathered for the previous one
            splitCandidates.emplace_back(currentChunk->getShardId(),
                                         nss,
                                         cm->getVersion(),
                                         currentChunk->getLastmod(),
                                         currentChunk->getMin(),
                                         currentChunk->getMax(),
                                         std::move(currentSplitVector));

            // The vector was handed over above and its contents are unspecified from here on.
            // Reset it explicitly so that the split points of the previous chunk can never be
            // carried over into the entry for the next chunk.
            currentSplitVector.clear();

            currentChunk = chunk;
            currentSplitVector.push_back(tagRange.min);
        }
    }

    // Drain the current split vector if there are any entries left
    if (currentChunk) {
        invariant(!currentSplitVector.empty());

        splitCandidates.emplace_back(currentChunk->getShardId(),
                                     nss,
                                     cm->getVersion(),
                                     currentChunk->getLastmod(),
                                     currentChunk->getMin(),
                                     currentChunk->getMax(),
                                     std::move(currentSplitVector));
    }

    return splitCandidates;
}