void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       ChunkVersion chunkVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    chunkVersion.appendToCommand(builder);  // 3.4 shard compatibility
    builder->append(kEpoch, chunkVersion.epoch());
    // config connection string is included for 3.4 shard compatibility
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, false);
}
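For orientation, a hypothetical call site for this builder might look like the sketch below. Every concrete value (namespace, shard ids, chunk bounds, config connection string, chunk size) is invented, and the snippet assumes the usual MongoDB server headers for MoveChunkRequest, ChunkRange, ConnectionString and MigrationSecondaryThrottleOptions are in scope:

// Sketch only: illustrative values, not taken from the code above.
BSONObjBuilder cmdBuilder;
MoveChunkRequest::appendAsCommand(
    &cmdBuilder,
    NamespaceString("test.coll"),                          // hypothetical namespace
    ChunkVersion(1, 0, OID::gen()),                        // hypothetical chunk version
    uassertStatusOK(ConnectionString::parse("configRS/cfg1:27019,cfg2:27019")),
    ShardId("shard0000"),                                  // donor
    ShardId("shard0001"),                                  // recipient
    ChunkRange(BSON("x" << 0), BSON("x" << 100)),          // chunk bounds
    64 * 1024 * 1024,                                      // maxChunkSizeBytes
    MigrationSecondaryThrottleOptions::create(MigrationSecondaryThrottleOptions::kDefault),
    false /* waitForDelete */);
const BSONObj moveChunkCmd = cmdBuilder.obj();             // command object to send to the donor shard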
Example #2
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       const ChunkVersion& shardVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete,
                                       bool takeDistLock) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    shardVersion.appendForCommands(builder);
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, takeDistLock);
}
StatusWith<boost::optional<MigrateInfo>>
BalancerChunkSelectionPolicyImpl::selectSpecificChunkToMove(OperationContext* txn,
                                                            const ChunkType& chunk) {
    const NamespaceString nss(chunk.getNS());

    auto scopedCMStatus = ScopedChunkManager::getExisting(txn, nss);
    if (!scopedCMStatus.isOK()) {
        return scopedCMStatus.getStatus();
    }

    auto scopedCM = std::move(scopedCMStatus.getValue());
    ChunkManager* const cm = scopedCM.cm();

    auto tagForChunkStatus =
        Grid::get(txn)->catalogManager(txn)->getTagForChunk(txn, nss.ns(), chunk);
    if (!tagForChunkStatus.isOK()) {
        return tagForChunkStatus.getStatus();
    }

    auto shardStatsStatus = _clusterStats->getStats(txn);
    if (!shardStatsStatus.isOK()) {
        return shardStatsStatus.getStatus();
    }

    auto collInfo = createCollectionDistributionInfo(shardStatsStatus.getValue(), cm);
    ShardToChunksMap shardToChunksMap = std::move(std::get<0>(collInfo));

    DistributionStatus distStatus(shardStatsStatus.getValue(), shardToChunksMap);
    const ShardId newShardId(distStatus.getBestReceieverShard(tagForChunkStatus.getValue()));
    if (newShardId.empty() || newShardId == chunk.getShard()) {
        return boost::optional<MigrateInfo>();
    }

    return boost::optional<MigrateInfo>{MigrateInfo(nss.ns(), newShardId, chunk)};
}
Example #4
void StartChunkCloneRequest::appendAsCommand(
    BSONObjBuilder* builder,
    const NamespaceString& nss,
    const MigrationSessionId& sessionId,
    const ConnectionString& configServerConnectionString,
    const ConnectionString& fromShardConnectionString,
    const ShardId& fromShardId,
    const ShardId& toShardId,
    const BSONObj& chunkMinKey,
    const BSONObj& chunkMaxKey,
    const BSONObj& shardKeyPattern,
    const MigrationSecondaryThrottleOptions& secondaryThrottle) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());
    invariant(fromShardConnectionString.isValid());

    builder->append(kRecvChunkStart, nss.ns());
    sessionId.append(builder);
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardConnectionString, fromShardConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    builder->append(kChunkMinKey, chunkMinKey);
    builder->append(kChunkMaxKey, chunkMaxKey);
    builder->append(kShardKeyPattern, shardKeyPattern);
    secondaryThrottle.append(builder);
}
Example #5
boost::optional<MigrateInfo> BalancerPolicy::balanceSingleChunk(
    const ChunkType& chunk,
    const ShardStatisticsVector& shardStats,
    const DistributionStatus& distribution) {
    const string tag = distribution.getTagForChunk(chunk);

    ShardId newShardId = _getLeastLoadedReceiverShard(shardStats, distribution, tag, {});
    if (!newShardId.isValid() || newShardId == chunk.getShard()) {
        return boost::optional<MigrateInfo>();
    }

    return MigrateInfo(distribution.nss().ns(), newShardId, chunk);
}
StatusWith<std::vector<std::string>> ShardingCatalogClientImpl::getDatabasesForShard(
    OperationContext* opCtx, const ShardId& shardId) {
    auto findStatus = _exhaustiveFindOnConfig(
        opCtx,
        kConfigReadSelector,
        repl::ReadConcernLevel::kLocalReadConcern,
        DatabaseType::ConfigNS,
        BSON(DatabaseType::primary(shardId.toString())),
        BSONObj(),
        boost::none);  // no limit

    if (!findStatus.isOK())
        return findStatus.getStatus();

    std::vector<std::string> dbs;
    for (const BSONObj& obj : findStatus.getValue().value) {
        std::string dbName;
        Status status = bsonExtractStringField(obj, DatabaseType::name(), &dbName);
        if (!status.isOK()) {
            return status;
        }

        dbs.push_back(dbName);
    }

    return dbs;
}
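As a side note, the filter built above is a plain equality match on each database's primary shard. A minimal sketch of the document it produces, using a made-up shard id:

// Sketch only: shows the shape of the config.databases query document.
const ShardId shardId("shard0001");                                 // hypothetical shard
const BSONObj filter = BSON(DatabaseType::primary(shardId.toString()));
// filter serializes as { "primary" : "shard0001" } and is matched against the
// config.databases collection; the database name field of each matching document
// is what ends up in the returned vector.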
void CommitChunkMigrationRequest::appendAsCommand(
    BSONObjBuilder* builder,
    const NamespaceString& nss,
    const ShardId& fromShard,
    const ShardId& toShard,
    const ChunkType& migratedChunkType,
    const boost::optional<ChunkType>& controlChunkType) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kConfigSvrCommitChunkMigration, nss.ns());
    builder->append(kFromShard, fromShard.toString());
    builder->append(kToShard, toShard.toString());
    builder->append(kMigratedChunk, migratedChunkType.toBSON());

    if (controlChunkType) {
        builder->append(kControlChunk, controlChunkType->toBSON());
    }
}
Example #8
void appendWriteConcernErrorToCmdResponse(const ShardId& shardId,
                                          const BSONElement& wcErrorElem,
                                          BSONObjBuilder& responseBuilder) {
    WriteConcernErrorDetail wcError;
    std::string errMsg;
    auto wcErrorObj = wcErrorElem.Obj();
    if (!wcError.parseBSON(wcErrorObj, &errMsg)) {
        wcError.clear();
        wcError.setStatus({ErrorCodes::FailedToParse,
                           "Failed to parse writeConcernError: " + wcErrorObj.toString() +
                               ", Received error: " + errMsg});
    }
    auto status = wcError.toStatus();
    wcError.setStatus(
        status.withReason(str::stream() << status.reason() << " at " << shardId.toString()));
    responseBuilder.append("writeConcernError", wcError.toBSON());
}
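A hypothetical call site for this helper: after one shard replies, the caller extracts the writeConcernError element (if present) and lets the helper re-append it to the merged response with the shard id attached. The reply document and shard id below are invented:

// Sketch only: shardReply stands in for a response received from one shard.
BSONObj shardReply = BSON("ok" << 1 << "writeConcernError"
                               << BSON("code" << 64 << "errmsg"
                                              << "waiting for replication timed out"));
BSONObjBuilder responseBuilder;
BSONElement wcErrorElem = shardReply["writeConcernError"];
if (!wcErrorElem.eoo()) {
    appendWriteConcernErrorToCmdResponse(ShardId("shard0001"), wcErrorElem, responseBuilder);
}
// responseBuilder now carries a writeConcernError whose reason ends with " at shard0001".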
Example #9
bool BalancerPolicy::_singleZoneBalance(const ShardStatisticsVector& shardStats,
                                        const DistributionStatus& distribution,
                                        const string& tag,
                                        size_t imbalanceThreshold,
                                        vector<MigrateInfo>* migrations,
                                        set<ShardId>* usedShards) {
    const ShardId from = _getMostOverloadedShard(shardStats, distribution, tag, *usedShards);
    if (!from.isValid())
        return false;

    const size_t max = distribution.numberOfChunksInShardWithTag(from, tag);
    if (max == 0)
        return false;

    const ShardId to = _getLeastLoadedReceiverShard(shardStats, distribution, tag, *usedShards);
    if (!to.isValid()) {
        if (migrations->empty()) {
            log() << "No available shards to take chunks for tag [" << tag << "]";
        }
        return false;
    }

    const size_t min = distribution.numberOfChunksInShardWithTag(to, tag);
    if (min >= max)
        return false;

    const size_t totalNumberOfChunksWithTag =
        (tag.empty() ? distribution.totalChunks() : distribution.totalChunksWithTag(tag));

    size_t totalNumberOfShardsWithTag = 0;

    for (const auto& stat : shardStats) {
        if (tag.empty() || stat.shardTags.count(tag)) {
            totalNumberOfShardsWithTag++;
        }
    }

    // totalNumberOfShardsWithTag cannot be zero if the to shard is valid
    invariant(totalNumberOfShardsWithTag);
    invariant(totalNumberOfChunksWithTag >= max);

    // The ideal should be at least one per shard
    const size_t idealNumberOfChunksPerShardWithTag =
        (totalNumberOfChunksWithTag < totalNumberOfShardsWithTag)
        ? 1
        : (totalNumberOfChunksWithTag / totalNumberOfShardsWithTag);

    const size_t imbalance = max - idealNumberOfChunksPerShardWithTag;

    LOG(1) << "collection : " << distribution.nss().ns();
    LOG(1) << "zone       : " << tag;
    LOG(1) << "donor      : " << from << " chunks on " << max;
    LOG(1) << "receiver   : " << to << " chunks on " << min;
    LOG(1) << "ideal      : " << idealNumberOfChunksPerShardWithTag;
    LOG(1) << "threshold  : " << imbalanceThreshold;

    // Check whether it is necessary to balance within this zone
    if (imbalance < imbalanceThreshold)
        return false;

    const vector<ChunkType>& chunks = distribution.getChunks(from);

    unsigned numJumboChunks = 0;

    for (const auto& chunk : chunks) {
        if (distribution.getTagForChunk(chunk) != tag)
            continue;

        if (chunk.getJumbo()) {
            numJumboChunks++;
            continue;
        }

        migrations->emplace_back(distribution.nss().ns(), to, chunk);
        invariant(usedShards->insert(chunk.getShard()).second);
        invariant(usedShards->insert(to).second);
        return true;
    }

    if (numJumboChunks) {
        warning() << "Shard: " << from << ", collection: " << distribution.nss().ns()
                  << " has only jumbo chunks for zone \'" << tag
                  << "\' and cannot be balanced. Jumbo chunks count: " << numJumboChunks;
    }

    return false;
}
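To make the threshold arithmetic above concrete, here is a small self-contained sketch with invented numbers: 100 chunks carry the zone, 4 shards are tagged with it, the donor holds 40, and 8 stands in for the imbalance threshold (none of these values come from the code above):

#include <algorithm>
#include <cstddef>
#include <iostream>

int main() {
    const std::size_t totalChunksWithTag = 100;   // chunks tagged with the zone
    const std::size_t totalShardsWithTag = 4;     // shards carrying the zone
    const std::size_t chunksOnDonor = 40;         // "max" in _singleZoneBalance
    const std::size_t imbalanceThreshold = 8;     // illustrative threshold only

    // The ideal is at least one chunk per shard, otherwise the integer average.
    const std::size_t ideal =
        std::max<std::size_t>(1, totalChunksWithTag / totalShardsWithTag);  // 25
    const std::size_t imbalance = chunksOnDonor - ideal;                    // 15

    // 15 >= 8, so the donor is considered overloaded and one chunk gets migrated.
    std::cout << "ideal=" << ideal << " imbalance=" << imbalance << " migrate="
              << std::boolalpha << (imbalance >= imbalanceThreshold) << std::endl;
}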
Example #10
vector<MigrateInfo> BalancerPolicy::balance(const ShardStatisticsVector& shardStats,
                                            const DistributionStatus& distribution,
                                            bool shouldAggressivelyBalance) {
    vector<MigrateInfo> migrations;

    // Set of shards, which have already been used for migrations. Used so we don't return multiple
    // migrations for the same shard.
    set<ShardId> usedShards;

    // 1) Check for shards, which are in draining mode and must have chunks moved off of them
    {
        for (const auto& stat : shardStats) {
            if (!stat.isDraining)
                continue;

            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);

            if (chunks.empty())
                continue;

            // Now we know we need to move chunks off this shard, but only if permitted by the
            // tags policy
            unsigned numJumboChunks = 0;

            // Since we have to move all chunks, let's just do it in order
            for (const auto& chunk : chunks) {
                if (chunk.getJumbo()) {
                    numJumboChunks++;
                    continue;
                }

                const string tag = distribution.getTagForChunk(chunk);

                const ShardId to =
                    _getLeastLoadedReceiverShard(shardStats, distribution, tag, usedShards);
                if (!to.isValid()) {
                    if (migrations.empty()) {
                        warning() << "Chunk " << chunk
                                  << " is on a draining shard, but no appropriate recipient found";
                    }
                    continue;
                }

                invariant(to != stat.shardId);
                migrations.emplace_back(distribution.nss().ns(), to, chunk);
                invariant(usedShards.insert(stat.shardId).second);
                invariant(usedShards.insert(to).second);
                break;
            }

            if (migrations.empty()) {
                warning() << "Unable to find any chunk to move from draining shard " << stat.shardId
                          << ". numJumboChunks: " << numJumboChunks;
            }
        }
    }

    // 2) Check for chunks, which are on the wrong shard and must be moved off of it
    if (!distribution.tags().empty()) {
        for (const auto& stat : shardStats) {
            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);
            for (const auto& chunk : chunks) {
                const string tag = distribution.getTagForChunk(chunk);
                if (tag.empty())
                    continue;

                if (stat.shardTags.count(tag))
                    continue;

                if (chunk.getJumbo()) {
                    warning() << "chunk " << chunk << " violates tag " << tag
                              << ", but it is jumbo and cannot be moved";
                    continue;
                }

                const ShardId to =
                    _getLeastLoadedReceiverShard(shardStats, distribution, tag, usedShards);
                if (!to.isValid()) {
                    if (migrations.empty()) {
                        warning() << "chunk " << chunk << " violates tag " << tag
                                  << ", but no appropriate recipient found";
                    }
                    continue;
                }

                invariant(to != stat.shardId);
                migrations.emplace_back(distribution.nss().ns(), to, chunk);
                invariant(usedShards.insert(stat.shardId).second);
                invariant(usedShards.insert(to).second);
                break;
            }
        }
    }

    // 3) for each tag balance
    const size_t imbalanceThreshold = (shouldAggressivelyBalance || distribution.totalChunks() < 20)
        ? kAggressiveImbalanceThreshold
        : kDefaultImbalanceThreshold;

    vector<string> tagsPlusEmpty(distribution.tags().begin(), distribution.tags().end());
    tagsPlusEmpty.push_back("");

    for (const auto& tag : tagsPlusEmpty) {
        while (_singleZoneBalance(
            shardStats, distribution, tag, imbalanceThreshold, &migrations, &usedShards))
            ;
    }

    return migrations;
}
Example #11
void ChunkType::setShard(const ShardId& shard) {
    invariant(shard.isValid());
    _shard = shard;
}
Example #12
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const BSONObj& minKey,
    const BSONObj& maxKey,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());
    invariant(minKey.woCompare(maxKey) < 0);

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                << " parts at a time."};
    }

    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    cmd.append(kMinKey, minKey);
    cmd.append(kMaxKey, maxKey);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shard = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shard) {
        status =
            Status(ErrorCodes::ShardNotFound, str::stream() << "shard " << shardId << " not found");
    } else {
        auto cmdStatus = shard->runCommand(txn,
                                           ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                                           "admin",
                                           cmdObj,
                                           Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
                << "Chunk migration will be skipped because splitChunk returned invalid response: "
                << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
                << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
Example #13
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const ChunkRange& chunkRange,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                              << " parts at a time."};
    }

    // Sanity check that we are not attempting to split at the boundaries of the chunk. This check
    // is already performed at chunk split commit time, but we are performing it here for parity
    // with old auto-split code, which might rely on it.
    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMin() == splitPoints.front())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << splitPoints.front()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMax() == splitPoints.back())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << splitPoints.back()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    chunkRange.append(&cmd);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shardStatus.isOK()) {
        status = shardStatus.getStatus();
    } else {
        auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts(
            txn,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            cmdObj,
            Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
            << "Chunk migration will be skipped because splitChunk returned invalid response: "
            << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
            << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
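A hypothetical caller of either overload would unwrap the status first and then check the optional "shouldMigrate" range. Everything referenced below (txn, shardId, nss, shardKeyPattern, collectionVersion, chunkRange, splitPoints, and a Status-returning enclosing function) is assumed rather than defined:

// Sketch only: illustrates how the StatusWith<boost::optional<ChunkRange>> is consumed.
auto splitStatus = splitChunkAtMultiplePoints(
    txn, shardId, nss, shardKeyPattern, collectionVersion, chunkRange, splitPoints);
if (!splitStatus.isOK()) {
    return splitStatus.getStatus();
}
if (auto suggestedMigrateRange = std::move(splitStatus.getValue())) {
    // The donor shard returned a "shouldMigrate" hint: the range in
    // *suggestedMigrateRange is a candidate for a follow-up migration
    // (for example, by the top-chunk auto-split optimization).
}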
StatusWith<ShardDrainingStatus> ShardingCatalogManager::removeShard(OperationContext* opCtx,
                                                                    const ShardId& shardId) {
    // Check preconditions for removing the shard
    std::string name = shardId.toString();
    auto countStatus = _runCountCommandOnConfig(
        opCtx,
        ShardType::ConfigNS,
        BSON(ShardType::name() << NE << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() > 0) {
        return Status(ErrorCodes::ConflictingOperationInProgress,
                      "Can't have more than one draining shard at a time");
    }

    countStatus =
        _runCountCommandOnConfig(opCtx, ShardType::ConfigNS, BSON(ShardType::name() << NE << name));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() == 0) {
        return Status(ErrorCodes::IllegalOperation, "Can't remove last shard");
    }

    // Figure out if shard is already draining
    countStatus = _runCountCommandOnConfig(
        opCtx, ShardType::ConfigNS, BSON(ShardType::name() << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }

    auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();

    if (countStatus.getValue() == 0) {
        log() << "going to start draining shard: " << name;

        // Record start in changelog
        const Status logStatus = Grid::get(opCtx)->catalogClient()->logChangeChecked(
            opCtx,
            "removeShard.start",
            "",
            BSON("shard" << name),
            ShardingCatalogClient::kLocalWriteConcern);
        if (!logStatus.isOK()) {
            return logStatus;
        }

        auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            BSON(ShardType::name() << name),
            BSON("$set" << BSON(ShardType::draining(true))),
            false,
            ShardingCatalogClient::kLocalWriteConcern);
        if (!updateStatus.isOK()) {
            log() << "error starting removeShard: " << name
                  << causedBy(redact(updateStatus.getStatus()));
            return updateStatus.getStatus();
        }

        shardRegistry->reload(opCtx);

        return ShardDrainingStatus::STARTED;
    }

    // Draining has already started, now figure out how many chunks and databases are still on the
    // shard.
    countStatus =
        _runCountCommandOnConfig(opCtx, ChunkType::ConfigNS, BSON(ChunkType::shard(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long chunkCount = countStatus.getValue();

    countStatus =
        _runCountCommandOnConfig(opCtx, DatabaseType::ConfigNS, BSON(DatabaseType::primary(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long databaseCount = countStatus.getValue();

    if (chunkCount > 0 || databaseCount > 0) {
        // Still more draining to do
        LOG(0) << "chunkCount: " << chunkCount;
        LOG(0) << "databaseCount: " << databaseCount;
        return ShardDrainingStatus::ONGOING;
    }

    // Draining is done, now finish removing the shard.
    log() << "going to remove shard: " << name;
    audit::logRemoveShard(opCtx->getClient(), name);

    Status status = Grid::get(opCtx)->catalogClient()->removeConfigDocuments(
        opCtx,
        ShardType::ConfigNS,
        BSON(ShardType::name() << name),
        ShardingCatalogClient::kLocalWriteConcern);
    if (!status.isOK()) {
        log() << "Error concluding removeShard operation on: " << name
              << "; err: " << status.reason();
        return status;
    }

    shardConnectionPool.removeHost(name);
    ReplicaSetMonitor::remove(name);

    shardRegistry->reload(opCtx);

    // Record finish in changelog
    Grid::get(opCtx)->catalogClient()->logChange(
        opCtx, "removeShard", "", BSON("shard" << name), ShardingCatalogClient::kLocalWriteConcern);

    return ShardDrainingStatus::COMPLETED;
}
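For context, the three-valued result is typically consumed across repeated invocations: the first call flips the shard to draining (STARTED), later calls report ONGOING while chunks or databases remain, and the final call removes the shard document and returns COMPLETED. A loose sketch, where catalogManager, opCtx, the shard id, and the Status-returning enclosing function are all assumptions:

// Sketch only: shows how STARTED / ONGOING / COMPLETED are consumed.
ShardDrainingStatus drainingStatus = ShardDrainingStatus::STARTED;
while (drainingStatus != ShardDrainingStatus::COMPLETED) {
    auto swDraining = catalogManager->removeShard(opCtx, ShardId("shard0002"));
    if (!swDraining.isOK()) {
        // e.g. ConflictingOperationInProgress or IllegalOperation
        return swDraining.getStatus();
    }
    drainingStatus = swDraining.getValue();
    // In practice each iteration corresponds to a separate removeShard command
    // issued by the administrator, not a tight loop.
}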
Example #15
MigrateInfo* BalancerPolicy::balance(const string& ns,
                                     const DistributionStatus& distribution,
                                     int balancedLastTime) {
    // 1) check for shards that the policy requires us to move chunks off of:
    //    draining only
    // 2) check tag policy violations
    // 3) then we make sure chunks are balanced for each tag

    // ----

    // 1) check things we have to move
    {
        for (const auto& stat : distribution.getStats()) {
            if (!stat.isDraining)
                continue;

            if (distribution.numberOfChunksInShard(stat.shardId) == 0)
                continue;

            // now we know we need to move chunks off this shard
            // we will if we are allowed
            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);
            unsigned numJumboChunks = 0;

            // since we have to move all chunks, let's just do it in order
            for (unsigned i = 0; i < chunks.size(); i++) {
                const ChunkType& chunkToMove = chunks[i];
                if (chunkToMove.getJumbo()) {
                    numJumboChunks++;
                    continue;
                }

                string tag = distribution.getTagForChunk(chunkToMove);
                const ShardId to = distribution.getBestReceieverShard(tag);

                if (to.size() == 0) {
                    warning() << "want to move chunk: " << chunkToMove << "(" << tag << ") "
                              << "from " << stat.shardId << " but can't find anywhere to put it";
                    continue;
                }

                log() << "going to move " << chunkToMove << " from " << stat.shardId << "(" << tag
                      << ")"
                      << " to " << to;

                return new MigrateInfo(ns, to, stat.shardId, chunkToMove);
            }

            warning() << "can't find any chunk to move from: " << stat.shardId
                      << " but we want to. "
                      << " numJumboChunks: " << numJumboChunks;
        }
    }

    // 2) tag violations
    if (distribution.tags().size() > 0) {
        for (const auto& stat : distribution.getStats()) {
            const vector<ChunkType>& chunks = distribution.getChunks(stat.shardId);
            for (unsigned j = 0; j < chunks.size(); j++) {
                const ChunkType& chunk = chunks[j];
                string tag = distribution.getTagForChunk(chunk);

                if (tag.empty() || stat.shardTags.count(tag))
                    continue;

                // uh oh, this chunk is in the wrong place
                log() << "chunk " << chunk << " is not on a shard with the right tag: " << tag;

                if (chunk.getJumbo()) {
                    warning() << "chunk " << chunk << " is jumbo, so cannot be moved";
                    continue;
                }

                const ShardId to = distribution.getBestReceieverShard(tag);
                if (to.size() == 0) {
                    log() << "no where to put it :(";
                    continue;
                }

                invariant(to != stat.shardId);
                log() << " going to move to: " << to;
                return new MigrateInfo(ns, to, stat.shardId, chunk);
            }
        }
    }

    // 3) for each tag balance

    int threshold = 8;
    if (balancedLastTime || distribution.totalChunks() < 20)
        threshold = 2;
    else if (distribution.totalChunks() < 80)
        threshold = 4;

    // randomize the order in which we balance the tags
    // this is so that one bad tag doesn't prevent others from getting balanced
    vector<string> tags;
    {
        set<string> t = distribution.tags();
        for (set<string>::const_iterator i = t.begin(); i != t.end(); ++i)
            tags.push_back(*i);
        tags.push_back("");

        std::random_shuffle(tags.begin(), tags.end());
    }

    for (unsigned i = 0; i < tags.size(); i++) {
        string tag = tags[i];

        const ShardId from = distribution.getMostOverloadedShard(tag);
        if (from.size() == 0)
            continue;

        unsigned max = distribution.numberOfChunksInShardWithTag(from, tag);
        if (max == 0)
            continue;

        string to = distribution.getBestReceieverShard(tag);
        if (to.size() == 0) {
            log() << "no available shards to take chunks for tag [" << tag << "]";
            return NULL;
        }

        unsigned min = distribution.numberOfChunksInShardWithTag(to, tag);

        const int imbalance = max - min;

        LOG(1) << "collection : " << ns;
        LOG(1) << "donor      : " << from << " chunks on " << max;
        LOG(1) << "receiver   : " << to << " chunks on " << min;
        LOG(1) << "threshold  : " << threshold;

        if (imbalance < threshold)
            continue;

        const vector<ChunkType>& chunks = distribution.getChunks(from);
        unsigned numJumboChunks = 0;
        for (unsigned j = 0; j < chunks.size(); j++) {
            const ChunkType& chunk = chunks[j];
            if (distribution.getTagForChunk(chunk) != tag)
                continue;

            if (chunk.getJumbo()) {
                numJumboChunks++;
                continue;
            }

            log() << " ns: " << ns << " going to move " << chunk << " from: " << from
                  << " to: " << to << " tag [" << tag << "]";
            return new MigrateInfo(ns, to, from, chunk);
        }

        if (numJumboChunks) {
            error() << "shard: " << from << " ns: " << ns
                    << " has too many chunks, but they are all jumbo "
                    << " numJumboChunks: " << numJumboChunks;
            continue;
        }

        verify(false);  // should be impossible
    }

    // Everything is balanced here!
    return NULL;
}