Exemplo n.º 1
0
/**
 * Serializes this manager's range bookkeeping into 'builder' while holding '_managerLock'.
 *
 * First lets '_rangesToClean' append itself, then writes a "pendingChunks" array with one entry
 * per element of '_receivingChunks'. When '_metadata' is non-empty, an "activeMetadataRanges"
 * array built from the chunks of its last element follows; otherwise that array is omitted.
 */
void MetadataManager::append(BSONObjBuilder* builder) const {
    stdx::lock_guard<stdx::mutex> lg(_managerLock);

    _rangesToClean.append(builder);

    // Chunks currently registered in '_receivingChunks'.
    BSONArrayBuilder pendingArr(builder->subarrayStart("pendingChunks"));
    for (const auto& pending : _receivingChunks) {
        BSONObjBuilder rangeObj;
        ChunkRange(pending.first, pending.second).append(&rangeObj);
        pendingArr.append(rangeObj.done());
    }
    pendingArr.done();

    // The active ranges come from the most recent metadata entry, if there is one.
    if (!_metadata.empty()) {
        BSONArrayBuilder activeArr(builder->subarrayStart("activeMetadataRanges"));
        for (const auto& active : _metadata.back()->metadata.getChunks()) {
            BSONObjBuilder rangeObj;
            ChunkRange(active.first, active.second).append(&rangeObj);
            activeArr.append(rangeObj.done());
        }
        activeArr.done();
    }
}
Exemplo n.º 2
0
void MetadataManager::_removeRangeToClean_inlock(const ChunkRange& range) {
    auto it = _rangesToClean.upper_bound(range.getMin());
    // We want our iterator to point at the greatest value
    // that is still less than or equal to range.
    if (it != _rangesToClean.begin()) {
        --it;
    }

    for (; it != _rangesToClean.end() && it->first < range.getMax();) {
        if (it->second <= range.getMin()) {
            ++it;
            continue;
        }

        // There's overlap between *it and range so we remove *it
        // and then replace with new ranges.
        BSONObj oldMin = it->first, oldMax = it->second;
        _rangesToClean.erase(it++);
        if (oldMin < range.getMin()) {
            _addRangeToClean_inlock(ChunkRange(oldMin, range.getMin()));
        }

        if (oldMax > range.getMax()) {
            _addRangeToClean_inlock(ChunkRange(range.getMax(), oldMax));
        }
    }
}
Exemplo n.º 3
0
/**
 * Reports the ranges tracked by this manager into 'builder' while holding '_managerLock'.
 *
 * Three arrays are always written, in this order:
 *   - "rangesToClean": one entry per element of '_rangesToClean'
 *   - "pendingChunks": one entry per element of '_receivingChunks'
 *   - "activeMetadataRanges": one entry per chunk of the active metadata; left empty when there
 *     is no active metadata.
 */
void MetadataManager::append(BSONObjBuilder* builder) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    BSONArrayBuilder rtcArr(builder->subarrayStart("rangesToClean"));
    for (const auto& entry : _rangesToClean) {
        BSONObjBuilder obj;
        ChunkRange r = ChunkRange(entry.first, entry.second);
        r.append(&obj);
        rtcArr.append(obj.done());
    }
    rtcArr.done();

    BSONArrayBuilder pcArr(builder->subarrayStart("pendingChunks"));
    for (const auto& entry : _receivingChunks) {
        BSONObjBuilder obj;
        ChunkRange r = ChunkRange(entry.first, entry.second);
        r.append(&obj);
        pcArr.append(obj.done());
    }
    pcArr.done();

    BSONArrayBuilder amrArr(builder->subarrayStart("activeMetadataRanges"));
    // The active metadata pointer is null while the collection is not sharded. Dereferencing it
    // unconditionally would crash, so report an empty array in that case.
    if (_activeMetadataTracker->metadata) {
        for (const auto& entry : _activeMetadataTracker->metadata->getChunks()) {
            BSONObjBuilder obj;
            ChunkRange r = ChunkRange(entry.first, entry.second);
            r.append(&obj);
            amrArr.append(obj.done());
        }
    }
    amrArr.done();
}
Exemplo n.º 4
0
/**
 * Acquires the distributed lock for the namespace of 'migration' through the catalog client.
 *
 * Returns the scoped lock on success. On failure, logs a warning and returns a status that keeps
 * the original error code but carries a message naming the namespace and the chunk range.
 */
StatusWith<DistLockManager::ScopedDistLock> MigrationManager::_getDistLock(
    OperationContext* txn, const Migration& migration) {
    const auto& migrateInfo = migration.chunkInfo.migrateInfo;

    // Rendered once; used both in the lock's "why" message and in the failure message.
    const std::string chunkRangeStr = ChunkRange(migrateInfo.minKey, migrateInfo.maxKey).toString();

    const std::string whyMessage(str::stream() << "migrating chunk " << chunkRangeStr << " in "
                                               << migrateInfo.ns);

    StatusWith<DistLockManager::ScopedDistLock> distLockStatus =
        Grid::get(txn)->catalogClient(txn)->distLock(txn, migrateInfo.ns, whyMessage);

    if (distLockStatus.isOK()) {
        return std::move(distLockStatus.getValue());
    }

    // Only the chunk range is passed through redact() in the failure message.
    const std::string msg = str::stream()
        << "Could not acquire collection lock for " << migrateInfo.ns << " to migrate chunk "
        << redact(chunkRangeStr) << " due to " << distLockStatus.getStatus().toString();
    warning() << msg;
    return {distLockStatus.getStatus().code(), msg};
}
Exemplo n.º 5
0
/**
 * Parses a ChunkRange out of 'obj'.
 *
 * The 'kMinKey' and 'kMaxKey' fields are pulled out with extractObject(); any extraction error is
 * returned as-is. Returns FailedToParse when the min key does not compare strictly less than the
 * max key under the simple BSON comparator. On success the returned range holds owned copies of
 * both keys.
 */
StatusWith<ChunkRange> ChunkRange::fromBSON(const BSONObj& obj) {
    BSONElement minElem;
    const Status minStatus = extractObject(obj, kMinKey, &minElem);
    if (!minStatus.isOK()) {
        return minStatus;
    }

    BSONElement maxElem;
    const Status maxStatus = extractObject(obj, kMaxKey, &maxElem);
    if (!maxStatus.isOK()) {
        return maxStatus;
    }

    const BSONObj minObj = minElem.Obj();
    const BSONObj maxObj = maxElem.Obj();

    if (SimpleBSONObjComparator::kInstance.evaluate(minObj >= maxObj)) {
        return {ErrorCodes::FailedToParse,
                str::stream() << "min: " << minObj << " should be less than max: " << maxObj};
    }

    return ChunkRange(minObj.getOwned(), maxObj.getOwned());
}
std::string MoveChunkRequest::toString() const {
    std::stringstream ss;
    ss << "ns: " << getNss().ns() << ", " << redact(ChunkRange(getMinKey(), getMaxKey()).toString())
       << ", fromShard: " << getFromShardId() << ", toShard: " << getToShardId();

    return ss.str();
}
Exemplo n.º 7
0
/**
 * Parses a ChunkRange out of 'obj'.
 *
 * Both the 'kMinKey' and 'kMaxKey' fields must be present with type Object; otherwise the
 * extraction error code is returned with a message saying which key was invalid. Returns BadValue
 * when either key is an empty object, and FailedToParse when the min key does not compare
 * strictly less than the max key under the simple BSON comparator. On success the returned range
 * holds owned copies of both keys.
 */
StatusWith<ChunkRange> ChunkRange::fromBSON(const BSONObj& obj) {
    BSONElement minElem;
    const Status minStatus = bsonExtractTypedField(obj, kMinKey, Object, &minElem);
    if (!minStatus.isOK()) {
        return {minStatus.code(),
                str::stream() << "Invalid min key due to " << minStatus.reason()};
    }

    const BSONObj minObj = minElem.Obj();
    if (minObj.isEmpty()) {
        return {ErrorCodes::BadValue, "The min key cannot be empty"};
    }

    BSONElement maxElem;
    const Status maxStatus = bsonExtractTypedField(obj, kMaxKey, Object, &maxElem);
    if (!maxStatus.isOK()) {
        return {maxStatus.code(),
                str::stream() << "Invalid max key due to " << maxStatus.reason()};
    }

    const BSONObj maxObj = maxElem.Obj();
    if (maxObj.isEmpty()) {
        return {ErrorCodes::BadValue, "The max key cannot be empty"};
    }

    if (SimpleBSONObjComparator::kInstance.evaluate(minObj >= maxObj)) {
        return {ErrorCodes::FailedToParse,
                str::stream() << "min: " << minObj << " should be less than max: " << maxObj};
    }

    return ChunkRange(minObj.getOwned(), maxObj.getOwned());
}
Exemplo n.º 8
0
/**
 * Computes the intersection of this range with 'other'.
 *
 * Returns boost::none when either range's max key compares less than or equal to the other
 * range's min key. Otherwise returns the range running from the greater of the two min keys to
 * the lesser of the two max keys, with keys compared via woCompare().
 */
boost::optional<ChunkRange> ChunkRange::overlapWith(ChunkRange const& other) const {
    const auto notAfter = [](auto const& lhs, auto const& rhs) { return lhs.woCompare(rhs) <= 0; };

    // 'other' ends at or before the point where this range starts.
    if (notAfter(other._maxKey, _minKey)) {
        return boost::none;
    }

    // This range ends at or before the point where 'other' starts.
    if (notAfter(_maxKey, other._minKey)) {
        return boost::none;
    }

    const auto& overlapMin = notAfter(_minKey, other._minKey) ? other._minKey : _minKey;
    const auto& overlapMax = notAfter(_maxKey, other._maxKey) ? _maxKey : other._maxKey;
    return ChunkRange(overlapMin, overlapMax);
}
/**
 * Builds the ConflictingOperationInProgress status returned when a new migration cannot start
 * because this shard is already donating the chunk described by 'args'. The message names the
 * chunk range, the namespace and the destination shard id.
 */
Status ActiveMigrationsRegistry::ActiveMoveChunkState::constructErrorStatus() const {
    const ChunkRange donatedRange(args.getMinKey(), args.getMaxKey());

    const std::string reason = str::stream()
        << "Unable to start new migration because this shard is currently "
           "donating chunk "
        << donatedRange.toString() << " for namespace " << args.getNss().ns() << " to "
        << args.getToShardId();

    return {ErrorCodes::ConflictingOperationInProgress, reason};
}
Exemplo n.º 10
0
/**
 * Parses a ChunkRange out of 'obj'.
 *
 * Both the 'kMinKey' and 'kMaxKey' fields must be present with type Object; otherwise the
 * extraction error code is returned with a message saying which key was invalid. No further
 * validation of the two keys is performed here. On success the returned range holds owned copies
 * of both keys.
 */
StatusWith<ChunkRange> ChunkRange::fromBSON(const BSONObj& obj) {
    BSONElement minElem;
    const Status minStatus = bsonExtractTypedField(obj, kMinKey, Object, &minElem);
    if (!minStatus.isOK()) {
        return {minStatus.code(),
                str::stream() << "Invalid min key due to " << minStatus.reason()};
    }

    BSONElement maxElem;
    const Status maxStatus = bsonExtractTypedField(obj, kMaxKey, Object, &maxElem);
    if (!maxStatus.isOK()) {
        return {maxStatus.code(),
                str::stream() << "Invalid max key due to " << maxStatus.reason()};
    }

    return ChunkRange(minElem.Obj().getOwned(), maxElem.Obj().getOwned());
}
Exemplo n.º 11
0
/**
 * Returns the smallest range covering both this range and 'other': from the lesser of the two
 * min keys to the greater of the two max keys, with keys compared via woCompare(). The two ranges
 * are not checked for overlap or adjacency.
 */
ChunkRange ChunkRange::unionWith(ChunkRange const& other) const {
    const auto notAfter = [](auto const& lhs, auto const& rhs) { return lhs.woCompare(rhs) <= 0; };

    const auto& lowestMin = notAfter(_minKey, other._minKey) ? _minKey : other._minKey;
    const auto& highestMax = notAfter(_maxKey, other._maxKey) ? other._maxKey : _maxKey;

    return ChunkRange(lowestMin, highestMax);
}
Exemplo n.º 12
0
/**
 * Validates that the chunk described by 'request' can be donated by this shard and snapshots the
 * committed collection metadata the migration will run against.
 *
 * Must be invoked with no locks held on 'txn' (enforced by an invariant). Raises a user assertion
 * when the operation carries no shard version, when the metadata refresh fails, when the
 * refreshed shard version has major version zero, or when no chunk with exactly the requested
 * bounds is found in the snapshotted metadata. Throws SendStaleConfigException when the epoch
 * sent with the command differs from the epoch of the snapshotted metadata.
 */
MigrationSourceManager::MigrationSourceManager(OperationContext* txn, MoveChunkRequest request)
    : _args(std::move(request)), _startTime() {
    invariant(!txn->lockState()->isLocked());

    const auto& operationShardingState = OperationShardingState::get(txn);
    if (!operationShardingState.hasShardVersion()) {
        uasserted(ErrorCodes::InvalidOptions, "collection version is missing");
    }

    // The value moveChunk transmits in the operation's shardVersion field is not really a shard
    // version: it is the global collection version, and is treated as such from here on.
    const ChunkVersion expectedCollectionVersion =
        operationShardingState.getShardVersion(_args.getNss());

    // Rendered once; every diagnostic below refers to the same chunk bounds.
    const std::string chunkRangeStr = ChunkRange(_args.getMinKey(), _args.getMaxKey()).toString();

    log() << "Starting chunk migration for " << chunkRangeStr
          << " with expected collection version " << expectedCollectionVersion;

    // Refresh the sharding metadata for the namespace and pick up the resulting shard version.
    ShardingState* const localShardingState = ShardingState::get(txn);

    ChunkVersion shardVersion;

    const Status refreshStatus =
        localShardingState->refreshMetadataNow(txn, _args.getNss().ns(), &shardVersion);
    if (!refreshStatus.isOK()) {
        uasserted(refreshStatus.code(),
                  str::stream() << "cannot start migrate of chunk " << chunkRangeStr << " due to "
                                << refreshStatus.toString());
    }

    // A zero major version means this shard has no chunks of the collection, so there is nothing
    // to migrate.
    if (shardVersion.majorVersion() == 0) {
        uasserted(ErrorCodes::IncompatibleShardingMetadata,
                  str::stream() << "cannot start migrate of chunk " << chunkRangeStr
                                << " with zero shard version");
    }

    // Capture the committed metadata as of the start of the migration, under intent-shared locks.
    {
        ScopedTransaction scopedXact(txn, MODE_IS);
        AutoGetCollection autoColl(txn, _args.getNss(), MODE_IS);

        auto collShardingState = CollectionShardingState::get(txn, _args.getNss());
        _committedMetadata = collShardingState->getMetadata();
    }

    const ChunkVersion collectionVersion = _committedMetadata->getCollVersion();

    const bool epochChanged = expectedCollectionVersion.epoch() != collectionVersion.epoch();
    if (epochChanged) {
        throw SendStaleConfigException(
            _args.getNss().ns(),
            str::stream() << "cannot move chunk " << chunkRangeStr
                          << " because collection may have been dropped. "
                          << "current epoch: "
                          << collectionVersion.epoch()
                          << ", cmd epoch: "
                          << expectedCollectionVersion.epoch(),
            expectedCollectionVersion,
            collectionVersion);
    }

    // With a nonzero shard version, the collection version must be at least the shard version
    // and a shard key pattern must exist.
    invariant(collectionVersion >= shardVersion);
    invariant(!_committedMetadata->getKeyPattern().isEmpty());

    ChunkType origChunk;
    if (!_committedMetadata->getNextChunk(_args.getMinKey(), &origChunk)) {
        // Reaching this point means the caller of the shard's moveChunk command (mongos or the
        // CSRS balancer) did not verify that the chunk belongs to this shard. The error is benign
        // and does not indicate data corruption.
        uasserted(40145,
                  str::stream() << "Chunk with bounds " << chunkRangeStr
                                << " is not owned by this shard.");
    }

    const bool boundsMatchExactly = origChunk.getMin().woCompare(_args.getMinKey()) == 0 &&
        origChunk.getMax().woCompare(_args.getMaxKey()) == 0;

    uassert(40146,
            str::stream() << "Unable to find chunk with the exact bounds " << chunkRangeStr
                          << " at collection version "
                          << collectionVersion.toString()
                          << ". This indicates corrupted metadata.",
            boundsMatchExactly);
}
Exemplo n.º 13
0
/**
 * Attempts to auto-split the chunk of 'nss' whose bounds are exactly 'min' and 'max'.
 *
 * Returns without doing anything when '_isPrimary' is false, when the chunk currently containing
 * 'min' no longer has those bounds or is owned by a different shard, when auto-splitting is
 * disabled in the balancer configuration, or when splitVector() yields at most one split point.
 *
 * For ordered shard key patterns, when the chunk is the first or last one of the collection, one
 * of the split points is replaced by the extreme key found on this shard and the min key of the
 * resulting "top chunk" is remembered. After a successful split, moveChunk() is called for that
 * top chunk when auto-balancing is enabled.
 *
 * 'dataWritten' is only used in a diagnostic log message. No exception escapes this function:
 * DBException and std::exception are caught and logged.
 */
void ChunkSplitter::_runAutosplit(const NamespaceString& nss,
                                  const BSONObj& min,
                                  const BSONObj& max,
                                  long dataWritten) {
    if (!_isPrimary) {
        return;
    }

    try {
        const auto opCtx = cc().makeOperationContext();
        const auto routingInfo = uassertStatusOK(
            Grid::get(opCtx.get())->catalogCache()->getCollectionRoutingInfo(opCtx.get(), nss));

        uassert(ErrorCodes::NamespaceNotSharded,
                "Could not split chunk. Collection is no longer sharded",
                routingInfo.cm());

        const auto cm = routingInfo.cm();
        const auto chunk = cm->findIntersectingChunkWithSimpleCollation(min);

        // Stop if chunk's range differs from the range we were expecting to split.
        if ((0 != chunk.getMin().woCompare(min)) || (0 != chunk.getMax().woCompare(max)) ||
            (chunk.getShardId() != ShardingState::get(opCtx.get())->getShardName())) {
            LOG(1) << "Cannot auto-split chunk with range '"
                   << redact(ChunkRange(min, max).toString()) << "' for nss '" << nss
                   << "' on shard '" << ShardingState::get(opCtx.get())->getShardName()
                   << "' because since scheduling auto-split the chunk has been changed to '"
                   << redact(chunk.toString()) << "'";
            return;
        }

        const ChunkRange chunkRange(chunk.getMin(), chunk.getMax());

        const auto balancerConfig = Grid::get(opCtx.get())->getBalancerConfiguration();
        // Ensure we have the most up-to-date balancer configuration
        uassertStatusOK(balancerConfig->refreshAndCheck(opCtx.get()));

        if (!balancerConfig->getShouldAutoSplit()) {
            return;
        }

        const uint64_t maxChunkSizeBytes = balancerConfig->getMaxChunkSizeBytes();

        LOG(1) << "about to initiate autosplit: " << redact(chunk.toString())
               << " dataWritten since last check: " << dataWritten
               << " maxChunkSizeBytes: " << maxChunkSizeBytes;

        auto splitPoints = uassertStatusOK(splitVector(opCtx.get(),
                                                       nss,
                                                       cm->getShardKeyPattern().toBSON(),
                                                       chunk.getMin(),
                                                       chunk.getMax(),
                                                       false,
                                                       boost::none,
                                                       boost::none,
                                                       boost::none,
                                                       maxChunkSizeBytes));

        if (splitPoints.size() <= 1) {
            // No split points means there isn't enough data to split on; 1 split point means we
            // have between half the chunk size to full chunk size so there is no need to split yet
            return;
        }

        // We assume that if the chunk being split is the first (or last) one on the collection,
        // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the
        // very first (or last) key as a split point.
        //
        // This heuristic is skipped for "special" shard key patterns that are not likely to produce
        // monotonically increasing or decreasing values (e.g. hashed shard keys).

        // Keeps track of the minKey of the top chunk after the split so we can migrate the chunk.
        // It stays empty when the top-chunk heuristic does not apply.
        BSONObj topChunkMinKey;

        if (KeyPattern::isOrderedKeyPattern(cm->getShardKeyPattern().toBSON())) {
            if (0 ==
                cm->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk.getMin())) {
                // MinKey is infinity (This is the first chunk on the collection)
                BSONObj key =
                    findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), true);
                if (!key.isEmpty()) {
                    splitPoints.front() = key.getOwned();
                    topChunkMinKey = cm->getShardKeyPattern().getKeyPattern().globalMin();
                }
            } else if (0 ==
                       cm->getShardKeyPattern().getKeyPattern().globalMax().woCompare(
                           chunk.getMax())) {
                // MaxKey is infinity (This is the last chunk on the collection)
                BSONObj key =
                    findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), false);
                if (!key.isEmpty()) {
                    splitPoints.back() = key.getOwned();
                    topChunkMinKey = key.getOwned();
                }
            }
        }

        uassertStatusOK(splitChunkAtMultiplePoints(opCtx.get(),
                                                   chunk.getShardId(),
                                                   nss,
                                                   cm->getShardKeyPattern(),
                                                   cm->getVersion(),
                                                   chunkRange,
                                                   splitPoints));

        const bool shouldBalance = isAutoBalanceEnabled(opCtx.get(), nss, balancerConfig);

        // The trailing suffix is only added when a top chunk was recorded; it also states whether
        // migrations are currently allowed.
        log() << "autosplitted " << nss << " chunk: " << redact(chunk.toString()) << " into "
              << (splitPoints.size() + 1) << " parts (maxChunkSizeBytes " << maxChunkSizeBytes
              << ")"
              << (topChunkMinKey.isEmpty() ? "" : " (top chunk migration suggested" +
                          (std::string)(shouldBalance ? ")" : ", but no migrations allowed)"));

        // Balance the resulting chunks if the autobalance option is enabled and if we split at the
        // first or last chunk on the collection as part of top chunk optimization.

        if (!shouldBalance || topChunkMinKey.isEmpty()) {
            return;
        }

        // Tries to move the top chunk out of the shard to prevent the hot spot from staying on a
        // single shard. This is based on the assumption that succeeding inserts will fall on the
        // top chunk.
        moveChunk(opCtx.get(), nss, topChunkMinKey);
    } catch (const DBException& ex) {
        log() << "Unable to auto-split chunk " << redact(ChunkRange(min, max).toString())
              << " in nss " << nss << causedBy(redact(ex.toStatus()));
    } catch (const std::exception& e) {
        log() << "caught exception while splitting chunk: " << redact(e.what());
    }
}
Exemplo n.º 14
0
/**
 * Schedules a moveChunk command for every entry of '_activeMigrations' and waits for them to
 * finish.
 *
 * For each migration this looks up the chunk manager and the chunk containing the migration's min
 * key, takes the distributed lock through _takeDistLockForAMigration(), builds the moveChunk
 * command and schedules it on the fixed executor with _checkMigrationCallback() as the completion
 * callback. A migration that fails at the chunk manager lookup, host lookup or scheduling step
 * gets its status inserted into 'migrationStatuses' (under '_mutex') and is skipped.
 *
 * After _waitForMigrations() returns, '_distributedLocks' is cleared. If '_rescheduledMigrations'
 * is non-empty, those migrations become the active set and this function calls itself again;
 * otherwise '_activeMigrations' is cleared.
 */
void MigrationManager::_executeMigrations(OperationContext* txn,
                                          MigrationStatuses* migrationStatuses) {
    for (auto& migration : _activeMigrations) {
        const NamespaceString nss(migration.chunkInfo.migrateInfo.ns);

        auto scopedCMStatus = ScopedChunkManager::getExisting(txn, nss);
        if (!scopedCMStatus.isOK()) {
            // Unable to find the ChunkManager for "nss" for whatever reason; abandon this
            // migration and proceed to the next.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(MigrationStatuses::value_type(
                migration.chunkInfo.migrateInfo.getName(), std::move(scopedCMStatus.getStatus())));
            continue;
        }

        ChunkManager* const chunkManager = scopedCMStatus.getValue().cm();
        // The chunk is looked up before the distributed lock is taken below.
        auto chunk =
            chunkManager->findIntersectingChunk(txn, migration.chunkInfo.migrateInfo.minKey);

        {
            // No need to lock the mutex. Only this function and _takeDistLockForAMigration
            // manipulate "_distributedLocks". No need to protect serial actions.
            if (!_takeDistLockForAMigration(txn, migration, migrationStatuses)) {
                // If there is a lock conflict between the balancer and the shard, or a shard and a
                // shard, the migration has been rescheduled. Otherwise an attempt to take the lock
                // failed for whatever reason and this migration is being abandoned.
                continue;
            }
        }

        const MigrationRequest& migrationRequest = migration.chunkInfo;

        BSONObjBuilder builder;
        MoveChunkRequest::appendAsCommand(
            &builder,
            nss,
            chunkManager->getVersion(),
            Grid::get(txn)->shardRegistry()->getConfigServerConnectionString(),
            migrationRequest.migrateInfo.from,
            migrationRequest.migrateInfo.to,
            ChunkRange(chunk->getMin(), chunk->getMax()),
            migrationRequest.maxChunkSizeBytes,
            migrationRequest.secondaryThrottle,
            migrationRequest.waitForDelete,
            migration.oldShard ? true : false);  // takeDistLock flag.

        BSONObj moveChunkRequestObj = builder.obj();

        // NOTE(review): despite its name, 'recipientShard' is looked up with the migration's
        // 'from' shard id, i.e. the command is targeted at the shard the chunk moves away from.
        // NOTE(review): the result of getShard() is dereferenced without a null check - confirm
        // it cannot be null here.
        const auto recipientShard =
            grid.shardRegistry()->getShard(txn, migration.chunkInfo.migrateInfo.from);
        const auto host = recipientShard->getTargeter()->findHost(
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            RemoteCommandTargeter::selectFindHostMaxWaitTime(txn));
        if (!host.isOK()) {
            // Unable to find a target shard for whatever reason; abandon this migration and proceed
            // to the next.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(MigrationStatuses::value_type(
                migration.chunkInfo.migrateInfo.getName(), std::move(host.getStatus())));
            continue;
        }

        RemoteCommandRequest remoteRequest(host.getValue(), "admin", moveChunkRequestObj);

        // NOTE(review): 'remoteCommandResponse' is not read anywhere else in this function.
        StatusWith<RemoteCommandResponse> remoteCommandResponse(
            Status{ErrorCodes::InternalError, "Uninitialized value"});

        executor::TaskExecutor* executor = Grid::get(txn)->getExecutorPool()->getFixedExecutor();

        // The callback receives a pointer to the element of '_activeMigrations' being iterated,
        // together with 'txn' and 'migrationStatuses'.
        StatusWith<executor::TaskExecutor::CallbackHandle> callbackHandleWithStatus =
            executor->scheduleRemoteCommand(remoteRequest,
                                            stdx::bind(&MigrationManager::_checkMigrationCallback,
                                                       this,
                                                       stdx::placeholders::_1,
                                                       txn,
                                                       &migration,
                                                       migrationStatuses));

        if (!callbackHandleWithStatus.isOK()) {
            // Scheduling the migration moveChunk failed.
            stdx::lock_guard<stdx::mutex> lk(_mutex);
            migrationStatuses->insert(
                MigrationStatuses::value_type(migration.chunkInfo.migrateInfo.getName(),
                                              std::move(callbackHandleWithStatus.getStatus())));
            continue;
        }

        // The moveChunk command was successfully scheduled. Store the callback handle so that the
        // command's return can be waited for later.
        stdx::lock_guard<stdx::mutex> lk(_mutex);
        migration.setCallbackHandle(std::move(callbackHandleWithStatus.getValue()));
    }

    _waitForMigrations(txn);
    // At this point, there are no parallel running threads so it is safe not to lock the mutex.

    // All the migrations have returned, release all of the distributed locks that are no longer
    // being used.
    _distributedLocks.clear();

    // If there are rescheduled migrations, move them to active and run the function again.
    if (!_rescheduledMigrations.empty()) {
        // Clear all the callback handles of the rescheduled migrations.
        for (auto& migration : _rescheduledMigrations) {
            migration.clearCallbackHandle();
        }

        // The explicit clear() leaves the moved-from container in a known empty state before the
        // recursive call.
        _activeMigrations = std::move(_rescheduledMigrations);
        _rescheduledMigrations.clear();
        _executeMigrations(txn, migrationStatuses);
    } else {
        _activeMigrations.clear();
    }
}
Exemplo n.º 15
0
/**
 * Tries to insert a document describing 'migrateInfo' into config.migrations and returns a
 * ScopedMigrationRequest for it.
 *
 * The insert is attempted up to 'kDuplicateKeyErrorMaxRetries' times. On a DuplicateKey error the
 * existing document is queried from the config server:
 *   - if the query fails or the document cannot be parsed, an error with the same code and an
 *     explanatory message is returned;
 *   - if the document is gone, the insert is retried;
 *   - if the document describes a migration with a different source or destination shard, the
 *     DuplicateKey status is returned;
 *   - otherwise the request is treated as successful, as if this call had written the document.
 *
 * Any other insert error is returned after a scoped request object has been constructed, so that
 * its destructor can clean up a possibly written document. When all retries are exhausted,
 * OperationFailed is returned.
 */
StatusWith<ScopedMigrationRequest> ScopedMigrationRequest::writeMigration(
    OperationContext* opCtx, const MigrateInfo& migrateInfo, bool waitForDelete) {

    // Try to write a unique migration document to config.migrations.
    const MigrationType migrationType(migrateInfo, waitForDelete);

    for (int retry = 0; retry < kDuplicateKeyErrorMaxRetries; ++retry) {
        Status result = grid.catalogClient()->insertConfigDocument(
            opCtx, MigrationType::ConfigNS, migrationType.toBSON(), kMajorityWriteConcern);

        if (result == ErrorCodes::DuplicateKey) {
            // If the exact migration described by "migrateInfo" is active, return a scoped object
            // for the request because this migration request will join the active one once
            // scheduled.
            auto statusWithMigrationQueryResult =
                grid.shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
                    opCtx,
                    ReadPreferenceSetting{ReadPreference::PrimaryOnly},
                    repl::ReadConcernLevel::kLocalReadConcern,
                    NamespaceString(MigrationType::ConfigNS),
                    BSON(MigrationType::name(migrateInfo.getName())),
                    BSONObj(),
                    boost::none);
            if (!statusWithMigrationQueryResult.isOK()) {
                return {statusWithMigrationQueryResult.getStatus().code(),
                        str::stream()
                            << "Failed to verify whether conflicting migration is in "
                            << "progress for migration '"
                            << redact(migrateInfo.toString())
                            << "' while trying to query config.migrations."
                            << causedBy(redact(statusWithMigrationQueryResult.getStatus()))};
            }
            if (statusWithMigrationQueryResult.getValue().docs.empty()) {
                // The document that caused the DuplicateKey error is no longer in the collection,
                // so retrying the insert might succeed.
                continue;
            }
            invariant(statusWithMigrationQueryResult.getValue().docs.size() == 1);

            BSONObj activeMigrationBSON = statusWithMigrationQueryResult.getValue().docs.front();
            auto statusWithActiveMigration = MigrationType::fromBSON(activeMigrationBSON);
            if (!statusWithActiveMigration.isOK()) {
                return {statusWithActiveMigration.getStatus().code(),
                        str::stream() << "Failed to verify whether conflicting migration is in "
                                      << "progress for migration '"
                                      << redact(migrateInfo.toString())
                                      << "' while trying to parse active migration document '"
                                      << redact(activeMigrationBSON.toString())
                                      << "'."
                                      << causedBy(redact(statusWithActiveMigration.getStatus()))};
            }

            // Only the source and destination shards are compared to decide whether the active
            // migration is the same one as the requested migration.
            MigrateInfo activeMigrateInfo = statusWithActiveMigration.getValue().toMigrateInfo();
            if (activeMigrateInfo.to != migrateInfo.to ||
                activeMigrateInfo.from != migrateInfo.from) {
                log() << "Failed to write document '" << redact(migrateInfo.toString())
                      << "' to config.migrations because there is already an active migration for"
                      << " that chunk: '" << redact(activeMigrateInfo.toString()) << "'."
                      << causedBy(redact(result));
                return result;
            }

            // The identical migration is already recorded; treat the write as successful.
            result = Status::OK();
        }

        // As long as there isn't a DuplicateKey error, the document may have been written, and it's
        // safe (won't delete another migration's document) and necessary to try to clean up the
        // document via the destructor.
        ScopedMigrationRequest scopedMigrationRequest(
            opCtx, NamespaceString(migrateInfo.ns), migrateInfo.minKey);

        // If there was a write error, let the object go out of scope and clean up in the
        // destructor.
        if (!result.isOK()) {
            return result;
        }

        return std::move(scopedMigrationRequest);
    }

    // Every attempt ended with the conflicting document having disappeared before it could be
    // inspected.
    return Status(ErrorCodes::OperationFailed,
                  str::stream() << "Failed to insert the config.migrations document after max "
                                << "number of retries. Chunk '"
                                << ChunkRange(migrateInfo.minKey, migrateInfo.maxKey).toString()
                                << "' in collection '"
                                << migrateInfo.ns
                                << "' was being moved (somewhere) by another operation.");
}
Exemplo n.º 16
0
/**
 * Installs 'remoteMetadata' as the active metadata, reconciling it with the chunks this manager
 * is currently receiving.
 *
 * Under '_managerLock':
 *   - a null 'remoteMetadata' marks the collection as not sharded and clears '_receivingChunks'
 *     and '_rangesToClean';
 *   - when there is no active metadata yet, 'remoteMetadata' is installed as-is;
 *   - when the epoch differs from the active metadata's, all tracked state is cleared and
 *     'remoteMetadata' is installed;
 *   - when the active collection version is already >= the remote one, nothing changes;
 *   - otherwise every receiving chunk fully contained in the remote chunks is dropped from
 *     '_receivingChunks', partially overlapping ones are scheduled for cleanup, and the remaining
 *     receiving chunks are added to 'remoteMetadata' as pending before it is installed.
 */
void MetadataManager::refreshActiveMetadata(std::unique_ptr<CollectionMetadata> remoteMetadata) {
    // NOTE(review): '_activeMetadataTracker->metadata' is read here before '_managerLock' is
    // acquired, while every other access in this function happens under the lock - confirm the
    // callers provide the necessary exclusion.
    LOG(1) << "Refreshing the active metadata from "
           << (_activeMetadataTracker->metadata ? _activeMetadataTracker->metadata->toStringBasic()
                                                : "(empty)")
           << ", to " << (remoteMetadata ? remoteMetadata->toStringBasic() : "(empty)");

    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    // Collection is not sharded anymore
    if (!remoteMetadata) {
        log() << "Marking collection as not sharded.";

        _receivingChunks.clear();
        _rangesToClean.clear();

        _setActiveMetadata_inlock(nullptr);
        return;
    }

    invariant(!remoteMetadata->getCollVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));
    invariant(!remoteMetadata->getShardVersion().isWriteCompatibleWith(ChunkVersion::UNSHARDED()));

    // Collection is not sharded currently
    if (!_activeMetadataTracker->metadata) {
        log() << "Marking collection as sharded with version " << remoteMetadata->toStringBasic();

        invariant(_receivingChunks.empty());
        invariant(_rangesToClean.empty());

        _setActiveMetadata_inlock(std::move(remoteMetadata));
        return;
    }

    // If the metadata being installed has a different epoch from ours, this means the collection
    // was dropped and recreated, so we must entirely reset the metadata state
    if (_activeMetadataTracker->metadata->getCollVersion().epoch() !=
        remoteMetadata->getCollVersion().epoch()) {
        log() << "Overwriting collection metadata due to epoch change.";

        _receivingChunks.clear();
        _rangesToClean.clear();

        _setActiveMetadata_inlock(std::move(remoteMetadata));
        return;
    }

    // We already have newer version
    if (_activeMetadataTracker->metadata->getCollVersion() >= remoteMetadata->getCollVersion()) {
        LOG(1) << "Attempted to refresh active metadata "
               << _activeMetadataTracker->metadata->toStringBasic() << " with an older version "
               << remoteMetadata->toStringBasic();

        return;
    }

    // Resolve any receiving chunks, which might have completed by now
    for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
        // Copies are taken because the entry may be erased below.
        const BSONObj min = it->first;
        const BSONObj max = it->second;

        // Our pending range overlaps at least one chunk
        if (rangeMapContains(remoteMetadata->getChunks(), min, max)) {
            // The remote metadata contains a chunk we were earlier in the process of receiving, so
            // we deem it successfully received.
            LOG(2) << "Verified chunk " << ChunkRange(min, max).toString()
                   << " was migrated earlier to this shard";

            _receivingChunks.erase(it++);
            continue;
        } else if (!rangeMapOverlaps(remoteMetadata->getChunks(), min, max)) {
            // No overlap at all: the chunk is still being received, keep it.
            ++it;
            continue;
        }

        // Partial overlap indicates that the earlier migration has failed, but the chunk being
        // migrated underwent some splits and other migrations and ended up here again. In this
        // case, we will request full reload of the metadata. Currently this cannot happen, because
        // all migrations are with the explicit knowledge of the recipient shard. However, we leave
        // the option open so that chunk splits can do empty chunk move without having to notify the
        // recipient.
        RangeVector overlappedChunks;
        getRangeMapOverlap(remoteMetadata->getChunks(), min, max, &overlappedChunks);

        // NOTE(review): the overlapped ranges come from the remote metadata's chunks, yet each
        // one's min key is expected (by invariant) to be a key of '_receivingChunks' - confirm
        // this holds if the partial-overlap path ever becomes reachable.
        for (const auto& overlapChunkMin : overlappedChunks) {
            auto itRecv = _receivingChunks.find(overlapChunkMin.first);
            invariant(itRecv != _receivingChunks.end());

            const ChunkRange receivingRange(itRecv->first, itRecv->second);

            _receivingChunks.erase(itRecv);

            // Make sure any potentially partially copied chunks are scheduled to be cleaned up
            _addRangeToClean_inlock(receivingRange);
        }

        // Need to reset the iterator
        it = _receivingChunks.begin();
    }

    // For compatibility with the current range deleter, which is driven entirely by the contents of
    // the CollectionMetadata update the pending chunks
    for (const auto& receivingChunk : _receivingChunks) {
        ChunkType chunk;
        chunk.setMin(receivingChunk.first);
        chunk.setMax(receivingChunk.second);
        remoteMetadata = remoteMetadata->clonePlusPending(chunk);
    }

    _setActiveMetadata_inlock(std::move(remoteMetadata));
}
Exemplo n.º 17
0
/**
 * Returns the first range stored in '_rangesToClean', read under '_managerLock'. The container
 * must not be empty (enforced by an invariant). The entry is not removed.
 */
ChunkRange MetadataManager::getNextRangeToClean() {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    invariant(!_rangesToClean.empty());

    const auto& firstEntry = *_rangesToClean.begin();
    return ChunkRange(firstEntry.first, firstEntry.second.getMax());
}