Example #1
void MetadataManager::append(BSONObjBuilder* builder) const {
    stdx::lock_guard<stdx::mutex> lg(_managerLock);

    _rangesToClean.append(builder);

    BSONArrayBuilder pcArr(builder->subarrayStart("pendingChunks"));
    for (const auto& entry : _receivingChunks) {
        BSONObjBuilder obj;
        ChunkRange r(entry.first, entry.second);
        r.append(&obj);
        pcArr.append(obj.done());
    }
    pcArr.done();

    if (_metadata.empty()) {
        return;
    }

    BSONArrayBuilder amrArr(builder->subarrayStart("activeMetadataRanges"));
    for (const auto& entry : _metadata.back()->metadata.getChunks()) {
        BSONObjBuilder obj;
        ChunkRange r(entry.first, entry.second);
        r.append(&obj);
        amrArr.append(obj.done());
    }
    amrArr.done();
}
Example #2
ChunkRange::ChunkRange(const ChunkRange& min, const ChunkRange& max)
    : _manager(min.getManager()),
      _shardId(min.getShardId()),
      _min(min.getMin()),
      _max(max.getMax()) {
    invariant(min.getShardId() == max.getShardId());
    invariant(min.getManager() == max.getManager());
    invariant(min.getMax() == max.getMin());
}
Example #3
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       const ChunkVersion& shardVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete,
                                       bool takeDistLock) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    shardVersion.appendForCommands(builder);
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, takeDistLock);
}
Example #4
void MoveChunkRequest::appendAsCommand(BSONObjBuilder* builder,
                                       const NamespaceString& nss,
                                       ChunkVersion chunkVersion,
                                       const ConnectionString& configServerConnectionString,
                                       const ShardId& fromShardId,
                                       const ShardId& toShardId,
                                       const ChunkRange& range,
                                       int64_t maxChunkSizeBytes,
                                       const MigrationSecondaryThrottleOptions& secondaryThrottle,
                                       bool waitForDelete) {
    invariant(builder->asTempObj().isEmpty());
    invariant(nss.isValid());

    builder->append(kMoveChunk, nss.ns());
    chunkVersion.appendToCommand(builder);  // 3.4 shard compatibility
    builder->append(kEpoch, chunkVersion.epoch());
    // config connection string is included for 3.4 shard compatibility
    builder->append(kConfigServerConnectionString, configServerConnectionString.toString());
    builder->append(kFromShardId, fromShardId.toString());
    builder->append(kToShardId, toShardId.toString());
    range.append(builder);
    builder->append(kMaxChunkSizeBytes, static_cast<long long>(maxChunkSizeBytes));
    secondaryThrottle.append(builder);
    builder->append(kWaitForDelete, waitForDelete);
    builder->append(kTakeDistLock, false);
}
Example #5
void MetadataManager::_removeRangeToClean_inlock(const ChunkRange& range) {
    auto it = _rangesToClean.upper_bound(range.getMin());
    // We want our iterator to point at the greatest min key that is still
    // less than or equal to range.getMin().
    if (it != _rangesToClean.begin()) {
        --it;
    }

    for (; it != _rangesToClean.end() && it->first < range.getMax();) {
        if (it->second <= range.getMin()) {
            ++it;
            continue;
        }

        // There's overlap between *it and range, so we remove *it and then
        // re-add whatever parts of it lie outside range.
        BSONObj oldMin = it->first, oldMax = it->second;
        _rangesToClean.erase(it++);
        if (oldMin < range.getMin()) {
            _addRangeToClean_inlock(ChunkRange(oldMin, range.getMin()));
        }

        if (oldMax > range.getMax()) {
            _addRangeToClean_inlock(ChunkRange(range.getMax(), oldMax));
        }
    }
}
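Illustration only, not MongoDB code: a minimal standalone analogue of the removal logic above, using int keys in place of BSONObj bounds (removeRange and RangeMap are hypothetical names). It trims or splits any stored half-open [min, max) range that overlaps the removal range.

#include <cassert>
#include <map>
#include <utility>

using RangeMap = std::map<int, int>;  // key = range min, value = range max

// Erase every part of the stored half-open ranges that overlaps
// [range.first, range.second), re-inserting any remainder at either end.
void removeRange(RangeMap& ranges, std::pair<int, int> range) {
    auto it = ranges.upper_bound(range.first);
    // Step back so we start at the greatest min that is still <= range.first.
    if (it != ranges.begin()) {
        --it;
    }
    for (; it != ranges.end() && it->first < range.second;) {
        if (it->second <= range.first) {
            ++it;  // this range ends before the removal range; no overlap
            continue;
        }
        const int oldMin = it->first, oldMax = it->second;
        it = ranges.erase(it);
        if (oldMin < range.first) {
            ranges.emplace(oldMin, range.first);  // keep the left remainder
        }
        if (oldMax > range.second) {
            ranges.emplace(range.second, oldMax);  // keep the right remainder
        }
    }
}

int main() {
    RangeMap m{{0, 10}, {20, 30}};
    removeRange(m, {5, 25});  // splits [0, 10) and trims [20, 30)
    assert((m == RangeMap{{0, 5}, {25, 30}}));
}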
Example #6
Status CollectionShardingState::waitForClean(OperationContext* opCtx,
                                             const NamespaceString& nss,
                                             OID const& epoch,
                                             ChunkRange orphanRange) {
    while (true) {
        boost::optional<CleanupNotification> stillScheduled;

        {
            AutoGetCollection autoColl(opCtx, nss, MODE_IX);
            auto css = CollectionShardingState::get(opCtx, nss);

            {
                // First, see if the collection was dropped, but do it in a separate scope so we do
                // not hold a reference to it, which would make it appear to be in use
                auto metadata = css->_metadataManager->getActiveMetadata(css->_metadataManager);
                if (!metadata || metadata->getCollVersion().epoch() != epoch) {
                    return {ErrorCodes::StaleShardVersion, "Collection being migrated was dropped"};
                }
            }

            stillScheduled = css->trackOrphanedDataCleanup(orphanRange);
            if (!stillScheduled) {
                log() << "Finished deleting " << nss.ns() << " range "
                      << redact(orphanRange.toString());
                return Status::OK();
            }
        }

        log() << "Waiting for deletion of " << nss.ns() << " range " << orphanRange;

        Status result = stillScheduled->waitStatus(opCtx);
        if (!result.isOK()) {
            return result.withContext(str::stream() << "Failed to delete orphaned " << nss.ns()
                                                    << " range "
                                                    << orphanRange.toString());
        }
    }

    MONGO_UNREACHABLE;
}
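A minimal sketch of the same waiting pattern using standard-library types, with std::shared_future standing in for CleanupNotification (OrphanCleanupTracker and both function names are made up for this sketch): hold the lock only long enough to learn whether cleanup is still scheduled, then block with no lock held and re-check.

#include <future>
#include <mutex>
#include <optional>

class OrphanCleanupTracker {
public:
    // Deleter side: called once, when the scheduled range has been removed.
    void markFinished() {
        std::lock_guard<std::mutex> lk(_mutex);
        _pending.reset();
        _done.set_value();
    }

    // Waiter side: an empty result means nothing is scheduled any more.
    std::optional<std::shared_future<void>> stillScheduled() {
        std::lock_guard<std::mutex> lk(_mutex);
        return _pending;
    }

private:
    std::mutex _mutex;
    std::promise<void> _done;
    std::optional<std::shared_future<void>> _pending{_done.get_future().share()};
};

void waitForCleanSketch(OrphanCleanupTracker& tracker) {
    while (true) {
        auto pending = tracker.stillScheduled();  // lock is held only in here
        if (!pending) {
            return;  // finished deleting; nothing left to wait for
        }
        pending->wait();  // block with no lock held, then loop and re-check
    }
}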
Example #7
void MetadataManager::append(BSONObjBuilder* builder) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    BSONArrayBuilder rtcArr(builder->subarrayStart("rangesToClean"));
    for (const auto& entry : _rangesToClean) {
        BSONObjBuilder obj;
        ChunkRange r(entry.first, entry.second);
        r.append(&obj);
        rtcArr.append(obj.done());
    }
    rtcArr.done();

    BSONArrayBuilder pcArr(builder->subarrayStart("pendingChunks"));
    for (const auto& entry : _receivingChunks) {
        BSONObjBuilder obj;
        ChunkRange r(entry.first, entry.second);
        r.append(&obj);
        pcArr.append(obj.done());
    }
    pcArr.done();

    BSONArrayBuilder amrArr(builder->subarrayStart("activeMetadataRanges"));
    for (const auto& entry : _activeMetadataTracker->metadata->getChunks()) {
        BSONObjBuilder obj;
        ChunkRange r(entry.first, entry.second);
        r.append(&obj);
        amrArr.append(obj.done());
    }
    amrArr.done();
}
Example #8
void MetadataManager::beginReceive(const ChunkRange& range) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    // Collection is not known to be sharded if the active metadata tracker is null
    invariant(_activeMetadataTracker);

    // If the range overlaps any pending chunks, a previous migration must have failed and we
    // need to clean up all overlaps
    RangeVector overlappedChunks;
    getRangeMapOverlap(_receivingChunks, range.getMin(), range.getMax(), &overlappedChunks);

    for (const auto& overlapChunkMin : overlappedChunks) {
        auto itRecv = _receivingChunks.find(overlapChunkMin.first);
        invariant(itRecv != _receivingChunks.end());

        const ChunkRange receivingRange(itRecv->first, itRecv->second);

        _receivingChunks.erase(itRecv);

        // Make sure any potentially partially copied chunks are scheduled to be cleaned up
        _addRangeToClean_inlock(receivingRange);
    }

    // Need to ensure that the background range deleter task won't delete the range we are about to
    // receive
    _removeRangeToClean_inlock(range);
    _receivingChunks.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));

    // For compatibility with the current range deleter, update the pending chunks on the collection
    // metadata to include the chunk being received
    ChunkType chunk;
    chunk.setMin(range.getMin());
    chunk.setMax(range.getMax());
    _setActiveMetadata_inlock(_activeMetadataTracker->metadata->clonePlusPending(chunk));
}
Example #9
void MetadataManager::forgetReceive(const ChunkRange& range) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    {
        auto it = _receivingChunks.find(range.getMin());
        invariant(it != _receivingChunks.end());

        // Verify entire ChunkRange is identical, not just the min key.
        invariant(it->second == range.getMax());

        _receivingChunks.erase(it);
    }

    // This is potentially partially received data, which needs to be cleaned up
    _addRangeToClean_inlock(range);

    // For compatibility with the current range deleter, update the pending chunks on the collection
    // metadata to exclude the chunk being received, which was added in beginReceive
    ChunkType chunk;
    chunk.setMin(range.getMin());
    chunk.setMax(range.getMax());
    _setActiveMetadata_inlock(_activeMetadataTracker->metadata->cloneMinusPending(chunk));
}
Example #10
/* static */
Status CollectionShardingState::waitForClean(OperationContext* opCtx,
                                             NamespaceString nss,
                                             OID const& epoch,
                                             ChunkRange orphanRange) {
    do {
        auto stillScheduled = boost::optional<CleanupNotification>();
        {
            AutoGetCollection autoColl(opCtx, nss, MODE_IX);
            // First, see if collection was dropped.
            auto css = CollectionShardingState::get(opCtx, nss);
            {
                auto metadata = css->_metadataManager->getActiveMetadata(css->_metadataManager);
                if (!metadata || metadata->getCollVersion().epoch() != epoch) {
                    return {ErrorCodes::StaleShardVersion, "Collection being migrated was dropped"};
                }
            }  // drop metadata
            stillScheduled = css->trackOrphanedDataCleanup(orphanRange);
            if (!stillScheduled) {
                log() << "Finished deleting " << nss.ns() << " range "
                      << redact(orphanRange.toString());
                return Status::OK();
            }
        }  // drop collection lock

        log() << "Waiting for deletion of " << nss.ns() << " range " << orphanRange;
        Status result = stillScheduled->waitStatus(opCtx);
        if (!result.isOK()) {
            return Status{result.code(),
                          str::stream() << "Failed to delete orphaned " << nss.ns() << " range "
                                        << orphanRange.toString()
                                        << ": "
                                        << result.reason()};
        }
    } while (true);
    MONGO_UNREACHABLE;
}
Example #11
void MetadataManager::_addRangeToClean_inlock(const ChunkRange& range) {
    invariant(!rangeMapOverlaps(_rangesToClean, range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));
    _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));

    // If _rangesToClean was previously empty, we need to start the collection range deleter
    if (_rangesToClean.size() == 1UL) {
        ShardingState::get(_serviceContext)->scheduleCleanup(_nss);
    }
}
Example #12
void MetadataManager::forgetReceive(ChunkRange const& range) {
    stdx::lock_guard<stdx::mutex> lg(_managerLock);
    invariant(!_metadata.empty());

    // This is potentially a partially received chunk, which needs to be cleaned up. We know none
    // of these documents are in use, so they can go straight to the deletion queue.
    log() << "Abandoning in-migration of " << _nss.ns() << " range " << range
          << "; scheduling deletion of any documents already copied";

    invariant(!_overlapsInUseChunk(lg, range));

    auto it = _receivingChunks.find(range.getMin());
    invariant(it != _receivingChunks.end());
    _receivingChunks.erase(it);

    _pushRangeToClean(lg, range, Date_t{}).abandon();
}
Example #13
StatusWith<std::vector<BSONObj>> selectChunkSplitPoints(OperationContext* txn,
                                                        const ShardId& shardId,
                                                        const NamespaceString& nss,
                                                        const ShardKeyPattern& shardKeyPattern,
                                                        const ChunkRange& chunkRange,
                                                        long long chunkSizeBytes,
                                                        boost::optional<int> maxObjs) {
    BSONObjBuilder cmd;
    cmd.append("splitVector", nss.ns());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    chunkRange.append(&cmd);
    cmd.append("maxChunkSizeBytes", chunkSizeBytes);
    if (maxObjs) {
        cmd.append("maxChunkObjects", *maxObjs);
    }

    auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }

    auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts(
        txn,
        ReadPreferenceSetting{ReadPreference::PrimaryPreferred},
        "admin",
        cmd.obj(),
        Shard::RetryPolicy::kIdempotent);
    if (!cmdStatus.isOK()) {
        return std::move(cmdStatus.getStatus());
    }
    if (!cmdStatus.getValue().commandStatus.isOK()) {
        return std::move(cmdStatus.getValue().commandStatus);
    }

    const auto response = std::move(cmdStatus.getValue().response);

    std::vector<BSONObj> splitPoints;

    BSONObjIterator it(response.getObjectField("splitKeys"));
    while (it.more()) {
        splitPoints.push_back(it.next().Obj().getOwned());
    }

    return std::move(splitPoints);
}
Example #14
std::shared_ptr<Notification<Status>> MetadataManager::_addRangeToClean_inlock(
    const ChunkRange& range) {
    // This first invariant currently makes an unnecessary copy, to reuse the
    // rangeMapOverlaps helper function.
    invariant(!rangeMapOverlaps(_getCopyOfRangesToClean_inlock(), range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));

    RangeToCleanDescriptor descriptor(range.getMax().getOwned());
    _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), descriptor));

    // If _rangesToClean was previously empty, we need to start the collection range deleter
    if (_rangesToClean.size() == 1UL) {
        ShardingState::get(_serviceContext)->scheduleCleanup(_nss);
    }

    return descriptor.getNotification();
}
Example #15
void MetadataManager::_addRangeToClean_inlock(const ChunkRange& range) {
    invariant(!rangeMapOverlaps(_rangesToClean, range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));
    _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));
}
Example #16
void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* opCtx,
                                           ChunkManager* manager,
                                           Chunk* chunk,
                                           long dataWritten) {
    // Disable lastError tracking so that any errors that occur during auto-split do not get
    // bubbled up on the client connection doing a write.
    LastError::Disabled d(&LastError::get(cc()));

    const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration();

    const bool minIsInf =
        (0 == manager->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk->getMin()));
    const bool maxIsInf =
        (0 == manager->getShardKeyPattern().getKeyPattern().globalMax().woCompare(chunk->getMax()));

    const uint64_t chunkBytesWritten = chunk->addBytesWritten(dataWritten);

    const uint64_t desiredChunkSize =
        calculateDesiredChunkSize(balancerConfig->getMaxChunkSizeBytes(), manager->numChunks());

    if (!chunk->shouldSplit(desiredChunkSize, minIsInf, maxIsInf)) {
        return;
    }

    const NamespaceString nss(manager->getns());

    if (!manager->_autoSplitThrottle._splitTickets.tryAcquire()) {
        LOG(1) << "won't auto split because not enough tickets: " << nss;
        return;
    }

    TicketHolderReleaser releaser(&(manager->_autoSplitThrottle._splitTickets));

    const ChunkRange chunkRange(chunk->getMin(), chunk->getMax());

    try {
        // Ensure we have the most up-to-date balancer configuration
        uassertStatusOK(balancerConfig->refreshAndCheck(opCtx));

        if (!balancerConfig->getShouldAutoSplit()) {
            return;
        }

        LOG(1) << "about to initiate autosplit: " << redact(chunk->toString())
               << " dataWritten: " << chunkBytesWritten
               << " desiredChunkSize: " << desiredChunkSize;

        const uint64_t chunkSizeToUse = [&]() {
            const uint64_t estNumSplitPoints = chunkBytesWritten / desiredChunkSize * 2;

            if (estNumSplitPoints >= kTooManySplitPoints) {
                // The current desired chunk size would split the chunk into lots of small chunks,
                // and in the worst case this can result in thousands of chunks. So check and see
                // if a bigger value can be used.
                return std::min(chunkBytesWritten, balancerConfig->getMaxChunkSizeBytes());
            } else {
                return desiredChunkSize;
            }
        }();

        auto splitPoints =
            uassertStatusOK(shardutil::selectChunkSplitPoints(opCtx,
                                                              chunk->getShardId(),
                                                              nss,
                                                              manager->getShardKeyPattern(),
                                                              chunkRange,
                                                              chunkSizeToUse,
                                                              boost::none));

        if (splitPoints.size() <= 1) {
            // No split points means there isn't enough data to split on; one split point means
            // the chunk is between half and a full chunk size, so there is no need to split yet
            chunk->clearBytesWritten();
            return;
        }

        if (minIsInf || maxIsInf) {
            // We don't want to reset _dataWritten since we want to check the other side right away
        } else {
            // We're splitting, so should wait a bit
            chunk->clearBytesWritten();
        }

        // We assume that if the chunk being split is the first (or last) one on the collection,
        // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the
        // very first (or last) key as a split point.
        //
        // This heuristic is skipped for "special" shard key patterns that are not likely to produce
        // monotonically increasing or decreasing values (e.g. hashed shard keys).
        if (KeyPattern::isOrderedKeyPattern(manager->getShardKeyPattern().toBSON())) {
            if (minIsInf) {
                BSONObj key = findExtremeKeyForShard(
                    opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), true);
                if (!key.isEmpty()) {
                    splitPoints.front() = key.getOwned();
                }
            } else if (maxIsInf) {
                BSONObj key = findExtremeKeyForShard(
                    opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), false);
                if (!key.isEmpty()) {
                    splitPoints.back() = key.getOwned();
                }
            }
        }

        const auto suggestedMigrateChunk =
            uassertStatusOK(shardutil::splitChunkAtMultiplePoints(opCtx,
                                                                  chunk->getShardId(),
                                                                  nss,
                                                                  manager->getShardKeyPattern(),
                                                                  manager->getVersion(),
                                                                  chunkRange,
                                                                  splitPoints));

        // Balance the resulting chunks if the option is enabled and if the shard suggested a chunk
        // to balance
        const bool shouldBalance = [&]() {
            if (!balancerConfig->shouldBalanceForAutoSplit())
                return false;

            auto collStatus =
                Grid::get(opCtx)->catalogClient()->getCollection(opCtx, manager->getns());
            if (!collStatus.isOK()) {
                log() << "Auto-split for " << nss << " failed to load collection metadata"
                      << causedBy(redact(collStatus.getStatus()));
                return false;
            }

            return collStatus.getValue().value.getAllowBalance();
        }();

        log() << "autosplitted " << nss << " chunk: " << redact(chunk->toString()) << " into "
              << (splitPoints.size() + 1) << " parts (desiredChunkSize " << desiredChunkSize << ")"
              << (suggestedMigrateChunk ? "" : (std::string) " (migrate suggested" +
                          (shouldBalance ? ")" : ", but no migrations allowed)"));

        // Reload the chunk manager after the split
        auto routingInfo = uassertStatusOK(
            Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx,
                                                                                         nss));

        if (!shouldBalance || !suggestedMigrateChunk) {
            return;
        }

        // Top chunk optimization - try to move the top chunk out of this shard to prevent the hot
        // spot from staying on a single shard. This is based on the assumption that succeeding
        // inserts will fall on the top chunk.

        // We need to use the latest chunk manager (after the split) in order to have the most
        // up-to-date view of the chunk we are about to move
        auto suggestedChunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation(
            suggestedMigrateChunk->getMin());

        ChunkType chunkToMove;
        chunkToMove.setNS(nss.ns());
        chunkToMove.setShard(suggestedChunk->getShardId());
        chunkToMove.setMin(suggestedChunk->getMin());
        chunkToMove.setMax(suggestedChunk->getMax());
        chunkToMove.setVersion(suggestedChunk->getLastmod());

        uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, chunkToMove));

        // Ensure the collection gets reloaded because of the move
        Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss);
    } catch (const DBException& ex) {
        chunk->clearBytesWritten();

        if (ErrorCodes::isStaleShardingError(ErrorCodes::Error(ex.getCode()))) {
            log() << "Unable to auto-split chunk " << redact(chunkRange.toString()) << causedBy(ex)
                  << ", going to invalidate routing table entry for " << nss;
            Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss);
        }
    }
}
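A standalone restatement of the chunkSizeToUse computation above, with made-up numbers and a made-up threshold value (the real kTooManySplitPoints constant lives in the surrounding file): once the estimated split-point count crosses the threshold, the split size falls back to min(bytesWritten, maxChunkSizeBytes) so the split yields fewer, larger chunks.

#include <algorithm>
#include <cassert>
#include <cstdint>

uint64_t chunkSizeToUse(uint64_t bytesWritten,
                        uint64_t desiredChunkSize,
                        uint64_t maxChunkSizeBytes,
                        uint64_t tooManySplitPoints) {
    const uint64_t estNumSplitPoints = bytesWritten / desiredChunkSize * 2;
    return estNumSplitPoints >= tooManySplitPoints
        ? std::min(bytesWritten, maxChunkSizeBytes)  // fewer, larger chunks
        : desiredChunkSize;
}

int main() {
    const uint64_t MB = 1024 * 1024;
    // 64 MB written against a 1 MB desired size estimates 128 split points,
    // which crosses our (hypothetical) threshold of 16, so the 32 MB cap wins.
    assert(chunkSizeToUse(64 * MB, 1 * MB, 32 * MB, 16) == 32 * MB);
    // A modest write volume keeps the desired chunk size.
    assert(chunkSizeToUse(4 * MB, 1 * MB, 32 * MB, 16) == 1 * MB);
}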
Example #17
ChunkType::ChunkType(NamespaceString nss, ChunkRange range, ChunkVersion version, ShardId shardId)
    : _nss(std::move(nss)),
      _min(range.getMin()),
      _max(range.getMax()),
      _version(version),
      _shard(std::move(shardId)) {}
Example #18
bool MetadataManager::isInRangesToClean(const ChunkRange& range) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
    // For convenience, this line makes an unnecessary copy, to reuse the
    // rangeMapContains helper function.
    return rangeMapContains(_getCopyOfRangesToClean_inlock(), range.getMin(), range.getMax());
}
Example #19
void MetadataManager::setFilteringMetadata(CollectionMetadata remoteMetadata) {
    stdx::lock_guard<stdx::mutex> lg(_managerLock);

    // Collection is becoming sharded
    if (_metadata.empty()) {
        LOG(0) << "Marking collection " << _nss.ns() << " as " << remoteMetadata.toStringBasic();

        invariant(_receivingChunks.empty());
        invariant(_rangesToClean.isEmpty());

        _setActiveMetadata(lg, std::move(remoteMetadata));
        return;
    }

    const auto& activeMetadata = _metadata.back()->metadata;

    // If the metadata being installed has a different epoch from ours, this means the collection
    // was dropped and recreated, so we must entirely reset the metadata state
    if (activeMetadata.getCollVersion().epoch() != remoteMetadata.getCollVersion().epoch()) {
        LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
               << activeMetadata.toStringBasic() << " to " << remoteMetadata.toStringBasic()
               << " due to epoch change";

        _receivingChunks.clear();
        _clearAllCleanups(lg);
        _metadata.clear();

        _setActiveMetadata(lg, std::move(remoteMetadata));
        return;
    }

    // We already have the same or newer version
    if (activeMetadata.getCollVersion() >= remoteMetadata.getCollVersion()) {
        LOG(1) << "Ignoring update of active metadata " << activeMetadata.toStringBasic()
               << " with an older " << remoteMetadata.toStringBasic();
        return;
    }

    LOG(0) << "Updating metadata for collection " << _nss.ns() << " from "
           << activeMetadata.toStringBasic() << " to " << remoteMetadata.toStringBasic()
           << " due to version change";

    // Resolve any receiving chunks, which might have completed by now
    for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
        const ChunkRange receivingRange(it->first, it->second);

        if (!remoteMetadata.rangeOverlapsChunk(receivingRange)) {
            ++it;
            continue;
        }

        // The remote metadata contains a chunk we were earlier in the process of receiving, so we
        // deem it successfully received
        LOG(2) << "Verified chunk " << redact(receivingRange.toString()) << " for collection "
               << _nss.ns() << " has been migrated to this shard earlier";

        _receivingChunks.erase(it);
        it = _receivingChunks.begin();
    }

    _setActiveMetadata(lg, std::move(remoteMetadata));
}
Example #20
TagsType::TagsType(NamespaceString nss, std::string tag, ChunkRange range)
    : _ns(std::move(nss)),
      _tag(std::move(tag)),
      _minKey(range.getMin().getOwned()),
      _maxKey(range.getMax().getOwned()) {}
Example #21
StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
                                                    Collection* collection,
                                                    BSONObj const& keyPattern,
                                                    ChunkRange const& range,
                                                    int maxToDelete) {
    invariant(collection != nullptr);
    invariant(!isEmpty());

    auto const& nss = collection->ns();

    // The IndexChunk has a keyPattern that may apply to more than one index - we need to
    // select the index and get the full index keyPattern here.
    auto catalog = collection->getIndexCatalog();
    const IndexDescriptor* idx = catalog->findShardKeyPrefixedIndex(opCtx, keyPattern, false);
    if (!idx) {
        std::string msg = str::stream() << "Unable to find shard key index for "
                                        << keyPattern.toString() << " in " << nss.ns();
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    // Extend bounds to match the index we found
    const KeyPattern indexKeyPattern(idx->keyPattern());
    const auto extend = [&](const auto& key) {
        return Helpers::toKeyFormat(indexKeyPattern.extendRangeBound(key, false));
    };

    const auto min = extend(range.getMin());
    const auto max = extend(range.getMax());

    LOG(1) << "begin removal of " << min << " to " << max << " in " << nss.ns();

    const auto indexName = idx->indexName();
    const IndexDescriptor* descriptor =
        collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
    if (!descriptor) {
        std::string msg = str::stream() << "shard key index with name " << indexName << " on '"
                                        << nss.ns() << "' was dropped";
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    auto deleteStageParams = std::make_unique<DeleteStageParams>();
    deleteStageParams->fromMigrate = true;
    deleteStageParams->isMulti = true;
    deleteStageParams->returnDeleted = true;

    if (serverGlobalParams.moveParanoia) {
        deleteStageParams->removeSaver =
            std::make_unique<RemoveSaver>("moveChunk", nss.ns(), "cleaning");
    }

    auto exec = InternalPlanner::deleteWithIndexScan(opCtx,
                                                     collection,
                                                     std::move(deleteStageParams),
                                                     descriptor,
                                                     min,
                                                     max,
                                                     BoundInclusion::kIncludeStartKeyOnly,
                                                     PlanExecutor::YIELD_MANUAL,
                                                     InternalPlanner::FORWARD);

    PlanYieldPolicy planYieldPolicy(exec.get(), PlanExecutor::YIELD_MANUAL);

    int numDeleted = 0;
    do {
        BSONObj deletedObj;
        PlanExecutor::ExecState state = exec->getNext(&deletedObj, nullptr);

        if (state == PlanExecutor::IS_EOF) {
            break;
        }

        if (state == PlanExecutor::FAILURE) {
            warning() << PlanExecutor::statestr(state) << " - cursor error while trying to delete "
                      << redact(min) << " to " << redact(max) << " in " << nss
                      << ": FAILURE, stats: " << Explain::getWinningPlanStats(exec.get());
            break;
        }

        invariant(PlanExecutor::ADVANCED == state);
        ShardingStatistics::get(opCtx).countDocsDeletedOnDonor.addAndFetch(1);

    } while (++numDeleted < maxToDelete);

    return numDeleted;
}
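BoundInclusion::kIncludeStartKeyOnly above makes the scan half-open: documents with min <= key < max are visited, so the range's max bound is never deleted. A standalone sketch of that contract on a sorted container (eraseHalfOpen is a made-up helper, not MongoDB API):

#include <cassert>
#include <iterator>
#include <map>
#include <string>

// Erase every document whose key k satisfies min <= k < max, mirroring the
// half-open [min, max) bounds the index scan above uses.
template <typename K, typename V>
int eraseHalfOpen(std::map<K, V>& docs, const K& min, const K& max) {
    auto first = docs.lower_bound(min);  // first key >= min (included)
    auto last = docs.lower_bound(max);   // first key >= max (excluded)
    const int n = static_cast<int>(std::distance(first, last));
    docs.erase(first, last);
    return n;
}

int main() {
    std::map<int, std::string> docs{{1, "a"}, {5, "b"}, {10, "c"}};
    assert(eraseHalfOpen(docs, 5, 10) == 1);  // deletes key 5, keeps key 10
    assert(docs.count(10) == 1);
}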
Example #22
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const ChunkRange& chunkRange,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                              << " parts at a time."};
    }

    // Sanity check that we are not attempting to split at the boundaries of the chunk. This check
    // is already performed at chunk split commit time, but we are performing it here for parity
    // with old auto-split code, which might rely on it.
    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMin() == splitPoints.front())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << splitPoints.front()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMax() == splitPoints.back())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << splitPoints.back()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    chunkRange.append(&cmd);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shardStatus.isOK()) {
        status = shardStatus.getStatus();
    } else {
        auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts(
            txn,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            cmdObj,
            Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
            << "Chunk migration will be skipped because splitChunk returned invalid response: "
            << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
            << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
Example #23
StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
                                                    Collection* collection,
                                                    BSONObj const& keyPattern,
                                                    ChunkRange const& range,
                                                    int maxToDelete) {
    invariant(collection != nullptr);
    invariant(!isEmpty());

    auto const& nss = collection->ns();

    // The IndexChunk has a keyPattern that may apply to more than one index - we need to
    // select the index and get the full index keyPattern here.
    auto catalog = collection->getIndexCatalog();
    const IndexDescriptor* idx = catalog->findShardKeyPrefixedIndex(opCtx, keyPattern, false);
    if (!idx) {
        std::string msg = str::stream() << "Unable to find shard key index for "
                                        << keyPattern.toString() << " in " << nss.ns();
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    // Extend bounds to match the index we found
    const KeyPattern indexKeyPattern(idx->keyPattern());
    const auto extend = [&](const auto& key) {
        return Helpers::toKeyFormat(indexKeyPattern.extendRangeBound(key, false));
    };

    const auto min = extend(range.getMin());
    const auto max = extend(range.getMax());

    LOG(1) << "begin removal of " << min << " to " << max << " in " << nss.ns();

    const auto indexName = idx->indexName();
    IndexDescriptor* descriptor = collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
    if (!descriptor) {
        std::string msg = str::stream() << "shard key index with name " << indexName << " on '"
                                        << nss.ns() << "' was dropped";
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    boost::optional<Helpers::RemoveSaver> saver;
    if (serverGlobalParams.moveParanoia) {
        saver.emplace("moveChunk", nss.ns(), "cleaning");
    }

    auto halfOpen = BoundInclusion::kIncludeStartKeyOnly;
    auto manual = PlanExecutor::YIELD_MANUAL;
    auto forward = InternalPlanner::FORWARD;
    auto fetch = InternalPlanner::IXSCAN_FETCH;

    auto exec = InternalPlanner::indexScan(
        opCtx, collection, descriptor, min, max, halfOpen, manual, forward, fetch);

    int numDeleted = 0;
    do {
        RecordId rloc;
        BSONObj obj;
        PlanExecutor::ExecState state = exec->getNext(&obj, &rloc);
        if (state == PlanExecutor::IS_EOF) {
            break;
        }
        if (state == PlanExecutor::FAILURE || state == PlanExecutor::DEAD) {
            warning() << PlanExecutor::statestr(state) << " - cursor error while trying to delete "
                      << redact(min) << " to " << redact(max) << " in " << nss << ": "
                      << redact(WorkingSetCommon::toStatusString(obj))
                      << ", stats: " << Explain::getWinningPlanStats(exec.get());
            break;
        }
        invariant(PlanExecutor::ADVANCED == state);

        exec->saveState();

        writeConflictRetry(opCtx, "delete range", nss.ns(), [&] {
            WriteUnitOfWork wuow(opCtx);
            if (saver) {
                uassertStatusOK(saver->goingToDelete(obj));
            }
            collection->deleteDocument(opCtx, kUninitializedStmtId, rloc, nullptr, true);
            wuow.commit();
        });

        try {
            exec->restoreState();
        } catch (const DBException& ex) {
            warning() << "error restoring cursor state while trying to delete " << redact(min)
                      << " to " << redact(max) << " in " << nss
                      << ", stats: " << Explain::getWinningPlanStats(exec.get()) << ": "
                      << redact(ex.toStatus());
            break;
        }
        ShardingStatistics::get(opCtx).countDocsDeletedOnDonor.addAndFetch(1);

    } while (++numDeleted < maxToDelete);

    return numDeleted;
}
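The writeConflictRetry call above re-runs the whole unit of work when a storage-level write conflict rolls it back. A rough standalone sketch of that retry shape (WriteConflictException and writeConflictRetrySketch here are illustrative stand-ins, not the real MongoDB declarations):

#include <stdexcept>

struct WriteConflictException : std::runtime_error {
    WriteConflictException() : std::runtime_error("write conflict") {}
};

// Keep re-running the write until it completes without a conflict. Real code
// would also track the attempt count, log it, and back off between retries.
template <typename Fn>
auto writeConflictRetrySketch(Fn&& fn) -> decltype(fn()) {
    while (true) {
        try {
            return fn();
        } catch (const WriteConflictException&) {
            // A conflicting writer won; the unit of work was rolled back, so
            // it is safe to try the whole thing again from the top.
        }
    }
}

// Usage mirrors the deletion loop above:
//   writeConflictRetrySketch([&] { /* WriteUnitOfWork + deleteDocument */ });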