Example #1
void MetadataManager::beginReceive(const ChunkRange& range) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    // Collection is not known to be sharded if the active metadata tracker is null
    invariant(_activeMetadataTracker);

    // If the range overlaps chunks we are currently receiving, a previous migration must have
    // failed and we need to clean up all overlapping ranges
    RangeVector overlappedChunks;
    getRangeMapOverlap(_receivingChunks, range.getMin(), range.getMax(), &overlappedChunks);

    for (const auto& overlapChunkMin : overlappedChunks) {
        auto itRecv = _receivingChunks.find(overlapChunkMin.first);
        invariant(itRecv != _receivingChunks.end());

        const ChunkRange receivingRange(itRecv->first, itRecv->second);

        _receivingChunks.erase(itRecv);

        // Make sure any potentially partially copied chunks are scheduled to be cleaned up
        _addRangeToClean_inlock(receivingRange);
    }

    // Need to ensure that the background range deleter task won't delete the range we are about to
    // receive
    _removeRangeToClean_inlock(range);
    _receivingChunks.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));

    // For compatibility with the current range deleter, update the pending chunks on the collection
    // metadata to include the chunk being received
    ChunkType chunk;
    chunk.setMin(range.getMin());
    chunk.setMax(range.getMax());
    _setActiveMetadata_inlock(_activeMetadataTracker->metadata->clonePlusPending(chunk));
}
Example #2
void MetadataManager::_removeRangeToClean_inlock(const ChunkRange& range) {
    auto it = _rangesToClean.upper_bound(range.getMin());
    // We want our iterator to point at the entry with the greatest key
    // that is still less than or equal to the start of the range.
    if (it != _rangesToClean.begin()) {
        --it;
    }

    for (; it != _rangesToClean.end() && it->first < range.getMax();) {
        if (it->second <= range.getMin()) {
            ++it;
            continue;
        }

        // There's overlap between *it and range so we remove *it
        // and then replace with new ranges.
        BSONObj oldMin = it->first, oldMax = it->second;
        _rangesToClean.erase(it++);
        if (oldMin < range.getMin()) {
            _addRangeToClean_inlock(ChunkRange(oldMin, range.getMin()));
        }

        if (oldMax > range.getMax()) {
            _addRangeToClean_inlock(ChunkRange(range.getMax(), oldMax));
        }
    }
}
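
The splitting logic above is generic interval subtraction: any existing range that overlaps the removed range is erased, and its non-overlapping leftovers are re-inserted. A minimal, self-contained sketch of the same idea on a std::map<int, int> keyed by range start (illustrative names, not the real RangeMap type):

#include <cstdio>
#include <map>

// Ranges are stored as [min, max) pairs keyed by their min, mirroring the
// layout of _rangesToClean. Removing a range erases every overlapping entry
// and re-inserts whatever sticks out on the left or right of the removal.
void removeRange(std::map<int, int>& ranges, int min, int max) {
    auto it = ranges.upper_bound(min);
    // Step back so we start at the entry with the greatest key <= min.
    if (it != ranges.begin()) {
        --it;
    }

    while (it != ranges.end() && it->first < max) {
        if (it->second <= min) {
            ++it;  // entirely to the left of the removed range
            continue;
        }

        // *it overlaps the removed range: erase it, keep the leftovers.
        const int oldMin = it->first;
        const int oldMax = it->second;
        it = ranges.erase(it);
        if (oldMin < min) {
            ranges.emplace(oldMin, min);
        }
        if (oldMax > max) {
            ranges.emplace(max, oldMax);
        }
    }
}

int main() {
    std::map<int, int> ranges{{0, 10}, {20, 30}};
    removeRange(ranges, 5, 25);  // leaves [0, 5) and [25, 30)
    for (const auto& r : ranges) {
        std::printf("[%d, %d)\n", r.first, r.second);
    }
}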
Example #3
ChunkRange::ChunkRange(const ChunkRange& min, const ChunkRange& max)
    : _manager(min.getManager()),
      _shardId(min.getShardId()),
      _min(min.getMin()),
      _max(max.getMax()) {
    invariant(min.getShardId() == max.getShardId());
    invariant(min.getManager() == max.getManager());
    invariant(min.getMax() == max.getMin());
}
Example #4
void MetadataManager::_addRangeToClean_inlock(const ChunkRange& range) {
    invariant(!rangeMapOverlaps(_rangesToClean, range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));
    _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));

    // If _rangesToClean was previously empty, we need to start the collection range deleter
    if (_rangesToClean.size() == 1UL) {
        ShardingState::get(_serviceContext)->scheduleCleanup(_nss);
    }
}
Example #5
std::shared_ptr<Notification<Status>> MetadataManager::_addRangeToClean_inlock(
    const ChunkRange& range) {
    // This first invariant currently makes an unnecessary copy, to reuse the
    // rangeMapOverlaps helper function.
    invariant(!rangeMapOverlaps(_getCopyOfRangesToClean_inlock(), range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));

    RangeToCleanDescriptor descriptor(range.getMax().getOwned());
    _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), descriptor));

    // If _rangesToClean was previously empty, we need to start the collection range deleter
    if (_rangesToClean.size() == 1UL) {
        ShardingState::get(_serviceContext)->scheduleCleanup(_nss);
    }

    return descriptor.getNotification();
}
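
This variant couples registration with a handle the caller can wait on: _addRangeToClean_inlock returns the descriptor's Notification<Status>, and the cleanup task is only scheduled when the map transitions from empty to non-empty. A rough, self-contained sketch of that pattern, using std::promise/std::shared_future in place of Notification<Status> and a stubbed-out scheduleCleanup (all names here are illustrative):

#include <future>
#include <iostream>
#include <map>
#include <memory>

// Each registered range carries a promise that the background deleter
// fulfills once the range has actually been cleaned up.
struct RangeToCleanDescriptor {
    int max = 0;
    std::shared_ptr<std::promise<bool>> done = std::make_shared<std::promise<bool>>();
};

class RangeCleaner {
public:
    // Registers [min, max) for deletion and hands back a future the caller
    // may block on. The cleanup task is only scheduled when the map goes
    // from empty to non-empty; later insertions ride on the running task.
    std::shared_future<bool> addRangeToClean(int min, int max) {
        RangeToCleanDescriptor descriptor;
        descriptor.max = max;
        auto whenDone = descriptor.done->get_future().share();

        _rangesToClean.emplace(min, std::move(descriptor));
        if (_rangesToClean.size() == 1) {
            scheduleCleanup();
        }
        return whenDone;
    }

private:
    void scheduleCleanup() {
        // Stand-in for ShardingState::scheduleCleanup(_nss): a real task would
        // drain _rangesToClean and set each descriptor's promise as it goes.
        std::cout << "cleanup task scheduled\n";
    }

    std::map<int, RangeToCleanDescriptor> _rangesToClean;
};

int main() {
    RangeCleaner cleaner;
    auto done = cleaner.addRangeToClean(0, 100);   // first range: schedules the task
    cleaner.addRangeToClean(200, 300);             // task already running, no reschedule
    (void)done;  // a caller could wait on this until the deleter signals completion
}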
Example #6
void MetadataManager::forgetReceive(const ChunkRange& range) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);

    {
        auto it = _receivingChunks.find(range.getMin());
        invariant(it != _receivingChunks.end());

        // Verify entire ChunkRange is identical, not just the min key.
        invariant(it->second == range.getMax());

        _receivingChunks.erase(it);
    }

    // This is potentially partially received data, which needs to be cleaned up
    _addRangeToClean_inlock(range);

    // For compatibility with the current range deleter, update the pending chunks on the collection
    // metadata to exclude the chunk being received, which was added in beginReceive
    ChunkType chunk;
    chunk.setMin(range.getMin());
    chunk.setMax(range.getMax());
    _setActiveMetadata_inlock(_activeMetadataTracker->metadata->cloneMinusPending(chunk));
}
Example #7
void MetadataManager::_addRangeToClean_inlock(const ChunkRange& range) {
    invariant(!rangeMapOverlaps(_rangesToClean, range.getMin(), range.getMax()));
    invariant(!rangeMapOverlaps(_receivingChunks, range.getMin(), range.getMax()));
    _rangesToClean.insert(std::make_pair(range.getMin().getOwned(), range.getMax().getOwned()));
}
Example #8
ChunkType::ChunkType(NamespaceString nss, ChunkRange range, ChunkVersion version, ShardId shardId)
    : _nss(nss),
      _min(range.getMin()),
      _max(range.getMax()),
      _version(version),
      _shard(std::move(shardId)) {}
Example #9
StatusWith<boost::optional<ChunkRange>> splitChunkAtMultiplePoints(
    OperationContext* txn,
    const ShardId& shardId,
    const NamespaceString& nss,
    const ShardKeyPattern& shardKeyPattern,
    ChunkVersion collectionVersion,
    const ChunkRange& chunkRange,
    const std::vector<BSONObj>& splitPoints) {
    invariant(!splitPoints.empty());

    const size_t kMaxSplitPoints = 8192;

    if (splitPoints.size() > kMaxSplitPoints) {
        return {ErrorCodes::BadValue,
                str::stream() << "Cannot split chunk in more than " << kMaxSplitPoints
                              << " parts at a time."};
    }

    // Sanity check that we are not attempting to split at the boundaries of the chunk. This check
    // is already performed at chunk split commit time, but we are performing it here for parity
    // with old auto-split code, which might rely on it.
    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMin() == splitPoints.front())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << splitPoints.front()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    if (SimpleBSONObjComparator::kInstance.evaluate(chunkRange.getMax() == splitPoints.back())) {
        const std::string msg(str::stream() << "not splitting chunk " << chunkRange.toString()
                                            << ", split point "
                                            << splitPoints.back()
                                            << " is exactly on chunk bounds");
        return {ErrorCodes::CannotSplit, msg};
    }

    BSONObjBuilder cmd;
    cmd.append("splitChunk", nss.ns());
    cmd.append("configdb",
               Grid::get(txn)->shardRegistry()->getConfigServerConnectionString().toString());
    cmd.append("from", shardId.toString());
    cmd.append("keyPattern", shardKeyPattern.toBSON());
    collectionVersion.appendForCommands(&cmd);
    chunkRange.append(&cmd);
    cmd.append("splitKeys", splitPoints);

    BSONObj cmdObj = cmd.obj();

    Status status{ErrorCodes::InternalError, "Uninitialized value"};
    BSONObj cmdResponse;

    auto shardStatus = Grid::get(txn)->shardRegistry()->getShard(txn, shardId);
    if (!shardStatus.isOK()) {
        status = shardStatus.getStatus();
    } else {
        auto cmdStatus = shardStatus.getValue()->runCommandWithFixedRetryAttempts(
            txn,
            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
            "admin",
            cmdObj,
            Shard::RetryPolicy::kNotIdempotent);
        if (!cmdStatus.isOK()) {
            status = std::move(cmdStatus.getStatus());
        } else {
            status = std::move(cmdStatus.getValue().commandStatus);
            cmdResponse = std::move(cmdStatus.getValue().response);
        }
    }

    if (!status.isOK()) {
        log() << "Split chunk " << redact(cmdObj) << " failed" << causedBy(redact(status));
        return {status.code(), str::stream() << "split failed due to " << status.toString()};
    }

    BSONElement shouldMigrateElement;
    status = bsonExtractTypedField(cmdResponse, kShouldMigrate, Object, &shouldMigrateElement);
    if (status.isOK()) {
        auto chunkRangeStatus = ChunkRange::fromBSON(shouldMigrateElement.embeddedObject());
        if (!chunkRangeStatus.isOK()) {
            return chunkRangeStatus.getStatus();
        }

        return boost::optional<ChunkRange>(std::move(chunkRangeStatus.getValue()));
    } else if (status != ErrorCodes::NoSuchKey) {
        warning()
            << "Chunk migration will be skipped because splitChunk returned invalid response: "
            << redact(cmdResponse) << ". Extracting " << kShouldMigrate << " field failed"
            << causedBy(redact(status));
    }

    return boost::optional<ChunkRange>();
}
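
The early checks above reject oversized split vectors and split points that coincide with the chunk bounds, since a split on a bound would produce an empty chunk. A condensed sketch of that validation on plain integer keys (an assumed, illustrative helper, not part of the original code):

#include <cstddef>
#include <optional>
#include <string>
#include <vector>

// Mirrors the up-front validation in splitChunkAtMultiplePoints: the split
// point list must be non-empty, bounded in size, and must not touch the
// chunk bounds.
std::optional<std::string> validateSplitPoints(int chunkMin,
                                               int chunkMax,
                                               const std::vector<int>& splitPoints) {
    constexpr std::size_t kMaxSplitPoints = 8192;

    if (splitPoints.empty()) {
        return "no split points provided";
    }
    if (splitPoints.size() > kMaxSplitPoints) {
        return "cannot split chunk in more than 8192 parts at a time";
    }
    if (splitPoints.front() == chunkMin || splitPoints.back() == chunkMax) {
        return "split point is exactly on chunk bounds";
    }
    return std::nullopt;  // validation passed; the split command may be sent
}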
Example #10
TagsType::TagsType(NamespaceString nss, std::string tag, ChunkRange range)
    : _ns(std::move(nss)),
      _tag(std::move(tag)),
      _minKey(range.getMin().getOwned()),
      _maxKey(range.getMax().getOwned()) {}
Example #11
StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
                                                    Collection* collection,
                                                    BSONObj const& keyPattern,
                                                    ChunkRange const& range,
                                                    int maxToDelete) {
    invariant(collection != nullptr);
    invariant(!isEmpty());

    auto const& nss = collection->ns();

    // The IndexChunk has a keyPattern that may apply to more than one index - we need to
    // select the index and get the full index keyPattern here.
    auto catalog = collection->getIndexCatalog();
    const IndexDescriptor* idx = catalog->findShardKeyPrefixedIndex(opCtx, keyPattern, false);
    if (!idx) {
        std::string msg = str::stream() << "Unable to find shard key index for "
                                        << keyPattern.toString() << " in " << nss.ns();
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    // Extend bounds to match the index we found
    const KeyPattern indexKeyPattern(idx->keyPattern());
    const auto extend = [&](const auto& key) {
        return Helpers::toKeyFormat(indexKeyPattern.extendRangeBound(key, false));
    };

    const auto min = extend(range.getMin());
    const auto max = extend(range.getMax());

    LOG(1) << "begin removal of " << min << " to " << max << " in " << nss.ns();

    const auto indexName = idx->indexName();
    const IndexDescriptor* descriptor =
        collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
    if (!descriptor) {
        std::string msg = str::stream() << "shard key index with name " << indexName << " on '"
                                        << nss.ns() << "' was dropped";
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    auto deleteStageParams = std::make_unique<DeleteStageParams>();
    deleteStageParams->fromMigrate = true;
    deleteStageParams->isMulti = true;
    deleteStageParams->returnDeleted = true;

    if (serverGlobalParams.moveParanoia) {
        deleteStageParams->removeSaver =
            std::make_unique<RemoveSaver>("moveChunk", nss.ns(), "cleaning");
    }

    auto exec = InternalPlanner::deleteWithIndexScan(opCtx,
                                                     collection,
                                                     std::move(deleteStageParams),
                                                     descriptor,
                                                     min,
                                                     max,
                                                     BoundInclusion::kIncludeStartKeyOnly,
                                                     PlanExecutor::YIELD_MANUAL,
                                                     InternalPlanner::FORWARD);

    PlanYieldPolicy planYieldPolicy(exec.get(), PlanExecutor::YIELD_MANUAL);

    int numDeleted = 0;
    do {
        BSONObj deletedObj;
        PlanExecutor::ExecState state = exec->getNext(&deletedObj, nullptr);

        if (state == PlanExecutor::IS_EOF) {
            break;
        }

        if (state == PlanExecutor::FAILURE) {
            warning() << PlanExecutor::statestr(state) << " - cursor error while trying to delete "
                      << redact(min) << " to " << redact(max) << " in " << nss
                      << ": FAILURE, stats: " << Explain::getWinningPlanStats(exec.get());
            break;
        }

        invariant(PlanExecutor::ADVANCED == state);
        ShardingStatistics::get(opCtx).countDocsDeletedOnDonor.addAndFetch(1);

    } while (++numDeleted < maxToDelete);

    return numDeleted;
}
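
The loop above deletes at most maxToDelete documents per pass and returns the count, so the caller can tell whether another pass is needed. A stripped-down sketch of that batching contract, with the plan executor replaced by a simple callback (illustrative names and batch size only):

#include <functional>
#include <iostream>

// Deletes documents one at a time via nextDelete(), stopping after
// maxToDelete deletions or when nextDelete() reports nothing left.
// Returns the number of documents removed in this pass; a return value
// equal to maxToDelete tells the caller to schedule another pass.
int doDeletionPass(const std::function<bool()>& nextDelete, int maxToDelete) {
    int numDeleted = 0;
    do {
        if (!nextDelete()) {
            break;  // analogous to PlanExecutor::IS_EOF
        }
    } while (++numDeleted < maxToDelete);
    return numDeleted;
}

int main() {
    int remaining = 300;
    auto deleteOne = [&remaining]() {
        if (remaining == 0) return false;
        --remaining;
        return true;
    };

    // The range deleter keeps rescheduling itself until a pass deletes
    // fewer documents than the batch limit.
    int deleted;
    do {
        deleted = doDeletionPass(deleteOne, 128);
        std::cout << "pass deleted " << deleted << " documents\n";
    } while (deleted == 128);
}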
Example #12
bool MetadataManager::isInRangesToClean(const ChunkRange& range) {
    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
    // For convenience, this line makes an unnecessary copy, to reuse the
    // rangeMapContains helper function.
    return rangeMapContains(_getCopyOfRangesToClean_inlock(), range.getMin(), range.getMax());
}
Example #13
StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
                                                    Collection* collection,
                                                    BSONObj const& keyPattern,
                                                    ChunkRange const& range,
                                                    int maxToDelete) {
    invariant(collection != nullptr);
    invariant(!isEmpty());

    auto const& nss = collection->ns();

    // The IndexChunk has a keyPattern that may apply to more than one index - we need to
    // select the index and get the full index keyPattern here.
    auto catalog = collection->getIndexCatalog();
    const IndexDescriptor* idx = catalog->findShardKeyPrefixedIndex(opCtx, keyPattern, false);
    if (!idx) {
        std::string msg = str::stream() << "Unable to find shard key index for "
                                        << keyPattern.toString() << " in " << nss.ns();
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    // Extend bounds to match the index we found
    const KeyPattern indexKeyPattern(idx->keyPattern());
    const auto extend = [&](const auto& key) {
        return Helpers::toKeyFormat(indexKeyPattern.extendRangeBound(key, false));
    };

    const auto min = extend(range.getMin());
    const auto max = extend(range.getMax());

    LOG(1) << "begin removal of " << min << " to " << max << " in " << nss.ns();

    const auto indexName = idx->indexName();
    IndexDescriptor* descriptor = collection->getIndexCatalog()->findIndexByName(opCtx, indexName);
    if (!descriptor) {
        std::string msg = str::stream() << "shard key index with name " << indexName << " on '"
                                        << nss.ns() << "' was dropped";
        LOG(0) << msg;
        return {ErrorCodes::InternalError, msg};
    }

    boost::optional<Helpers::RemoveSaver> saver;
    if (serverGlobalParams.moveParanoia) {
        saver.emplace("moveChunk", nss.ns(), "cleaning");
    }

    auto halfOpen = BoundInclusion::kIncludeStartKeyOnly;
    auto manual = PlanExecutor::YIELD_MANUAL;
    auto forward = InternalPlanner::FORWARD;
    auto fetch = InternalPlanner::IXSCAN_FETCH;

    auto exec = InternalPlanner::indexScan(
        opCtx, collection, descriptor, min, max, halfOpen, manual, forward, fetch);

    int numDeleted = 0;
    do {
        RecordId rloc;
        BSONObj obj;
        PlanExecutor::ExecState state = exec->getNext(&obj, &rloc);
        if (state == PlanExecutor::IS_EOF) {
            break;
        }
        if (state == PlanExecutor::FAILURE || state == PlanExecutor::DEAD) {
            warning() << PlanExecutor::statestr(state) << " - cursor error while trying to delete "
                      << redact(min) << " to " << redact(max) << " in " << nss << ": "
                      << redact(WorkingSetCommon::toStatusString(obj))
                      << ", stats: " << Explain::getWinningPlanStats(exec.get());
            break;
        }
        invariant(PlanExecutor::ADVANCED == state);

        exec->saveState();

        writeConflictRetry(opCtx, "delete range", nss.ns(), [&] {
            WriteUnitOfWork wuow(opCtx);
            if (saver) {
                uassertStatusOK(saver->goingToDelete(obj));
            }
            collection->deleteDocument(opCtx, kUninitializedStmtId, rloc, nullptr, true);
            wuow.commit();
        });

        try {
            exec->restoreState();
        } catch (const DBException& ex) {
            warning() << "error restoring cursor state while trying to delete " << redact(min)
                      << " to " << redact(max) << " in " << nss
                      << ", stats: " << Explain::getWinningPlanStats(exec.get()) << ": "
                      << redact(ex.toStatus());
            break;
        }
        ShardingStatistics::get(opCtx).countDocsDeletedOnDonor.addAndFetch(1);

    } while (++numDeleted < maxToDelete);

    return numDeleted;
}