ShardEndpoint ChunkManagerTargeter::_targetShardKey(const BSONObj& shardKey,
                                                    const BSONObj& collation,
                                                    long long estDataSize) const {
    const auto chunk = _routingInfo->cm()->findIntersectingChunk(shardKey, collation);

    return {chunk.getShardId(), _routingInfo->cm()->getVersion(chunk.getShardId())};
}
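A minimal sketch of how a caller might drive _targetShardKey. The targetInsert() wrapper below is illustrative, not the real targeter code path; extractShardKeyFromDoc() is a real ShardKeyPattern helper.

// Illustrative caller only: extract the shard key from the document being
// written and target it.
ShardEndpoint targetInsert(const BSONObj& doc, const ShardKeyPattern& pattern) const {
    const BSONObj shardKey = pattern.extractShardKeyFromDoc(doc);
    uassert(ErrorCodes::ShardKeyNotFound,
            "document is missing the shard key",
            !shardKey.isEmpty());
    // An empty collation means the shard key is targeted with the simple collation.
    return _targetShardKey(shardKey, BSONObj(), doc.objsize());
}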
Example #2
    void handleLsid(const LogicalSessionId& lsid) {
        // There are some lifetime issues with when the reaper starts up versus when the grid is
        // available.  Deferring the routing info fetch until after we have a transaction moves us
        // past the problem.
        //
        // Also, we should only need the chunk case, but that'll wait until the sessions table is
        // actually sharded.
        if (!(_cm || _primary)) {
            auto routingInfo =
                uassertStatusOK(Grid::get(_opCtx)->catalogCache()->getCollectionRoutingInfo(
                    _opCtx, SessionsCollection::kSessionsNamespaceString));
            _cm = routingInfo.cm();
            _primary = routingInfo.db().primary();
        }

        ShardId shardId;
        if (_cm) {
            const auto chunk = _cm->findIntersectingChunkWithSimpleCollation(lsid.toBSON());
            shardId = chunk.getShardId();
        } else {
            shardId = _primary->getId();
        }

        auto& lsids = _shards[shardId];
        lsids.insert(lsid);

        if (lsids.size() > write_ops::kMaxWriteBatchSize) {
            _numReaped += removeSessionsRecords(_opCtx, _sessionsCollection, lsids);
            _shards.erase(shardId);
        }
    }
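Note that handleLsid() only flushes a shard's batch once it grows past write_ops::kMaxWriteBatchSize, so partial batches are left behind and must be flushed at the end. A minimal sketch of that final step, using only the members visible above; the flushRemaining() name is an assumption, not the real interface:

    void flushRemaining() {
        // Flush the partial per-shard batches left over after the last handleLsid() call.
        for (const auto& shardEntry : _shards) {
            _numReaped += removeSessionsRecords(_opCtx, _sessionsCollection, shardEntry.second);
        }
        _shards.clear();
    }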
Example #3
void ChunkManager::getShardIdsForQuery(OperationContext* txn,
                                       const BSONObj& query,
                                       set<ShardId>* shardIds) const {
    auto statusWithCQ =
        CanonicalQuery::canonicalize(NamespaceString(_ns), query, ExtensionsCallbackNoop());

    uassertStatusOK(statusWithCQ.getStatus());
    unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue());

    // Query validation
    if (QueryPlannerCommon::hasNode(cq->root(), MatchExpression::GEO_NEAR)) {
        uassert(13501, "use geoNear command rather than $near query", false);
    }

    // Fast path for targeting equalities on the shard key.
    auto shardKeyToFind = _keyPattern.extractShardKeyFromQuery(*cq);
    if (shardKeyToFind.isOK() && !shardKeyToFind.getValue().isEmpty()) {
        auto chunk = findIntersectingChunk(txn, shardKeyToFind.getValue());
        shardIds->insert(chunk->getShardId());
        return;
    }

    // Transforms query into bounds for each field in the shard key
    // for example :
    //   Key { a: 1, b: 1 },
    //   Query { a : { $gte : 1, $lt : 2 },
    //            b : { $gte : 3, $lt : 4 } }
    //   => Bounds { a : [1, 2), b : [3, 4) }
    IndexBounds bounds = getIndexBoundsForQuery(_keyPattern.toBSON(), *cq);

    // Transforms bounds for each shard key field into full shard key ranges
    // for example :
    //   Key { a : 1, b : 1 }
    //   Bounds { a : [1, 2), b : [3, 4) }
    //   => Ranges { a : 1, b : 3 } => { a : 2, b : 4 }
    BoundList ranges = _keyPattern.flattenBounds(bounds);

    for (BoundList::const_iterator it = ranges.begin(); it != ranges.end(); ++it) {
        getShardIdsForRange(*shardIds, it->first /*min*/, it->second /*max*/);

        // Once we know we need to visit all shards, there is no need to keep looping
        if (shardIds->size() == _shardIds.size())
            break;
    }

    // SERVER-4914 Some clients of getShardIdsForQuery() assume at least one shard will be
    // returned.  For now, we satisfy that assumption by adding a shard with no matches rather
    // than return an empty set of shards.
    if (shardIds->empty()) {
        massert(16068, "no chunk ranges available", !_chunkRanges.ranges().empty());
        shardIds->insert(_chunkRanges.ranges().begin()->second->getShardId());
    }
}
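The bounds-flattening transformation documented in the comments above can be checked with a standalone toy version. This sketch covers only the single-interval case and uses plain STL types in place of the real IndexBounds/BoundList:

#include <iostream>
#include <utility>
#include <vector>

// One half-open interval [lo, hi) per shard key field, in key pattern order.
using FieldBound = std::pair<int, int>;

// Toy version of KeyPattern::flattenBounds() for the single-interval case:
//   Bounds { a : [1, 2), b : [3, 4) }  =>  Range { a : 1, b : 3 } => { a : 2, b : 4 }
std::pair<std::vector<int>, std::vector<int>> flattenBounds(
    const std::vector<FieldBound>& bounds) {
    std::vector<int> min, max;
    for (const auto& b : bounds) {
        min.push_back(b.first);
        max.push_back(b.second);
    }
    return {min, max};
}

int main() {
    const auto range = flattenBounds({{1, 2}, {3, 4}});
    std::cout << "min: {a: " << range.first[0] << ", b: " << range.first[1] << "} "
              << "max: {a: " << range.second[0] << ", b: " << range.second[1] << "}\n";
    // Prints: min: {a: 1, b: 3} max: {a: 2, b: 4}
}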
Example #4
// Returns the number of splits configured for the shard that routingKey maps to;
// defaults to 1 when the key carries no shard id or no split entry exists.
size_t ShardSplitter::getShardSplitCnt(folly::StringPiece routingKey,
                                       folly::StringPiece& shard) const {
  if (!getShardId(routingKey, shard)) {
    return 1;
  }

  auto splitIt = shardSplits_.find(shard);
  if (splitIt == shardSplits_.end()) {
    return 1;
  }
  return splitIt->second;
}
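A small usage sketch for getShardSplitCnt(); the logSplitCount() wrapper is illustrative only:

#include <iostream>

// Illustrative only: look up how many ways the key's shard is split.
void logSplitCount(const ShardSplitter& splitter, folly::StringPiece routingKey) {
  folly::StringPiece shard;
  const size_t nSplits = splitter.getShardSplitCnt(routingKey, shard);
  // shard is only filled in when getShardId() succeeds; nSplits falls back to 1.
  std::cout << "shard=" << shard << " splits=" << nSplits << "\n";
}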
Example #5
    void handleLsid(const LogicalSessionId& lsid) {
        invariant(_cm);
        const auto chunk = _cm->findIntersectingChunkWithSimpleCollation(lsid.toBSON());
        const auto shardId = chunk.getShardId();

        auto& lsids = _shards[shardId];
        lsids.insert(lsid);

        if (lsids.size() >= write_ops::kMaxWriteBatchSize) {
            _numReaped += removeSessionsRecords(_opCtx, _sessionsCollection, lsids);
            _shards.erase(shardId);
        }
    }
Example #6
bool ConstShardHashFunc::shardLookup(folly::StringPiece key,
                                     size_t* result) const {
  folly::StringPiece shard;
  if (!getShardId(key, shard)) {
    return false;
  }
  for (const char c : shard) {
    // Cast to unsigned char avoids undefined behavior for negative char values.
    if (!isdigit(static_cast<unsigned char>(c))) {
      return false;
    }
  }
  size_t index;
  try {
    index = folly::to<size_t>(shard);
  } catch (...) {
    return false;
  }

  if (index >= n_) {
    return false; // out of bounds
  }
  *result = index;
  return true;
}
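A usage sketch for shardLookup(); pickShardIndex() is an illustrative helper, not part of the real class:

// Illustrative only: map a key to a numeric shard index, with a caller-chosen fallback.
size_t pickShardIndex(const ConstShardHashFunc& hashFunc,
                      folly::StringPiece key,
                      size_t fallbackIndex) {
  size_t index = 0;
  // shardLookup() succeeds only when the shard name is all digits and parses to
  // an index below n_; otherwise keep the fallback.
  return hashFunc.shardLookup(key, &index) ? index : fallbackIndex;
}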
Example #7
void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* opCtx,
                                           ChunkManager* manager,
                                           Chunk* chunk,
                                           long dataWritten) {
    // Disable lastError tracking so that any errors, which occur during auto-split do not get
    // bubbled up on the client connection doing a write.
    LastError::Disabled d(&LastError::get(cc()));

    const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration();

    const bool minIsInf =
        (0 == manager->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk->getMin()));
    const bool maxIsInf =
        (0 == manager->getShardKeyPattern().getKeyPattern().globalMax().woCompare(chunk->getMax()));

    const uint64_t chunkBytesWritten = chunk->addBytesWritten(dataWritten);

    const uint64_t desiredChunkSize =
        calculateDesiredChunkSize(balancerConfig->getMaxChunkSizeBytes(), manager->numChunks());

    if (!chunk->shouldSplit(desiredChunkSize, minIsInf, maxIsInf)) {
        return;
    }

    const NamespaceString nss(manager->getns());

    if (!manager->_autoSplitThrottle._splitTickets.tryAcquire()) {
        LOG(1) << "won't auto split because not enough tickets: " << nss;
        return;
    }

    TicketHolderReleaser releaser(&(manager->_autoSplitThrottle._splitTickets));

    const ChunkRange chunkRange(chunk->getMin(), chunk->getMax());

    try {
        // Ensure we have the most up-to-date balancer configuration
        uassertStatusOK(balancerConfig->refreshAndCheck(opCtx));

        if (!balancerConfig->getShouldAutoSplit()) {
            return;
        }

        LOG(1) << "about to initiate autosplit: " << redact(chunk->toString())
               << " dataWritten: " << chunkBytesWritten
               << " desiredChunkSize: " << desiredChunkSize;

        const uint64_t chunkSizeToUse = [&]() {
            const uint64_t estNumSplitPoints = chunkBytesWritten / desiredChunkSize * 2;

            if (estNumSplitPoints >= kTooManySplitPoints) {
                // The current desired chunk size will split the chunk into lots of small chunks,
                // and in the worst case this can result in thousands of chunks. So check and see
                // if a bigger value can be used.
                return std::min(chunkBytesWritten, balancerConfig->getMaxChunkSizeBytes());
            } else {
                return desiredChunkSize;
            }
        }();

        auto splitPoints =
            uassertStatusOK(shardutil::selectChunkSplitPoints(opCtx,
                                                              chunk->getShardId(),
                                                              nss,
                                                              manager->getShardKeyPattern(),
                                                              chunkRange,
                                                              chunkSizeToUse,
                                                              boost::none));

        if (splitPoints.size() <= 1) {
            // No split points means there isn't enough data to split on; one split point means
            // the chunk is between half and a full chunk size, so there is no need to split yet
            chunk->clearBytesWritten();
            return;
        }

        if (minIsInf || maxIsInf) {
            // We don't want to reset _dataWritten since we want to check the other side right away
        } else {
            // We're splitting, so should wait a bit
            chunk->clearBytesWritten();
        }

        // We assume that if the chunk being split is the first (or last) one on the collection,
        // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the
        // very first (or last) key as a split point.
        //
        // This heuristic is skipped for "special" shard key patterns that are not likely to produce
        // monotonically increasing or decreasing values (e.g. hashed shard keys).
        if (KeyPattern::isOrderedKeyPattern(manager->getShardKeyPattern().toBSON())) {
            if (minIsInf) {
                BSONObj key = findExtremeKeyForShard(
                    opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), true);
                if (!key.isEmpty()) {
                    splitPoints.front() = key.getOwned();
                }
            } else if (maxIsInf) {
                BSONObj key = findExtremeKeyForShard(
                    opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), false);
                if (!key.isEmpty()) {
                    splitPoints.back() = key.getOwned();
                }
            }
        }

        const auto suggestedMigrateChunk =
            uassertStatusOK(shardutil::splitChunkAtMultiplePoints(opCtx,
                                                                  chunk->getShardId(),
                                                                  nss,
                                                                  manager->getShardKeyPattern(),
                                                                  manager->getVersion(),
                                                                  chunkRange,
                                                                  splitPoints));

        // Balance the resulting chunks if the option is enabled and if the shard suggested a chunk
        // to balance
        const bool shouldBalance = [&]() {
            if (!balancerConfig->shouldBalanceForAutoSplit())
                return false;

            auto collStatus =
                Grid::get(opCtx)->catalogClient()->getCollection(opCtx, manager->getns());
            if (!collStatus.isOK()) {
                log() << "Auto-split for " << nss << " failed to load collection metadata"
                      << causedBy(redact(collStatus.getStatus()));
                return false;
            }

            return collStatus.getValue().value.getAllowBalance();
        }();

        log() << "autosplitted " << nss << " chunk: " << redact(chunk->toString()) << " into "
              << (splitPoints.size() + 1) << " parts (desiredChunkSize " << desiredChunkSize << ")"
              << (suggestedMigrateChunk ? "" : (std::string) " (migrate suggested" +
                          (shouldBalance ? ")" : ", but no migrations allowed)"));

        // Reload the chunk manager after the split
        auto routingInfo = uassertStatusOK(
            Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx,
                                                                                         nss));

        if (!shouldBalance || !suggestedMigrateChunk) {
            return;
        }

        // Top chunk optimization - try to move the top chunk out of this shard to prevent the hot
        // spot from staying on a single shard. This is based on the assumption that succeeding
        // inserts will fall on the top chunk.

        // We need to use the latest chunk manager (after the split) in order to have the most
        // up-to-date view of the chunk we are about to move
        auto suggestedChunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation(
            suggestedMigrateChunk->getMin());

        ChunkType chunkToMove;
        chunkToMove.setNS(nss.ns());
        chunkToMove.setShard(suggestedChunk->getShardId());
        chunkToMove.setMin(suggestedChunk->getMin());
        chunkToMove.setMax(suggestedChunk->getMax());
        chunkToMove.setVersion(suggestedChunk->getLastmod());

        uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, chunkToMove));

        // Ensure the collection gets reloaded because of the move
        Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss);
    } catch (const DBException& ex) {
        chunk->clearBytesWritten();

        if (ErrorCodes::isStaleShardingError(ErrorCodes::Error(ex.getCode()))) {
            log() << "Unable to auto-split chunk " << redact(chunkRange.toString()) << causedBy(ex)
                  << ", going to invalidate routing table entry for " << nss;
            Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss);
        }
    }
}
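The chunkSizeToUse heuristic above is easier to see with concrete numbers. Here is a standalone sketch of the same calculation; kTooManySplitPoints is defined elsewhere in the real file, so the value below is an assumption:

#include <algorithm>
#include <cstdint>
#include <iostream>

// Assumed value for illustration; the real constant is defined elsewhere.
const uint64_t kTooManySplitPoints = 4;

// Mirrors the chunkSizeToUse lambda: if splitting at desiredChunkSize would
// produce too many split points, fall back to a bigger chunk size.
uint64_t chooseChunkSize(uint64_t chunkBytesWritten,
                         uint64_t desiredChunkSize,
                         uint64_t maxChunkSizeBytes) {
    const uint64_t estNumSplitPoints = chunkBytesWritten / desiredChunkSize * 2;
    if (estNumSplitPoints >= kTooManySplitPoints) {
        return std::min(chunkBytesWritten, maxChunkSizeBytes);
    }
    return desiredChunkSize;
}

int main() {
    // 640MB written against a 64MB target gives ~20 estimated split points, so
    // the heuristic falls back to the 128MB maximum rather than desiredChunkSize.
    std::cout << chooseChunkSize(640ULL << 20, 64ULL << 20, 128ULL << 20) << '\n';
}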
Example #8
void ChunkSplitter::_runAutosplit(const NamespaceString& nss,
                                  const BSONObj& min,
                                  const BSONObj& max,
                                  long dataWritten) {
    if (!_isPrimary) {
        return;
    }

    try {
        const auto opCtx = cc().makeOperationContext();
        const auto routingInfo = uassertStatusOK(
            Grid::get(opCtx.get())->catalogCache()->getCollectionRoutingInfo(opCtx.get(), nss));

        uassert(ErrorCodes::NamespaceNotSharded,
                "Could not split chunk. Collection is no longer sharded",
                routingInfo.cm());

        const auto cm = routingInfo.cm();
        const auto chunk = cm->findIntersectingChunkWithSimpleCollation(min);

        // Stop if chunk's range differs from the range we were expecting to split.
        if ((0 != chunk.getMin().woCompare(min)) || (0 != chunk.getMax().woCompare(max)) ||
            (chunk.getShardId() != ShardingState::get(opCtx.get())->getShardName())) {
            LOG(1) << "Cannot auto-split chunk with range '"
                   << redact(ChunkRange(min, max).toString()) << "' for nss '" << nss
                   << "' on shard '" << ShardingState::get(opCtx.get())->getShardName()
                   << "' because since scheduling auto-split the chunk has been changed to '"
                   << redact(chunk.toString()) << "'";
            return;
        }

        const ChunkRange chunkRange(chunk.getMin(), chunk.getMax());

        const auto balancerConfig = Grid::get(opCtx.get())->getBalancerConfiguration();
        // Ensure we have the most up-to-date balancer configuration
        uassertStatusOK(balancerConfig->refreshAndCheck(opCtx.get()));

        if (!balancerConfig->getShouldAutoSplit()) {
            return;
        }

        const uint64_t maxChunkSizeBytes = balancerConfig->getMaxChunkSizeBytes();

        LOG(1) << "about to initiate autosplit: " << redact(chunk.toString())
               << " dataWritten since last check: " << dataWritten
               << " maxChunkSizeBytes: " << maxChunkSizeBytes;

        auto splitPoints = uassertStatusOK(splitVector(opCtx.get(),
                                                       nss,
                                                       cm->getShardKeyPattern().toBSON(),
                                                       chunk.getMin(),
                                                       chunk.getMax(),
                                                       false,
                                                       boost::none,
                                                       boost::none,
                                                       boost::none,
                                                       maxChunkSizeBytes));

        if (splitPoints.size() <= 1) {
            // No split points means there isn't enough data to split on; one split point means
            // the chunk is between half and a full chunk size, so there is no need to split yet
            return;
        }

        // We assume that if the chunk being split is the first (or last) one on the collection,
        // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the
        // very first (or last) key as a split point.
        //
        // This heuristic is skipped for "special" shard key patterns that are not likely to produce
        // monotonically increasing or decreasing values (e.g. hashed shard keys).

        // Keeps track of the minKey of the top chunk after the split so we can migrate the chunk.
        BSONObj topChunkMinKey;

        if (KeyPattern::isOrderedKeyPattern(cm->getShardKeyPattern().toBSON())) {
            if (0 ==
                cm->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk.getMin())) {
                // MinKey is infinity (This is the first chunk on the collection)
                BSONObj key =
                    findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), true);
                if (!key.isEmpty()) {
                    splitPoints.front() = key.getOwned();
                    topChunkMinKey = cm->getShardKeyPattern().getKeyPattern().globalMin();
                }
            } else if (0 ==
                       cm->getShardKeyPattern().getKeyPattern().globalMax().woCompare(
                           chunk.getMax())) {
                // MaxKey is infinity (This is the last chunk on the collection)
                BSONObj key =
                    findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), false);
                if (!key.isEmpty()) {
                    splitPoints.back() = key.getOwned();
                    topChunkMinKey = key.getOwned();
                }
            }
        }

        uassertStatusOK(splitChunkAtMultiplePoints(opCtx.get(),
                                                   chunk.getShardId(),
                                                   nss,
                                                   cm->getShardKeyPattern(),
                                                   cm->getVersion(),
                                                   chunkRange,
                                                   splitPoints));

        const bool shouldBalance = isAutoBalanceEnabled(opCtx.get(), nss, balancerConfig);

        log() << "autosplitted " << nss << " chunk: " << redact(chunk.toString()) << " into "
              << (splitPoints.size() + 1) << " parts (maxChunkSizeBytes " << maxChunkSizeBytes
              << ")"
              << (topChunkMinKey.isEmpty() ? "" : " (top chunk migration suggested" +
                          (std::string)(shouldBalance ? ")" : ", but no migrations allowed)"));

        // Balance the resulting chunks if the autobalance option is enabled and if we split at the
        // first or last chunk on the collection as part of top chunk optimization.

        if (!shouldBalance || topChunkMinKey.isEmpty()) {
            return;
        }

        // Tries to move the top chunk out of the shard to prevent the hot spot from staying on a
        // single shard. This is based on the assumption that succeeding inserts will fall on the
        // top chunk.
        moveChunk(opCtx.get(), nss, topChunkMinKey);
    } catch (const DBException& ex) {
        log() << "Unable to auto-split chunk " << redact(ChunkRange(min, max).toString())
              << " in nss " << nss << causedBy(redact(ex.toStatus()));
    } catch (const std::exception& e) {
        log() << "caught exception while splitting chunk: " << redact(e.what());
    }
}
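Example #8 delegates its balance check to isAutoBalanceEnabled(), which is not shown here. Judging from the inline shouldBalance lambda in Example #7, it plausibly reduces to a sketch like this:

// Sketch reconstructed from Example #7's inline lambda, not the actual definition.
bool isAutoBalanceEnabled(OperationContext* opCtx,
                          const NamespaceString& nss,
                          BalancerConfiguration* balancerConfig) {
    if (!balancerConfig->shouldBalanceForAutoSplit())
        return false;

    auto collStatus = Grid::get(opCtx)->catalogClient()->getCollection(opCtx, nss.ns());
    if (!collStatus.isOK()) {
        log() << "Auto-split for " << nss << " failed to load collection metadata"
              << causedBy(redact(collStatus.getStatus()));
        return false;
    }

    return collStatus.getValue().value.getAllowBalance();
}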