ShardEndpoint ChunkManagerTargeter::_targetShardKey(const BSONObj& shardKey, const BSONObj& collation, long long estDataSize) const { const auto chunk = _routingInfo->cm()->findIntersectingChunk(shardKey, collation); return {chunk.getShardId(), _routingInfo->cm()->getVersion(chunk.getShardId())}; }
void handleLsid(const LogicalSessionId& lsid) { // There are some lifetime issues with when the reaper starts up versus when the grid is // available. Moving routing info fetching until after we have a transaction moves us past // the problem. // // Also, we should only need the chunk case, but that'll wait until the sessions table is // actually sharded. if (!(_cm || _primary)) { auto routingInfo = uassertStatusOK(Grid::get(_opCtx)->catalogCache()->getCollectionRoutingInfo( _opCtx, SessionsCollection::kSessionsNamespaceString)); _cm = routingInfo.cm(); _primary = routingInfo.db().primary(); } ShardId shardId; if (_cm) { const auto chunk = _cm->findIntersectingChunkWithSimpleCollation(lsid.toBSON()); shardId = chunk.getShardId(); } else { shardId = _primary->getId(); } auto& lsids = _shards[shardId]; lsids.insert(lsid); if (lsids.size() > write_ops::kMaxWriteBatchSize) { _numReaped += removeSessionsRecords(_opCtx, _sessionsCollection, lsids); _shards.erase(shardId); } }
void ChunkManager::getShardIdsForQuery(OperationContext* txn, const BSONObj& query, set<ShardId>* shardIds) const { auto statusWithCQ = CanonicalQuery::canonicalize(NamespaceString(_ns), query, ExtensionsCallbackNoop()); uassertStatusOK(statusWithCQ.getStatus()); unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); // Query validation if (QueryPlannerCommon::hasNode(cq->root(), MatchExpression::GEO_NEAR)) { uassert(13501, "use geoNear command rather than $near query", false); } // Fast path for targeting equalities on the shard key. auto shardKeyToFind = _keyPattern.extractShardKeyFromQuery(*cq); if (shardKeyToFind.isOK() && !shardKeyToFind.getValue().isEmpty()) { auto chunk = findIntersectingChunk(txn, shardKeyToFind.getValue()); shardIds->insert(chunk->getShardId()); return; } // Transforms query into bounds for each field in the shard key // for example : // Key { a: 1, b: 1 }, // Query { a : { $gte : 1, $lt : 2 }, // b : { $gte : 3, $lt : 4 } } // => Bounds { a : [1, 2), b : [3, 4) } IndexBounds bounds = getIndexBoundsForQuery(_keyPattern.toBSON(), *cq); // Transforms bounds for each shard key field into full shard key ranges // for example : // Key { a : 1, b : 1 } // Bounds { a : [1, 2), b : [3, 4) } // => Ranges { a : 1, b : 3 } => { a : 2, b : 4 } BoundList ranges = _keyPattern.flattenBounds(bounds); for (BoundList::const_iterator it = ranges.begin(); it != ranges.end(); ++it) { getShardIdsForRange(*shardIds, it->first /*min*/, it->second /*max*/); // once we know we need to visit all shards no need to keep looping if (shardIds->size() == _shardIds.size()) break; } // SERVER-4914 Some clients of getShardIdsForQuery() assume at least one shard will be // returned. For now, we satisfy that assumption by adding a shard with no matches rather // than return an empty set of shards. if (shardIds->empty()) { massert(16068, "no chunk ranges available", !_chunkRanges.ranges().empty()); shardIds->insert(_chunkRanges.ranges().begin()->second->getShardId()); } }
size_t ShardSplitter::getShardSplitCnt(folly::StringPiece routingKey, folly::StringPiece& shard) const { if (!getShardId(routingKey, shard)) { return 1; } auto splitIt = shardSplits_.find(shard); if (splitIt == shardSplits_.end()) { return 1; } return splitIt->second; }
void handleLsid(const LogicalSessionId& lsid) { invariant(_cm); const auto chunk = _cm->findIntersectingChunkWithSimpleCollation(lsid.toBSON()); const auto shardId = chunk.getShardId(); auto& lsids = _shards[shardId]; lsids.insert(lsid); if (lsids.size() >= write_ops::kMaxWriteBatchSize) { _numReaped += removeSessionsRecords(_opCtx, _sessionsCollection, lsids); _shards.erase(shardId); } }
bool ConstShardHashFunc::shardLookup(folly::StringPiece key, size_t* result) const { folly::StringPiece shard; if (!getShardId(key, shard)) { return false; } for (const auto& iter: shard) { if (!isdigit(iter)) { return false; } } size_t index; try { index = folly::to<size_t>(shard); } catch (...) { return false; } if (index >= n_) { return false; // out of bounds } *result = index; return true; }
void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* opCtx, ChunkManager* manager, Chunk* chunk, long dataWritten) { // Disable lastError tracking so that any errors, which occur during auto-split do not get // bubbled up on the client connection doing a write. LastError::Disabled d(&LastError::get(cc())); const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration(); const bool minIsInf = (0 == manager->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk->getMin())); const bool maxIsInf = (0 == manager->getShardKeyPattern().getKeyPattern().globalMax().woCompare(chunk->getMax())); const uint64_t chunkBytesWritten = chunk->addBytesWritten(dataWritten); const uint64_t desiredChunkSize = calculateDesiredChunkSize(balancerConfig->getMaxChunkSizeBytes(), manager->numChunks()); if (!chunk->shouldSplit(desiredChunkSize, minIsInf, maxIsInf)) { return; } const NamespaceString nss(manager->getns()); if (!manager->_autoSplitThrottle._splitTickets.tryAcquire()) { LOG(1) << "won't auto split because not enough tickets: " << nss; return; } TicketHolderReleaser releaser(&(manager->_autoSplitThrottle._splitTickets)); const ChunkRange chunkRange(chunk->getMin(), chunk->getMax()); try { // Ensure we have the most up-to-date balancer configuration uassertStatusOK(balancerConfig->refreshAndCheck(opCtx)); if (!balancerConfig->getShouldAutoSplit()) { return; } LOG(1) << "about to initiate autosplit: " << redact(chunk->toString()) << " dataWritten: " << chunkBytesWritten << " desiredChunkSize: " << desiredChunkSize; const uint64_t chunkSizeToUse = [&]() { const uint64_t estNumSplitPoints = chunkBytesWritten / desiredChunkSize * 2; if (estNumSplitPoints >= kTooManySplitPoints) { // The current desired chunk size will split the chunk into lots of small chunk and // at the worst case this can result into thousands of chunks. So check and see if a // bigger value can be used. return std::min(chunkBytesWritten, balancerConfig->getMaxChunkSizeBytes()); } else { return desiredChunkSize; } }(); auto splitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints(opCtx, chunk->getShardId(), nss, manager->getShardKeyPattern(), chunkRange, chunkSizeToUse, boost::none)); if (splitPoints.size() <= 1) { // No split points means there isn't enough data to split on; 1 split point means we // have // between half the chunk size to full chunk size so there is no need to split yet chunk->clearBytesWritten(); return; } if (minIsInf || maxIsInf) { // We don't want to reset _dataWritten since we want to check the other side right away } else { // We're splitting, so should wait a bit chunk->clearBytesWritten(); } // We assume that if the chunk being split is the first (or last) one on the collection, // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the // very first (or last) key as a split point. // // This heuristic is skipped for "special" shard key patterns that are not likely to produce // monotonically increasing or decreasing values (e.g. hashed shard keys). if (KeyPattern::isOrderedKeyPattern(manager->getShardKeyPattern().toBSON())) { if (minIsInf) { BSONObj key = findExtremeKeyForShard( opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), true); if (!key.isEmpty()) { splitPoints.front() = key.getOwned(); } } else if (maxIsInf) { BSONObj key = findExtremeKeyForShard( opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), false); if (!key.isEmpty()) { splitPoints.back() = key.getOwned(); } } } const auto suggestedMigrateChunk = uassertStatusOK(shardutil::splitChunkAtMultiplePoints(opCtx, chunk->getShardId(), nss, manager->getShardKeyPattern(), manager->getVersion(), chunkRange, splitPoints)); // Balance the resulting chunks if the option is enabled and if the shard suggested a chunk // to balance const bool shouldBalance = [&]() { if (!balancerConfig->shouldBalanceForAutoSplit()) return false; auto collStatus = Grid::get(opCtx)->catalogClient()->getCollection(opCtx, manager->getns()); if (!collStatus.isOK()) { log() << "Auto-split for " << nss << " failed to load collection metadata" << causedBy(redact(collStatus.getStatus())); return false; } return collStatus.getValue().value.getAllowBalance(); }(); log() << "autosplitted " << nss << " chunk: " << redact(chunk->toString()) << " into " << (splitPoints.size() + 1) << " parts (desiredChunkSize " << desiredChunkSize << ")" << (suggestedMigrateChunk ? "" : (std::string) " (migrate suggested" + (shouldBalance ? ")" : ", but no migrations allowed)")); // Reload the chunk manager after the split auto routingInfo = uassertStatusOK( Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss)); if (!shouldBalance || !suggestedMigrateChunk) { return; } // Top chunk optimization - try to move the top chunk out of this shard to prevent the hot // spot from staying on a single shard. This is based on the assumption that succeeding // inserts will fall on the top chunk. // We need to use the latest chunk manager (after the split) in order to have the most // up-to-date view of the chunk we are about to move auto suggestedChunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation( suggestedMigrateChunk->getMin()); ChunkType chunkToMove; chunkToMove.setNS(nss.ns()); chunkToMove.setShard(suggestedChunk->getShardId()); chunkToMove.setMin(suggestedChunk->getMin()); chunkToMove.setMax(suggestedChunk->getMax()); chunkToMove.setVersion(suggestedChunk->getLastmod()); uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, chunkToMove)); // Ensure the collection gets reloaded because of the move Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); } catch (const DBException& ex) { chunk->clearBytesWritten(); if (ErrorCodes::isStaleShardingError(ErrorCodes::Error(ex.getCode()))) { log() << "Unable to auto-split chunk " << redact(chunkRange.toString()) << causedBy(ex) << ", going to invalidate routing table entry for " << nss; Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); } } }
void ChunkSplitter::_runAutosplit(const NamespaceString& nss, const BSONObj& min, const BSONObj& max, long dataWritten) { if (!_isPrimary) { return; } try { const auto opCtx = cc().makeOperationContext(); const auto routingInfo = uassertStatusOK( Grid::get(opCtx.get())->catalogCache()->getCollectionRoutingInfo(opCtx.get(), nss)); uassert(ErrorCodes::NamespaceNotSharded, "Could not split chunk. Collection is no longer sharded", routingInfo.cm()); const auto cm = routingInfo.cm(); const auto chunk = cm->findIntersectingChunkWithSimpleCollation(min); // Stop if chunk's range differs from the range we were expecting to split. if ((0 != chunk.getMin().woCompare(min)) || (0 != chunk.getMax().woCompare(max)) || (chunk.getShardId() != ShardingState::get(opCtx.get())->getShardName())) { LOG(1) << "Cannot auto-split chunk with range '" << redact(ChunkRange(min, max).toString()) << "' for nss '" << nss << "' on shard '" << ShardingState::get(opCtx.get())->getShardName() << "' because since scheduling auto-split the chunk has been changed to '" << redact(chunk.toString()) << "'"; return; } const ChunkRange chunkRange(chunk.getMin(), chunk.getMax()); const auto balancerConfig = Grid::get(opCtx.get())->getBalancerConfiguration(); // Ensure we have the most up-to-date balancer configuration uassertStatusOK(balancerConfig->refreshAndCheck(opCtx.get())); if (!balancerConfig->getShouldAutoSplit()) { return; } const uint64_t maxChunkSizeBytes = balancerConfig->getMaxChunkSizeBytes(); LOG(1) << "about to initiate autosplit: " << redact(chunk.toString()) << " dataWritten since last check: " << dataWritten << " maxChunkSizeBytes: " << maxChunkSizeBytes; auto splitPoints = uassertStatusOK(splitVector(opCtx.get(), nss, cm->getShardKeyPattern().toBSON(), chunk.getMin(), chunk.getMax(), false, boost::none, boost::none, boost::none, maxChunkSizeBytes)); if (splitPoints.size() <= 1) { // No split points means there isn't enough data to split on; 1 split point means we // have between half the chunk size to full chunk size so there is no need to split yet return; } // We assume that if the chunk being split is the first (or last) one on the collection, // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the // very first (or last) key as a split point. // // This heuristic is skipped for "special" shard key patterns that are not likely to produce // monotonically increasing or decreasing values (e.g. hashed shard keys). // Keeps track of the minKey of the top chunk after the split so we can migrate the chunk. BSONObj topChunkMinKey; if (KeyPattern::isOrderedKeyPattern(cm->getShardKeyPattern().toBSON())) { if (0 == cm->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk.getMin())) { // MinKey is infinity (This is the first chunk on the collection) BSONObj key = findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), true); if (!key.isEmpty()) { splitPoints.front() = key.getOwned(); topChunkMinKey = cm->getShardKeyPattern().getKeyPattern().globalMin(); } } else if (0 == cm->getShardKeyPattern().getKeyPattern().globalMax().woCompare( chunk.getMax())) { // MaxKey is infinity (This is the last chunk on the collection) BSONObj key = findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), false); if (!key.isEmpty()) { splitPoints.back() = key.getOwned(); topChunkMinKey = key.getOwned(); } } } uassertStatusOK(splitChunkAtMultiplePoints(opCtx.get(), chunk.getShardId(), nss, cm->getShardKeyPattern(), cm->getVersion(), chunkRange, splitPoints)); const bool shouldBalance = isAutoBalanceEnabled(opCtx.get(), nss, balancerConfig); log() << "autosplitted " << nss << " chunk: " << redact(chunk.toString()) << " into " << (splitPoints.size() + 1) << " parts (maxChunkSizeBytes " << maxChunkSizeBytes << ")" << (topChunkMinKey.isEmpty() ? "" : " (top chunk migration suggested" + (std::string)(shouldBalance ? ")" : ", but no migrations allowed)")); // Balance the resulting chunks if the autobalance option is enabled and if we split at the // first or last chunk on the collection as part of top chunk optimization. if (!shouldBalance || topChunkMinKey.isEmpty()) { return; } // Tries to move the top chunk out of the shard to prevent the hot spot from staying on a // single shard. This is based on the assumption that succeeding inserts will fall on the // top chunk. moveChunk(opCtx.get(), nss, topChunkMinKey); } catch (const DBException& ex) { log() << "Unable to auto-split chunk " << redact(ChunkRange(min, max).toString()) << " in nss " << nss << causedBy(redact(ex.toStatus())); } catch (const std::exception& e) { log() << "caught exception while splitting chunk: " << redact(e.what()); } }