/**
 * Reloads the auto-split setting from the global settings document stored under
 * AutoSplitSettingsType::kKey and, if the value differs from the one currently held by this
 * object, logs the transition and stores the new value in _shouldAutoSplit.
 *
 * A missing settings document (ErrorCodes::NoMatchingDocument) is not an error: the values from
 * AutoSplitSettingsType::createDefault() are used instead.
 *
 * Returns the fetch error (other than NoMatchingDocument) or the parse error if either step
 * fails; otherwise Status::OK().
 */
Status BalancerConfiguration::_refreshAutoSplitSettings(OperationContext* opCtx) {
    // Start from the defaults; they are only overwritten if a document is found and parses.
    AutoSplitSettingsType effectiveSettings = AutoSplitSettingsType::createDefault();

    auto fetchResult =
        Grid::get(opCtx)->catalogClient()->getGlobalSettings(opCtx, AutoSplitSettingsType::kKey);

    if (!fetchResult.isOK()) {
        // Only "no such document" is tolerated; every other failure is reported to the caller.
        if (fetchResult != ErrorCodes::NoMatchingDocument) {
            return fetchResult.getStatus();
        }
    } else {
        auto parseResult = AutoSplitSettingsType::fromBSON(fetchResult.getValue());
        if (!parseResult.isOK()) {
            return parseResult.getStatus();
        }

        effectiveSettings = std::move(parseResult.getValue());
    }

    const auto refreshedShouldAutoSplit = effectiveSettings.getShouldAutoSplit();
    if (refreshedShouldAutoSplit != getShouldAutoSplit()) {
        log() << "ShouldAutoSplit changing from " << getShouldAutoSplit() << " to "
              << refreshedShouldAutoSplit;

        _shouldAutoSplit.store(refreshedShouldAutoSplit);
    }

    return Status::OK();
}
void updateChunkWriteStatsAndSplitIfNeeded(OperationContext* opCtx, ChunkManager* manager, Chunk* chunk, long dataWritten) { // Disable lastError tracking so that any errors, which occur during auto-split do not get // bubbled up on the client connection doing a write. LastError::Disabled d(&LastError::get(cc())); const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration(); const bool minIsInf = (0 == manager->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk->getMin())); const bool maxIsInf = (0 == manager->getShardKeyPattern().getKeyPattern().globalMax().woCompare(chunk->getMax())); const uint64_t chunkBytesWritten = chunk->addBytesWritten(dataWritten); const uint64_t desiredChunkSize = calculateDesiredChunkSize(balancerConfig->getMaxChunkSizeBytes(), manager->numChunks()); if (!chunk->shouldSplit(desiredChunkSize, minIsInf, maxIsInf)) { return; } const NamespaceString nss(manager->getns()); if (!manager->_autoSplitThrottle._splitTickets.tryAcquire()) { LOG(1) << "won't auto split because not enough tickets: " << nss; return; } TicketHolderReleaser releaser(&(manager->_autoSplitThrottle._splitTickets)); const ChunkRange chunkRange(chunk->getMin(), chunk->getMax()); try { // Ensure we have the most up-to-date balancer configuration uassertStatusOK(balancerConfig->refreshAndCheck(opCtx)); if (!balancerConfig->getShouldAutoSplit()) { return; } LOG(1) << "about to initiate autosplit: " << redact(chunk->toString()) << " dataWritten: " << chunkBytesWritten << " desiredChunkSize: " << desiredChunkSize; const uint64_t chunkSizeToUse = [&]() { const uint64_t estNumSplitPoints = chunkBytesWritten / desiredChunkSize * 2; if (estNumSplitPoints >= kTooManySplitPoints) { // The current desired chunk size will split the chunk into lots of small chunk and // at the worst case this can result into thousands of chunks. So check and see if a // bigger value can be used. 
return std::min(chunkBytesWritten, balancerConfig->getMaxChunkSizeBytes()); } else { return desiredChunkSize; } }(); auto splitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints(opCtx, chunk->getShardId(), nss, manager->getShardKeyPattern(), chunkRange, chunkSizeToUse, boost::none)); if (splitPoints.size() <= 1) { // No split points means there isn't enough data to split on; 1 split point means we // have // between half the chunk size to full chunk size so there is no need to split yet chunk->clearBytesWritten(); return; } if (minIsInf || maxIsInf) { // We don't want to reset _dataWritten since we want to check the other side right away } else { // We're splitting, so should wait a bit chunk->clearBytesWritten(); } // We assume that if the chunk being split is the first (or last) one on the collection, // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the // very first (or last) key as a split point. // // This heuristic is skipped for "special" shard key patterns that are not likely to produce // monotonically increasing or decreasing values (e.g. hashed shard keys). 
if (KeyPattern::isOrderedKeyPattern(manager->getShardKeyPattern().toBSON())) { if (minIsInf) { BSONObj key = findExtremeKeyForShard( opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), true); if (!key.isEmpty()) { splitPoints.front() = key.getOwned(); } } else if (maxIsInf) { BSONObj key = findExtremeKeyForShard( opCtx, nss, chunk->getShardId(), manager->getShardKeyPattern(), false); if (!key.isEmpty()) { splitPoints.back() = key.getOwned(); } } } const auto suggestedMigrateChunk = uassertStatusOK(shardutil::splitChunkAtMultiplePoints(opCtx, chunk->getShardId(), nss, manager->getShardKeyPattern(), manager->getVersion(), chunkRange, splitPoints)); // Balance the resulting chunks if the option is enabled and if the shard suggested a chunk // to balance const bool shouldBalance = [&]() { if (!balancerConfig->shouldBalanceForAutoSplit()) return false; auto collStatus = Grid::get(opCtx)->catalogClient()->getCollection(opCtx, manager->getns()); if (!collStatus.isOK()) { log() << "Auto-split for " << nss << " failed to load collection metadata" << causedBy(redact(collStatus.getStatus())); return false; } return collStatus.getValue().value.getAllowBalance(); }(); log() << "autosplitted " << nss << " chunk: " << redact(chunk->toString()) << " into " << (splitPoints.size() + 1) << " parts (desiredChunkSize " << desiredChunkSize << ")" << (suggestedMigrateChunk ? "" : (std::string) " (migrate suggested" + (shouldBalance ? ")" : ", but no migrations allowed)")); // Reload the chunk manager after the split auto routingInfo = uassertStatusOK( Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss)); if (!shouldBalance || !suggestedMigrateChunk) { return; } // Top chunk optimization - try to move the top chunk out of this shard to prevent the hot // spot from staying on a single shard. This is based on the assumption that succeeding // inserts will fall on the top chunk. 
// We need to use the latest chunk manager (after the split) in order to have the most // up-to-date view of the chunk we are about to move auto suggestedChunk = routingInfo.cm()->findIntersectingChunkWithSimpleCollation( suggestedMigrateChunk->getMin()); ChunkType chunkToMove; chunkToMove.setNS(nss.ns()); chunkToMove.setShard(suggestedChunk->getShardId()); chunkToMove.setMin(suggestedChunk->getMin()); chunkToMove.setMax(suggestedChunk->getMax()); chunkToMove.setVersion(suggestedChunk->getLastmod()); uassertStatusOK(configsvr_client::rebalanceChunk(opCtx, chunkToMove)); // Ensure the collection gets reloaded because of the move Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); } catch (const DBException& ex) { chunk->clearBytesWritten(); if (ErrorCodes::isStaleShardingError(ErrorCodes::Error(ex.getCode()))) { log() << "Unable to auto-split chunk " << redact(chunkRange.toString()) << causedBy(ex) << ", going to invalidate routing table entry for " << nss; Grid::get(opCtx)->catalogCache()->invalidateShardedCollection(nss); } } }
void ChunkSplitter::_runAutosplit(const NamespaceString& nss, const BSONObj& min, const BSONObj& max, long dataWritten) { if (!_isPrimary) { return; } try { const auto opCtx = cc().makeOperationContext(); const auto routingInfo = uassertStatusOK( Grid::get(opCtx.get())->catalogCache()->getCollectionRoutingInfo(opCtx.get(), nss)); uassert(ErrorCodes::NamespaceNotSharded, "Could not split chunk. Collection is no longer sharded", routingInfo.cm()); const auto cm = routingInfo.cm(); const auto chunk = cm->findIntersectingChunkWithSimpleCollation(min); // Stop if chunk's range differs from the range we were expecting to split. if ((0 != chunk.getMin().woCompare(min)) || (0 != chunk.getMax().woCompare(max)) || (chunk.getShardId() != ShardingState::get(opCtx.get())->getShardName())) { LOG(1) << "Cannot auto-split chunk with range '" << redact(ChunkRange(min, max).toString()) << "' for nss '" << nss << "' on shard '" << ShardingState::get(opCtx.get())->getShardName() << "' because since scheduling auto-split the chunk has been changed to '" << redact(chunk.toString()) << "'"; return; } const ChunkRange chunkRange(chunk.getMin(), chunk.getMax()); const auto balancerConfig = Grid::get(opCtx.get())->getBalancerConfiguration(); // Ensure we have the most up-to-date balancer configuration uassertStatusOK(balancerConfig->refreshAndCheck(opCtx.get())); if (!balancerConfig->getShouldAutoSplit()) { return; } const uint64_t maxChunkSizeBytes = balancerConfig->getMaxChunkSizeBytes(); LOG(1) << "about to initiate autosplit: " << redact(chunk.toString()) << " dataWritten since last check: " << dataWritten << " maxChunkSizeBytes: " << maxChunkSizeBytes; auto splitPoints = uassertStatusOK(splitVector(opCtx.get(), nss, cm->getShardKeyPattern().toBSON(), chunk.getMin(), chunk.getMax(), false, boost::none, boost::none, boost::none, maxChunkSizeBytes)); if (splitPoints.size() <= 1) { // No split points means there isn't enough data to split on; 1 split point means we // have between half the 
chunk size to full chunk size so there is no need to split yet return; } // We assume that if the chunk being split is the first (or last) one on the collection, // this chunk is likely to see more insertions. Instead of splitting mid-chunk, we use the // very first (or last) key as a split point. // // This heuristic is skipped for "special" shard key patterns that are not likely to produce // monotonically increasing or decreasing values (e.g. hashed shard keys). // Keeps track of the minKey of the top chunk after the split so we can migrate the chunk. BSONObj topChunkMinKey; if (KeyPattern::isOrderedKeyPattern(cm->getShardKeyPattern().toBSON())) { if (0 == cm->getShardKeyPattern().getKeyPattern().globalMin().woCompare(chunk.getMin())) { // MinKey is infinity (This is the first chunk on the collection) BSONObj key = findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), true); if (!key.isEmpty()) { splitPoints.front() = key.getOwned(); topChunkMinKey = cm->getShardKeyPattern().getKeyPattern().globalMin(); } } else if (0 == cm->getShardKeyPattern().getKeyPattern().globalMax().woCompare( chunk.getMax())) { // MaxKey is infinity (This is the last chunk on the collection) BSONObj key = findExtremeKeyForShard(opCtx.get(), nss, cm->getShardKeyPattern(), false); if (!key.isEmpty()) { splitPoints.back() = key.getOwned(); topChunkMinKey = key.getOwned(); } } } uassertStatusOK(splitChunkAtMultiplePoints(opCtx.get(), chunk.getShardId(), nss, cm->getShardKeyPattern(), cm->getVersion(), chunkRange, splitPoints)); const bool shouldBalance = isAutoBalanceEnabled(opCtx.get(), nss, balancerConfig); log() << "autosplitted " << nss << " chunk: " << redact(chunk.toString()) << " into " << (splitPoints.size() + 1) << " parts (maxChunkSizeBytes " << maxChunkSizeBytes << ")" << (topChunkMinKey.isEmpty() ? "" : " (top chunk migration suggested" + (std::string)(shouldBalance ? 
")" : ", but no migrations allowed)")); // Balance the resulting chunks if the autobalance option is enabled and if we split at the // first or last chunk on the collection as part of top chunk optimization. if (!shouldBalance || topChunkMinKey.isEmpty()) { return; } // Tries to move the top chunk out of the shard to prevent the hot spot from staying on a // single shard. This is based on the assumption that succeeding inserts will fall on the // top chunk. moveChunk(opCtx.get(), nss, topChunkMinKey); } catch (const DBException& ex) { log() << "Unable to auto-split chunk " << redact(ChunkRange(min, max).toString()) << " in nss " << nss << causedBy(redact(ex.toStatus())); } catch (const std::exception& e) { log() << "caught exception while splitting chunk: " << redact(e.what()); } }