// Returns the shard that owns all data for 'ns'. A sharded namespace has no
// single owning shard, so the empty-shard sentinel is returned for it instead.
const Shard& DBConfig::getShard( const string& ns ){
    if ( ! isSharded( ns ) ){
        // Unsharded data lives on the database's primary shard, which must exist.
        uassert( 10178 , "no primary!" , _primary.ok() );
        return _primary;
    }
    return Shard::EMPTY;
}
// Returns the name of the shard that owns all data for 'ns', or the empty
// string when the namespace is sharded (no single shard owns it).
string DBConfig::getShard( const string& ns ){
    if ( ! isSharded( ns ) ){
        // Unsharded data lives on the database's primary shard; it must be set.
        uassert( 10178 , "no primary!" , _primary.size() );
        return _primary;
    }
    return "";
}
// Returns the cached ChunkManager for a sharded namespace, building it from the
// cached collection metadata (_sharded) on first use or when 'reload' is true.
// Throws (uassert 10181) if 'ns' is not sharded.
ChunkManager* DBConfig::getChunkManager( const string& ns , bool reload ){
    // operator[] default-inserts a null entry on first lookup, which is fine here.
    ChunkManager* m = _shards[ns];
    if ( m && ! reload )
        return m;
    uassert( 10181 , (string)"not sharded:" + ns , isSharded( ns ) );
    if ( m && reload )
        log() << "reloading shard info for: " << ns << endl;
    // NOTE(review): on reload the previous ChunkManager is overwritten without
    // being deleted. Callers elsewhere may still hold the old raw pointer, so
    // deleting here could dangle — but as written this looks like a deliberate
    // (or accidental) leak; confirm ownership before changing.
    m = new ChunkManager( this , ns , _sharded[ ns ].key , _sharded[ns].unique );
    _shards[ns] = m;
    return m;
}
// Marks 'ns' as sharded with the given shard key and creates its ChunkManager.
// Idempotent when a manager already exists; throws if sharding is not enabled
// for this database (8042) or if the collection is already sharded without a
// cached manager (8043).
ChunkManager* DBConfig::shardCollection( const string& ns , ShardKeyPattern fieldsAndOrder , bool unique ){
    if ( ! _shardingEnabled )
        throw UserException( 8042 , "db doesn't have sharding enabled" );

    // Already set up? Return the existing manager unchanged.
    ChunkManager * manager = _shards[ns];
    if ( manager )
        return manager;

    if ( isSharded( ns ) )
        throw UserException( 8043 , "already sharded" );

    log() << "enable sharding on: " << ns << " with shard key: " << fieldsAndOrder << endl;

    // Record the collection metadata first, then build and cache the manager.
    _sharded[ns] = CollectionInfo( fieldsAndOrder , unique );
    manager = new ChunkManager( this , ns , fieldsAndOrder , unique );
    _shards[ns] = manager;
    return manager;
}
// Routes an aggregation received by mongos. Parses the request, then either
// forwards it unchanged to the primary shard (unsharded collection) or runs it
// sharded: optionally splits the pipeline into a shards part and a merger part,
// dispatches the shards part to the targeted shards, and either returns a
// single shard's cursor directly or runs the merger part on a merging shard.
// Errors are returned as a non-OK Status or written into 'result'.
Status ClusterAggregate::runAggregate(OperationContext* txn,
                                      const Namespaces& namespaces,
                                      BSONObj cmdObj,
                                      int options,
                                      BSONObjBuilder* result) {
    // If the database doesn't exist, the aggregation has an empty result set.
    auto scopedShardDbStatus = ScopedShardDatabase::getExisting(txn, namespaces.executionNss.db());
    if (!scopedShardDbStatus.isOK()) {
        appendEmptyResultSet(
            *result, scopedShardDbStatus.getStatus(), namespaces.requestedNss.ns());
        return Status::OK();
    }

    // Parse the request up front so malformed commands fail before targeting.
    auto request = AggregationRequest::parseFromBSON(namespaces.executionNss, cmdObj);
    if (!request.isOK()) {
        return request.getStatus();
    }

    const auto conf = scopedShardDbStatus.getValue().db();

    // Determine the appropriate collation and 'resolve' involved namespaces to make the
    // ExpressionContext.

    // We won't try to execute anything on a mongos, but we still have to populate this map so that
    // any $lookups, etc. will be able to have a resolved view definition. It's okay that this is
    // incorrect, we will repopulate the real resolved namespace map on the mongod. Note that we
    // need to check if any involved collections are sharded before forwarding an aggregation
    // command on an unsharded collection.
    StringMap<ExpressionContext::ResolvedNamespace> resolvedNamespaces;
    LiteParsedPipeline liteParsedPipeline(request.getValue());
    for (auto&& ns : liteParsedPipeline.getInvolvedNamespaces()) {
        uassert(28769, str::stream() << ns.ns() << " cannot be sharded", !conf->isSharded(ns.ns()));
        resolvedNamespaces[ns.coll()] = {ns, std::vector<BSONObj>{}};
    }

    // Unsharded target collection: forward the whole command to the primary shard.
    if (!conf->isSharded(namespaces.executionNss.ns())) {
        return aggPassthrough(txn, namespaces, conf, cmdObj, result, options);
    }

    auto chunkMgr = conf->getChunkManager(txn, namespaces.executionNss.ns());

    // Collation precedence: explicit request collation, else the collection's
    // default collator from the chunk manager, else null (simple).
    std::unique_ptr<CollatorInterface> collation;
    if (!request.getValue().getCollation().isEmpty()) {
        collation = uassertStatusOK(CollatorFactoryInterface::get(txn->getServiceContext())
                                        ->makeFromBSON(request.getValue().getCollation()));
    } else if (chunkMgr->getDefaultCollator()) {
        collation = chunkMgr->getDefaultCollator()->clone();
    }

    boost::intrusive_ptr<ExpressionContext> mergeCtx = new ExpressionContext(
        txn, request.getValue(), std::move(collation), std::move(resolvedNamespaces));
    mergeCtx->inRouter = true;
    // explicitly *not* setting mergeCtx->tempDir

    // Parse and optimize the pipeline specification.
    auto pipeline = Pipeline::parse(request.getValue().getPipeline(), mergeCtx);
    if (!pipeline.isOK()) {
        return pipeline.getStatus();
    }

    pipeline.getValue()->optimizePipeline();

    // If the first $match stage is an exact match on the shard key (with a simple collation or
    // no string matching), we only have to send it to one shard, so send the command to that
    // shard.
    BSONObj firstMatchQuery = pipeline.getValue()->getInitialQuery();
    BSONObj shardKeyMatches;
    shardKeyMatches = uassertStatusOK(
        chunkMgr->getShardKeyPattern().extractShardKeyFromQuery(txn, firstMatchQuery));
    bool singleShard = false;
    if (!shardKeyMatches.isEmpty()) {
        auto chunk = chunkMgr->findIntersectingChunk(
            txn, shardKeyMatches, request.getValue().getCollation());
        if (chunk.isOK()) {
            singleShard = true;
        }
    }

    // Don't need to split pipeline if the first $match is an exact match on shard key, unless
    // there is a stage that needs to be run on the primary shard.
    const bool needPrimaryShardMerger = pipeline.getValue()->needsPrimaryShardMerger();
    const bool needSplit = !singleShard || needPrimaryShardMerger;

    // Split the pipeline into pieces for mongod(s) and this mongos. If needSplit is true,
    // 'pipeline' will become the merger side.
    boost::intrusive_ptr<Pipeline> shardPipeline(needSplit ? pipeline.getValue()->splitForSharded()
                                                           : pipeline.getValue());

    // Create the command for the shards. The 'fromRouter' field means produce output to be
    // merged.
    MutableDocument commandBuilder(request.getValue().serializeToCommandObj());
    commandBuilder[AggregationRequest::kPipelineName] = Value(shardPipeline->serialize());
    if (needSplit) {
        commandBuilder[AggregationRequest::kFromRouterName] = Value(true);
        // Shards should not prefetch results; the merger drives the cursors.
        commandBuilder[AggregationRequest::kCursorName] =
            Value(DOC(AggregationRequest::kBatchSizeName << 0));
    }

    // These fields are not part of the AggregationRequest since they are not handled by the
    // aggregation subsystem, so we serialize them separately.
    const std::initializer_list<StringData> fieldsToPropagateToShards = {
        "$queryOptions", "readConcern", QueryRequest::cmdOptionMaxTimeMS,
    };
    for (auto&& field : fieldsToPropagateToShards) {
        commandBuilder[field] = Value(cmdObj[field]);
    }

    BSONObj shardedCommand = commandBuilder.freeze().toBson();
    BSONObj shardQuery = shardPipeline->getInitialQuery();

    // Run the command on the shards
    // TODO need to make sure cursors are killed if a retry is needed
    std::vector<Strategy::CommandResult> shardResults;
    Strategy::commandOp(txn,
                        namespaces.executionNss.db().toString(),
                        shardedCommand,
                        options,
                        namespaces.executionNss.ns(),
                        shardQuery,
                        request.getValue().getCollation(),
                        &shardResults);

    // Explain: assemble per-shard explain output instead of running cursors.
    if (mergeCtx->isExplain) {
        // This must be checked before we start modifying result.
        uassertAllShardsSupportExplain(shardResults);

        if (needSplit) {
            *result << "needsPrimaryShardMerger" << needPrimaryShardMerger << "splitPipeline"
                    << DOC("shardsPart" << shardPipeline->writeExplainOps() << "mergerPart"
                                        << pipeline.getValue()->writeExplainOps());
        } else {
            *result << "splitPipeline" << BSONNULL;
        }

        BSONObjBuilder shardExplains(result->subobjStart("shards"));
        for (size_t i = 0; i < shardResults.size(); i++) {
            shardExplains.append(shardResults[i].shardTargetId,
                                 BSON("host" << shardResults[i].target.toString() << "stages"
                                             << shardResults[i].result["stages"]));
        }

        return Status::OK();
    }

    // Single-shard, no merger: relay that shard's cursor straight to the client.
    if (!needSplit) {
        invariant(shardResults.size() == 1);
        invariant(shardResults[0].target.getServers().size() == 1);
        auto executorPool = Grid::get(txn)->getExecutorPool();
        const BSONObj reply =
            uassertStatusOK(storePossibleCursor(shardResults[0].target.getServers()[0],
                                                shardResults[0].result,
                                                namespaces.requestedNss,
                                                executorPool->getArbitraryExecutor(),
                                                Grid::get(txn)->getCursorManager()));
        result->appendElements(reply);
        return getStatusFromCommandResult(reply);
    }

    // Merger path: feed the shard cursors into the merger half of the pipeline.
    pipeline.getValue()->addInitialSource(
        DocumentSourceMergeCursors::create(parseCursors(shardResults), mergeCtx));

    // Build the merge command from the original request, swapping in the merger
    // pipeline and propagating passthrough fields from the raw command object.
    MutableDocument mergeCmd(request.getValue().serializeToCommandObj());
    mergeCmd["pipeline"] = Value(pipeline.getValue()->serialize());
    mergeCmd["cursor"] = Value(cmdObj["cursor"]);

    if (cmdObj.hasField("$queryOptions")) {
        mergeCmd["$queryOptions"] = Value(cmdObj["$queryOptions"]);
    }

    if (cmdObj.hasField(QueryRequest::cmdOptionMaxTimeMS)) {
        mergeCmd[QueryRequest::cmdOptionMaxTimeMS] =
            Value(cmdObj[QueryRequest::cmdOptionMaxTimeMS]);
    }

    mergeCmd.setField("writeConcern", Value(cmdObj["writeConcern"]));
    mergeCmd.setField("readConcern", Value(cmdObj["readConcern"]));

    // If the user didn't specify a collation already, make sure there's a collation attached to
    // the merge command, since the merging shard may not have the collection metadata.
    if (mergeCmd.peek()["collation"].missing()) {
        mergeCmd.setField("collation",
                          mergeCtx->getCollator()
                              ? Value(mergeCtx->getCollator()->getSpec().toBSON())
                              : Value(Document{CollationSpec::kSimpleSpec}));
    }

    // A trailing $out stage determines the namespace the ShardConnection is
    // versioned against; otherwise leave it empty.
    std::string outputNsOrEmpty;
    if (DocumentSourceOut* out =
            dynamic_cast<DocumentSourceOut*>(pipeline.getValue()->getSources().back().get())) {
        outputNsOrEmpty = out->getOutputNs().ns();
    }

    // Run merging command on random shard, unless a stage needs the primary shard. Need to use
    // ShardConnection so that the merging mongod is sent the config servers on connection init.
    auto& prng = txn->getClient()->getPrng();
    const auto& mergingShardId =
        (needPrimaryShardMerger || internalQueryAlwaysMergeOnPrimaryShard.load())
        ? conf->getPrimaryId()
        : shardResults[prng.nextInt32(shardResults.size())].shardTargetId;
    const auto mergingShard =
        uassertStatusOK(Grid::get(txn)->shardRegistry()->getShard(txn, mergingShardId));
    ShardConnection conn(mergingShard->getConnString(), outputNsOrEmpty);
    BSONObj mergedResults =
        aggRunCommand(txn, conn.get(), namespaces, mergeCmd.freeze().toBson(), options);
    conn.done();

    if (auto wcErrorElem = mergedResults["writeConcernError"]) {
        appendWriteConcernErrorToCmdResponse(mergingShardId, wcErrorElem, *result);
    }

    // Copy output from merging (primary) shard to the output object from our command.
    // Also, propagates errmsg and code if ok == false.
    result->appendElementsUnique(mergedResults);
    return getStatusFromCommandResult(result->asTempObj());
}
// Compares the shard version the client sent with this shard's current version
// for the collection and throws a StaleConfig-style exception describing the
// specific mismatch. Returns silently when no version was sent, the version is
// the "ignored" sentinel, or the versions are write-compatible.
void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) {
    const auto optReceivedShardVersion = getOperationReceivedVersion(opCtx, _nss);
    if (!optReceivedShardVersion)
        return;

    const auto& receivedShardVersion = *optReceivedShardVersion;

    if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) {
        return;
    }

    // An operation with read concern 'available' should never have shardVersion set.
    invariant(repl::ReadConcernArgs::get(opCtx).getLevel() !=
              repl::ReadConcernLevel::kAvailableReadConcern);

    const auto metadata = getCurrentMetadata();
    const auto wantedShardVersion =
        metadata->isSharded() ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();

    // Read the critical-section signal under the CSR lock; the lock is released
    // as soon as this immediately-invoked lambda returns.
    auto criticalSectionSignal = [&] {
        auto csrLock = CSRLock::lock(opCtx, this);
        return _critSec.getSignal(opCtx->lockState()->isWriteLocked()
                                      ? ShardingMigrationCriticalSection::kWrite
                                      : ShardingMigrationCriticalSection::kRead);
    }();

    if (criticalSectionSignal) {
        // Set migration critical section on operation sharding state: operation will wait for the
        // migration to finish before returning failure and retrying.
        auto& oss = OperationShardingState::get(opCtx);
        oss.setMigrationCriticalSectionSignal(criticalSectionSignal);

        uasserted(StaleConfigInfo(_nss, receivedShardVersion, wantedShardVersion),
                  str::stream() << "migration commit in progress for " << _nss.ns());
    }

    if (receivedShardVersion.isWriteCompatibleWith(wantedShardVersion)) {
        return;
    }

    //
    // Figure out exactly why not compatible, send appropriate error message
    // The versions themselves are returned in the error, so not needed in messages here
    //

    // NOTE(review): 'sci' is named std::move'd several times below. This
    // presumably relies on uassert evaluating its status argument only on the
    // failing (throwing) path and on uasserted always throwing, so at most one
    // move ever executes — confirm against the macro definitions.
    StaleConfigInfo sci(_nss, receivedShardVersion, wantedShardVersion);

    uassert(std::move(sci),
            str::stream() << "epoch mismatch detected for " << _nss.ns() << ", "
                          << "the collection may have been dropped and recreated",
            wantedShardVersion.epoch() == receivedShardVersion.epoch());

    if (!wantedShardVersion.isSet() && receivedShardVersion.isSet()) {
        uasserted(std::move(sci),
                  str::stream() << "this shard no longer contains chunks for " << _nss.ns() << ", "
                                << "the collection may have been dropped");
    }

    if (wantedShardVersion.isSet() && !receivedShardVersion.isSet()) {
        uasserted(std::move(sci),
                  str::stream() << "this shard contains chunks for " << _nss.ns() << ", "
                                << "but the client expects unsharded collection");
    }

    if (wantedShardVersion.majorVersion() != receivedShardVersion.majorVersion()) {
        // Could be > or < - wanted is > if this is the source of a migration, wanted < if this is
        // the target of a migration
        uasserted(std::move(sci), str::stream() << "version mismatch detected for " << _nss.ns());
    }

    // Those are all the reasons the versions can mismatch
    MONGO_UNREACHABLE;
}