/**
 * Wraps 'exec' in a DocumentSourceCursor and installs that cursor as the initial source of
 * 'pipeline'.
 *
 * The query, sort and projection are recorded on the cursor stage (the original notes this is
 * for explain). If 'projectionObj' is empty the projection is derived from 'deps' instead; when
 * 'sortObj' is non-empty the dependencies are first recomputed from the pipeline. Leading
 * pipeline stages that the cursor stage reports as coalesced are removed from the pipeline.
 *
 * Before returning, 'exec' is deregistered and has its state saved.
 *
 * Returns the same executor that was passed in.
 */
shared_ptr<PlanExecutor> PipelineD::addCursorSource(const intrusive_ptr<Pipeline>& pipeline,
                                                    const intrusive_ptr<ExpressionContext>& expCtx,
                                                    shared_ptr<PlanExecutor> exec,
                                                    DepsTracker deps,
                                                    const BSONObj& queryObj,
                                                    const BSONObj& sortObj,
                                                    const BSONObj& projectionObj) {
    // The cursor stage is created against the full namespace name of this pipeline.
    const string& nsName = expCtx->ns.ns();
    intrusive_ptr<DocumentSourceCursor> cursorStage =
        DocumentSourceCursor::create(nsName, exec, expCtx);

    // Record the query, sort, and projection on the cursor stage for explain.
    cursorStage->setQuery(queryObj);
    cursorStage->setSort(sortObj);

    if (projectionObj.isEmpty()) {
        // No explicit projection was supplied, so derive one from the dependencies. There may
        // be fewer dependencies now if the sort was covered, so recompute them in that case.
        if (!sortObj.isEmpty()) {
            deps = pipeline->getDependencies(queryObj);
        }
        cursorStage->setProjection(deps.toProjection(), deps.toParsedDeps());
    } else {
        cursorStage->setProjection(projectionObj, boost::none);
    }

    // Drop every leading stage that the cursor stage manages to coalesce.
    for (;;) {
        if (pipeline->sources.empty()) {
            break;
        }
        if (!cursorStage->coalesce(pipeline->sources.front())) {
            break;
        }
        pipeline->sources.pop_front();
    }

    pipeline->addInitialSource(cursorStage);

    // DocumentSourceCursor expects a yielding PlanExecutor that has had its state saved. The
    // PlanExecutor is deregistered so that it can be registered with ClientCursor.
    exec->deregisterExec();
    exec->saveState();

    return exec;
}
/**
 * Parses a projection specification from 'elem' and returns the resulting
 * DocumentSourceProject stage.
 *
 * Assertions raised here:
 *   - 15969 (uassert) when 'elem' is not of type Object.
 *   - 16402 (massert) when Expression::parseObject() does not yield an ExpressionObject.
 *   - 16403 (uassert) when the parsed object reports a field count of zero.
 *
 * The stage keeps an owned copy of the original specification in '_raw'. In builds where
 * _DEBUG is defined, a simple projection is additionally initialised from the expression's
 * dependencies when the expression reports itself as simple.
 */
intrusive_ptr<DocumentSource> DocumentSourceProject::createFromBson(
    BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) {
    // The specification must be an embedded object.
    const bool specIsObject = (elem.type() == Object);
    uassert(15969, str::stream() << projectName << " specification must be an object",
            specIsObject);

    Expression::ObjectCtx objectCtx(Expression::ObjectCtx::DOCUMENT_OK |
                                    Expression::ObjectCtx::TOP_LEVEL |
                                    Expression::ObjectCtx::INCLUSION_OK);

    // Parse the specification; variable ids are handed out by 'idGenerator'.
    VariablesIdGenerator idGenerator;
    VariablesParseState vps(&idGenerator);
    intrusive_ptr<Expression> parsedSpec = Expression::parseObject(elem.Obj(), &objectCtx, vps);

    ExpressionObject* const objectExpr = dynamic_cast<ExpressionObject*>(parsedSpec.get());
    massert(16402, "parseObject() returned wrong type of Expression", objectExpr);
    uassert(16403,
            "$projection requires at least one output field",
            objectExpr->getFieldCount() != 0);

    intrusive_ptr<DocumentSourceProject> projectStage(
        new DocumentSourceProject(pExpCtx, objectExpr));
    projectStage->_variables.reset(new Variables(idGenerator.getIdCount()));

    // Keep an owned copy of the raw specification on the stage.
    projectStage->_raw = elem.Obj().getOwned();

#ifdef _DEBUG
    if (objectExpr->isSimple()) {
        DepsTracker deps;
        vector<string> path;
        objectExpr->addDependencies(&deps, &path);
        projectStage->_simpleProjection.init(deps.toProjection());
    }
#endif

    return projectStage;
}
/**
 * Creates the Runner that feeds 'pPipeline' and installs it, wrapped in a
 * DocumentSourceCursor, as the pipeline's initial source.
 *
 * Steps, in order:
 *   1. Assert that at least a read lock is held on the pipeline's namespace.
 *   2. Inject a MongodImplementation into every stage that is a DocumentSourceNeedsMongod.
 *   3. If the first stage is already a valid initial source, return an empty pointer (no
 *      cursor is needed).
 *   4. Take the pipeline's initial query (removing the front stage when that query is
 *      non-empty) and compute the pipeline's dependencies.
 *   5. If the pipeline now starts with a $sort, try to build a Runner that also performs the
 *      sort; on success the $sort is removed from the pipeline (its coalesced $limit, if any,
 *      is put back). Otherwise build a Runner without a sort.
 *   6. Save the Runner's state, wrap it in a DocumentSourceCursor, let that stage coalesce
 *      leading stages, and add it to the front of the pipeline.
 *
 * Failures while building the sort-less Runner are reported through uassertStatusOK().
 *
 * Returns the Runner, or an empty pointer as described in step 3.
 */
boost::shared_ptr<Runner> PipelineD::prepareCursorSource(
    Collection* collection,
    const intrusive_ptr<Pipeline>& pPipeline,
    const intrusive_ptr<ExpressionContext>& pExpCtx) {
    // The full "namespace" name; at least a read lock on it must already be held.
    const string& nsName = pExpCtx->ns.ns();
    pExpCtx->opCtx->lockState()->assertAtLeastReadLocked(nsName);

    // The stage container is modified as we go.
    Pipeline::SourceContainer& stages = pPipeline->sources;

    // Inject a MongodImplementation to stages that need them.
    for (size_t idx = 0; idx < stages.size(); ++idx) {
        DocumentSourceNeedsMongod* const mongodUser =
            dynamic_cast<DocumentSourceNeedsMongod*>(stages[idx].get());
        if (!mongodUser) {
            continue;
        }
        mongodUser->injectMongodInterface(boost::make_shared<MongodImplementation>(pExpCtx));
    }

    if (!stages.empty()) {
        if (stages.front()->isValidInitialSource()) {
            if (dynamic_cast<DocumentSourceMergeCursors*>(stages.front().get())) {
                // Enable the hooks for setting up authentication on the subsequent internal
                // connections we are going to create. This would normally have been done
                // when SetShardVersion was called, but since SetShardVersion is never called
                // on secondaries, this is needed.
                ShardedConnectionInfo::addHook();
            }
            // The pipeline supplies its own input; don't need a cursor.
            return boost::shared_ptr<Runner>();
        }
    }

    // Look for an initial match. This works whether we got an initial query or not. If not,
    // it results in a "{}" query, which will be what we want in that case.
    const BSONObj queryObj = pPipeline->getInitialQuery();
    if (!queryObj.isEmpty()) {
        // The match gets built in to the cursor we'll create, so remove it from the pipeline.
        stages.pop_front();
    }

    // Find the set of fields in the source documents depended on by this pipeline.
    const DepsTracker deps = pPipeline->getDependencies(queryObj);

    // Pass the query an empty projection since it is faster to use
    // ParsedDeps::extractFields(). This will need to change to support covering indexes
    // (SERVER-12015). There is an exception for textScore since that can only be retrieved
    // by a query projection.
    BSONObj projectionForQuery;
    if (deps.needTextScore) {
        projectionForQuery = deps.toProjection();
    }

    // Look for an initial sort; we'll try to add this to the cursor we create. If that
    // succeeds (further down), the $sort is removed from the pipeline, because the documents
    // will already come sorted in the specified order as a result of the index scan.
    intrusive_ptr<DocumentSourceSort> leadingSort;
    BSONObj sortKey;
    if (!stages.empty()) {
        leadingSort = dynamic_cast<DocumentSourceSort*>(stages.front().get());
    }
    if (leadingSort) {
        // Build the sort key.
        sortKey = leadingSort->serializeSortKey(/*explain*/ false).toBson();
    }

    // Create the Runner.
    //
    // If we try to create a Runner that includes both the match and the sort, and the two
    // are incompatible wrt the available indexes, then we don't get a Runner back.
    //
    // So we try to use both first. If that fails, try again, without the sort. If we don't
    // have a sort, jump straight to just creating a Runner without the sort.
    //
    // If we are able to incorporate the sort into the Runner, remove it from the head of
    // the pipeline.
    //
    // LATER - we should be able to find this out before we create the cursor. Either way,
    // we can then apply other optimizations there are tickets for, such as SERVER-4507.
    const size_t plannerOptions = QueryPlannerParams::DEFAULT |
        QueryPlannerParams::INCLUDE_SHARD_FILTER | QueryPlannerParams::NO_BLOCKING_SORT;

    boost::shared_ptr<Runner> runner;
    bool sortInRunner = false;

    const WhereCallbackReal whereCallback(pExpCtx->ns.db());

    if (leadingSort) {
        CanonicalQuery* sortedQuery;
        Status parseStatus = CanonicalQuery::canonicalize(
            pExpCtx->ns, queryObj, sortKey, projectionForQuery, &sortedQuery, whereCallback);
        if (parseStatus.isOK()) {
            Runner* sortedRunner;
            if (getRunner(collection, sortedQuery, &sortedRunner, plannerOptions).isOK()) {
                // Success: the Runner will handle sorting for us using an index.
                runner.reset(sortedRunner);
                sortInRunner = true;

                stages.pop_front();
                if (leadingSort->getLimitSrc()) {
                    // Need to reinsert the coalesced $limit after removing the $sort.
                    stages.push_front(leadingSort->getLimitSrc());
                }
            }
        }
    }

    if (!runner) {
        // Either there was no sort or the sorted attempt did not produce a Runner.
        const BSONObj emptySort;
        CanonicalQuery* unsortedQuery;
        uassertStatusOK(CanonicalQuery::canonicalize(
            pExpCtx->ns, queryObj, emptySort, projectionForQuery, &unsortedQuery, whereCallback));

        Runner* unsortedRunner;
        uassertStatusOK(getRunner(collection, unsortedQuery, &unsortedRunner, plannerOptions));
        runner.reset(unsortedRunner);
    }

    // DocumentSourceCursor expects a yielding Runner that has had its state saved.
    runner->saveState();

    // Put the Runner into a DocumentSourceCursor and add it to the front of the pipeline.
    intrusive_ptr<DocumentSourceCursor> cursorStage =
        DocumentSourceCursor::create(nsName, runner, pExpCtx);

    // Note the query, sort, and projection for explain.
    cursorStage->setQuery(queryObj);
    if (sortInRunner) {
        cursorStage->setSort(sortKey);
    }
    cursorStage->setProjection(deps.toProjection(), deps.toParsedDeps());

    // Drop every leading stage that the cursor stage manages to coalesce.
    for (;;) {
        if (stages.empty()) {
            break;
        }
        if (!cursorStage->coalesce(stages.front())) {
            break;
        }
        stages.pop_front();
    }

    pPipeline->addInitialSource(cursorStage);

    return runner;
}
/**
 * Creates the PlanExecutor that feeds 'pPipeline' and installs it as the pipeline's initial
 * source via addCursorSource().
 *
 * Steps, in order:
 *   1. Inject a MongodImplementation into every stage that is a DocumentSourceNeedsMongod.
 *   2. If the first stage is already a valid initial source, return an empty pointer (no
 *      cursor is needed).
 *   3. If 'collection' is non-null and the first stage is a $sample, try to obtain an executor
 *      from createRandomCursorExecutor(); when one is returned, the $sample stage is replaced
 *      by a DocumentSourceSampleFromRandomCursor stage and that executor is used.
 *   4. Otherwise take the pipeline's initial query (removing a leading $match that produced
 *      it), compute dependencies and the projection, pick up a leading $sort if present, and
 *      delegate executor construction to prepareExecutor().
 *
 * Returns the executor handed back by addCursorSource(), or an empty pointer as described in
 * step 2.
 */
shared_ptr<PlanExecutor> PipelineD::prepareCursorSource(
    OperationContext* txn,
    Collection* collection,
    const NamespaceString& nss,
    const intrusive_ptr<Pipeline>& pPipeline,
    const intrusive_ptr<ExpressionContext>& pExpCtx) {
    // The stage container is modified as we go.
    Pipeline::SourceContainer& stages = pPipeline->sources;

    // Inject a MongodImplementation to stages that need them.
    for (auto&& stage : stages) {
        auto* const mongodUser = dynamic_cast<DocumentSourceNeedsMongod*>(stage.get());
        if (!mongodUser) {
            continue;
        }
        mongodUser->injectMongodInterface(std::make_shared<MongodImplementation>(pExpCtx));
    }

    if (!stages.empty()) {
        // Only used before any modification of 'stages' below.
        auto* const head = stages.front().get();

        if (head->isValidInitialSource()) {
            if (dynamic_cast<DocumentSourceMergeCursors*>(head)) {
                // Enable the hooks for setting up authentication on the subsequent internal
                // connections we are going to create. This would normally have been done
                // when SetShardVersion was called, but since SetShardVersion is never called
                // on secondaries, this is needed.
                ShardedConnectionInfo::addHook();
            }
            // The pipeline supplies its own input; don't need a cursor.
            return std::shared_ptr<PlanExecutor>();
        }

        // Optimize an initial $sample stage if possible.
        auto sampleStage = dynamic_cast<DocumentSourceSample*>(head);
        if (collection && sampleStage) {
            const long long sampleSize = sampleStage->getSampleSize();
            const long long numRecords = collection->getRecordStore()->numRecords(txn);

            auto randomExec = createRandomCursorExecutor(collection, txn, sampleSize, numRecords);
            if (randomExec) {
                // Replace $sample stage with $sampleFromRandomCursor stage.
                stages.pop_front();

                std::string idString;
                if (collection->ns().isOplog()) {
                    idString = "ts";
                } else {
                    idString = "_id";
                }
                stages.emplace_front(DocumentSourceSampleFromRandomCursor::create(
                    pExpCtx, sampleSize, idString, numRecords));

                // Dependencies are computed against an empty query, after the replacement.
                const BSONObj emptyQuery;
                return addCursorSource(
                    pPipeline, pExpCtx, randomExec, pPipeline->getDependencies(emptyQuery));
            }
        }
    }

    // Look for an initial match. This works whether we got an initial query or not. If not,
    // it results in a "{}" query, which will be what we want in that case.
    const BSONObj queryObj = pPipeline->getInitialQuery();
    if (!queryObj.isEmpty()) {
        if (!dynamic_cast<DocumentSourceMatch*>(stages.front().get())) {
            // A $geoNear stage, the only other stage that can produce an initial query, is
            // also a valid initial stage and will be handled above.
            MONGO_UNREACHABLE;
        }
        // If a $match query is pulled into the cursor, the $match is redundant, and can be
        // removed from the pipeline.
        stages.pop_front();
    }

    // Find the set of fields in the source documents depended on by this pipeline.
    DepsTracker deps = pPipeline->getDependencies(queryObj);
    BSONObj projForQuery = deps.toProjection();

    // Look for an initial sort; we'll try to add this to the cursor we create. If that
    // succeeds (further down), the $sort is removed from the pipeline, because the documents
    // will already come sorted in the specified order as a result of the index scan.
    intrusive_ptr<DocumentSourceSort> leadingSort;
    BSONObj sortKey;
    if (!stages.empty()) {
        leadingSort = dynamic_cast<DocumentSourceSort*>(stages.front().get());
    }
    if (leadingSort) {
        // Build the sort key.
        sortKey = leadingSort->serializeSortKey(/*explain*/ false).toBson();
    }

    // Create the PlanExecutor. 'sortKey' and 'projForQuery' are passed by address.
    auto exec = prepareExecutor(txn,
                                collection,
                                nss,
                                pPipeline,
                                pExpCtx,
                                leadingSort,
                                deps,
                                queryObj,
                                &sortKey,
                                &projForQuery);

    return addCursorSource(pPipeline, pExpCtx, exec, deps, queryObj, sortKey, projForQuery);
}