void run() { OperationContextImpl txn; Client::ReadContext ctx(&txn, "unittests.matchertests"); M m(BSON("$where" << "function(){ return this.a == 1; }"), WhereCallbackReal(&txn, StringData("unittests"))); ASSERT( m.matches( BSON( "a" << 1 ) ) ); ASSERT( !m.matches( BSON( "a" << 2 ) ) ); }
/**
 * Builds a PlanExecutor whose root is an IDHackStage, optionally wrapped in a
 * shard filter and a projection.
 *
 * @param txn                operation context handed to the IDHackStage
 * @param collection         collection to query; must not be NULL
 * @param rawCanonicalQuery  query to execute; must not be NULL. Ownership is taken
 *                           immediately and passed on to the resulting PlanExecutor.
 * @param plannerParams      only 'options' is consulted (INCLUDE_SHARD_FILTER)
 * @param out                receives the newly allocated PlanExecutor
 * @return Status::OK()
 */
Status getExecutorIDHack(OperationContext* txn,
                         Collection* collection,
                         CanonicalQuery* rawCanonicalQuery,
                         const QueryPlannerParams& plannerParams,
                         PlanExecutor** out) {
    invariant(collection);
    invariant(rawCanonicalQuery);

    // Own the query from here on; it is released into the executor at the end.
    auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);

    LOG(2) << "Using idhack: " << canonicalQuery->toStringShort();

    WorkingSet* ws = new WorkingSet();
    PlanStage* root = new IDHackStage(txn, collection, canonicalQuery.get(), ws);

    // Orphaned documents may have to be filtered out.
    const bool wantShardFilter =
        (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) != 0;
    if (wantShardFilter) {
        root = new ShardFilterStage(shardingState.getCollectionMetadata(collection->ns()),
                                    ws,
                                    root);
    }

    // The idhack stage always fetches the whole document, so covered projections are
    // not supported here. The simple-inclusion fast path may still apply.
    const bool hasProjection = (NULL != canonicalQuery->getProj());
    if (hasProjection) {
        // Named local rather than a temporary so the callback stays alive while the
        // ProjectionStage is constructed.
        // NOTE(review): 'params' may keep a reference to the callback - confirm.
        const WhereCallbackReal whereCallback(collection->ns().db());
        ProjectionStageParams params(whereCallback);
        params.projObj = canonicalQuery->getProj()->getProjObj();

        // Choose the projection implementation and give it the data it needs.
        const bool needsFullPath = canonicalQuery->getProj()->requiresDocument()
            || canonicalQuery->getProj()->wantIndexKey();
        if (needsFullPath) {
            params.fullExpression = canonicalQuery->root();
            params.projImpl = ProjectionStageParams::NO_FAST_PATH;
        }
        else {
            params.projImpl = ProjectionStageParams::SIMPLE_DOC;
        }

        root = new ProjectionStage(params, ws, root);
    }

    *out = new PlanExecutor(ws, root, canonicalQuery.release(), collection);
    return Status::OK();
}
std::string newRunQuery(OperationContext* txn, Message& m, QueryMessage& q, CurOp& curop, Message &result, bool fromDBDirectClient) { // Validate the namespace. const char *ns = q.ns; uassert(16332, "can't have an empty ns", ns[0]); const NamespaceString nsString(ns); uassert(16256, str::stream() << "Invalid ns [" << ns << "]", nsString.isValid()); // Set curop information. curop.debug().ns = ns; curop.debug().ntoreturn = q.ntoreturn; curop.debug().query = q.query; curop.setQuery(q.query); // If the query is really a command, run it. if (nsString.isCommand()) { int nToReturn = q.ntoreturn; uassert(16979, str::stream() << "bad numberToReturn (" << nToReturn << ") for $cmd type ns - can only be 1 or -1", nToReturn == 1 || nToReturn == -1); curop.markCommand(); BufBuilder bb; bb.skip(sizeof(QueryResult::Value)); BSONObjBuilder cmdResBuf; if (!runCommands(txn, ns, q.query, curop, bb, cmdResBuf, false, q.queryOptions)) { uasserted(13530, "bad or malformed command request?"); } curop.debug().iscommand = true; // TODO: Does this get overwritten/do we really need to set this twice? curop.debug().query = q.query; QueryResult::View qr = bb.buf(); bb.decouple(); qr.setResultFlagsToOk(); qr.msgdata().setLen(bb.len()); curop.debug().responseLength = bb.len(); qr.msgdata().setOperation(opReply); qr.setCursorId(0); qr.setStartingFrom(0); qr.setNReturned(1); result.setData(qr.view2ptr(), true); return ""; } const NamespaceString nss(q.ns); // Parse the qm into a CanonicalQuery. CanonicalQuery* cq; Status canonStatus = CanonicalQuery::canonicalize( q, &cq, WhereCallbackReal(txn, StringData(nss.db()))); if (!canonStatus.isOK()) { uasserted(17287, str::stream() << "Can't canonicalize query: " << canonStatus.toString()); } QLOG() << "Running query:\n" << cq->toString(); LOG(2) << "Running query: " << cq->toStringShort(); // Parse, canonicalize, plan, transcribe, and get a plan executor. PlanExecutor* rawExec = NULL; // We use this a lot below. 
const LiteParsedQuery& pq = cq->getParsed(); AutoGetCollectionForRead ctx(txn, nss); const int dbProfilingLevel = (ctx.getDb() != NULL) ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile; Collection* collection = ctx.getCollection(); // We'll now try to get the query executor that will execute this query for us. There // are a few cases in which we know upfront which executor we should get and, therefore, // we shortcut the selection process here. // // (a) If the query is over a collection that doesn't exist, we use an EOFStage. // // (b) if the query is a replication's initial sync one, we use a specifically designed // stage that skips extents faster (see details in exec/oplogstart.h). // // Otherwise we go through the selection of which executor is most suited to the // query + run-time context at hand. Status status = Status::OK(); if (NULL != collection && pq.getOptions().oplogReplay) { // Takes ownership of 'cq'. status = getOplogStartHack(txn, collection, cq, &rawExec); } else { size_t options = QueryPlannerParams::DEFAULT; if (shardingState.needCollectionMetadata(pq.ns())) { options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; } // Takes ownership of 'cq'. status = getExecutor(txn, collection, cq, PlanExecutor::YIELD_AUTO, &rawExec, options); } if (!status.isOK()) { // NOTE: Do not access cq as getExecutor has deleted it. uasserted(17007, "Unable to execute query: " + status.reason()); } verify(NULL != rawExec); auto_ptr<PlanExecutor> exec(rawExec); // If it's actually an explain, do the explain and return rather than falling through // to the normal query execution loop. if (pq.isExplain()) { BufBuilder bb; bb.skip(sizeof(QueryResult::Value)); BSONObjBuilder explainBob; Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob); // Add the resulting object to the return buffer. 
BSONObj explainObj = explainBob.obj(); bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize()); curop.debug().iscommand = true; // TODO: Does this get overwritten/do we really need to set this twice? curop.debug().query = q.query; // Set query result fields. QueryResult::View qr = bb.buf(); bb.decouple(); qr.setResultFlagsToOk(); qr.msgdata().setLen(bb.len()); curop.debug().responseLength = bb.len(); qr.msgdata().setOperation(opReply); qr.setCursorId(0); qr.setStartingFrom(0); qr.setNReturned(1); result.setData(qr.view2ptr(), true); return ""; } // We freak out later if this changes before we're done with the query. const ChunkVersion shardingVersionAtStart = shardingState.getVersion(cq->ns()); // Handle query option $maxTimeMS (not used with commands). curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000); txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point. // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set. bool slaveOK = pq.getOptions().slaveOk || pq.hasReadPref(); status = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor( txn, NamespaceString(cq->ns()), slaveOK); uassertStatusOK(status); // If this exists, the collection is sharded. // If it doesn't exist, we can assume we're not sharded. // If we're sharded, we might encounter data that is not consistent with our sharding state. // We must ignore this data. CollectionMetadataPtr collMetadata; if (!shardingState.needCollectionMetadata(pq.ns())) { collMetadata = CollectionMetadataPtr(); } else { collMetadata = shardingState.getCollectionMetadata(pq.ns()); } // Run the query. // bb is used to hold query results // this buffer should contain either requested documents per query or // explain information, but not both BufBuilder bb(32768); bb.skip(sizeof(QueryResult::Value)); // How many results have we obtained from the executor? 
int numResults = 0; // If we're replaying the oplog, we save the last time that we read. OpTime slaveReadTill; // Do we save the PlanExecutor in a ClientCursor for getMore calls later? bool saveClientCursor = false; BSONObj obj; PlanExecutor::ExecState state; // uint64_t numMisplacedDocs = 0; // Get summary info about which plan the executor is using. curop.debug().planSummary = Explain::getPlanSummary(exec.get()); while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // Add result to output buffer. bb.appendBuf((void*)obj.objdata(), obj.objsize()); // Count the result. ++numResults; // Possibly note slave's position in the oplog. if (pq.getOptions().oplogReplay) { BSONElement e = obj["ts"]; if (Date == e.type() || Timestamp == e.type()) { slaveReadTill = e._opTime(); } } // TODO: only one type of 2d search doesn't support this. We need a way to pull it out // of CanonicalQuery. :( const bool supportsGetMore = true; if (!supportsGetMore && (enough(pq, numResults) || bb.len() >= MaxBytesToReturnToClientAtOnce)) { break; } else if (enoughForFirstBatch(pq, numResults, bb.len())) { QLOG() << "Enough for first batch, wantMore=" << pq.wantMore() << " numToReturn=" << pq.getNumToReturn() << " numResults=" << numResults << endl; // If only one result requested assume it's a findOne() and don't save the cursor. if (pq.wantMore() && 1 != pq.getNumToReturn()) { QLOG() << " executor EOF=" << exec->isEOF() << endl; saveClientCursor = !exec->isEOF(); } break; } } // If we cache the executor later, we want to deregister it as it receives notifications // anyway by virtue of being cached. // // If we don't cache the executor later, we are deleting it, so it must be deregistered. // // So, no matter what, deregister the executor. exec->deregisterExec(); // Caller expects exceptions thrown in certain cases. 
if (PlanExecutor::EXEC_ERROR == state) { scoped_ptr<PlanStageStats> stats(exec->getStats()); error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats); uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj)); } // Why save a dead executor? if (PlanExecutor::DEAD == state) { saveClientCursor = false; } else if (pq.getOptions().tailable) { // If we're tailing a capped collection, we don't bother saving the cursor if the // collection is empty. Otherwise, the semantics of the tailable cursor is that the // client will keep trying to read from it. So we'll keep it around. if (collection && collection->numRecords(txn) != 0 && pq.getNumToReturn() != 1) { saveClientCursor = true; } } // TODO(greg): This will go away soon. if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // if the version changed during the query we might be missing some data and its safe to // send this as mongos can resend at this point throw SendStaleConfigException(pq.ns(), "version changed during initial query", shardingVersionAtStart, shardingState.getVersion(pq.ns())); } const logger::LogComponent queryLogComponent = logger::LogComponent::kQuery; const logger::LogSeverity logLevelOne = logger::LogSeverity::Debug(1); PlanSummaryStats summaryStats; Explain::getSummaryStats(exec.get(), &summaryStats); curop.debug().ntoskip = pq.getSkip(); curop.debug().nreturned = numResults; curop.debug().scanAndOrder = summaryStats.hasSortStage; curop.debug().nscanned = summaryStats.totalKeysExamined; curop.debug().nscannedObjects = summaryStats.totalDocsExamined; curop.debug().idhack = summaryStats.isIdhack; // Set debug information for consumption by the profiler. if (dbProfilingLevel > 0 || curop.elapsedMillis() > serverGlobalParams.slowMS || logger::globalLogDomain()->shouldLog(queryLogComponent, logLevelOne)) { // Get BSON stats. 
scoped_ptr<PlanStageStats> execStats(exec->getStats()); BSONObjBuilder statsBob; Explain::statsToBSON(*execStats, &statsBob); curop.debug().execStats.set(statsBob.obj()); // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj. if (curop.debug().execStats.tooBig() && !curop.debug().planSummary.empty()) { BSONObjBuilder bob; bob.append("summary", curop.debug().planSummary.toString()); curop.debug().execStats.set(bob.done()); } } long long ccId = 0; if (saveClientCursor) { // We won't use the executor until it's getMore'd. exec->saveState(); // Allocate a new ClientCursor. We don't have to worry about leaking it as it's // inserted into a global map by its ctor. ClientCursor* cc = new ClientCursor(collection, exec.get(), cq->getParsed().getOptions().toInt(), cq->getParsed().getFilter()); ccId = cc->cursorid(); if (fromDBDirectClient) { cc->setUnownedRecoveryUnit(txn->recoveryUnit()); } else if (state == PlanExecutor::IS_EOF && pq.getOptions().tailable) { // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their // next getMore. } else { // We stash away the RecoveryUnit in the ClientCursor. It's used for subsequent // getMore requests. The calling OpCtx gets a fresh RecoveryUnit. cc->setOwnedRecoveryUnit(txn->releaseRecoveryUnit()); StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine(); txn->setRecoveryUnit(storageEngine->newRecoveryUnit(txn)); } QLOG() << "caching executor with cursorid " << ccId << " after returning " << numResults << " results" << endl; // ClientCursor takes ownership of executor. Release to make sure it's not deleted. exec.release(); // TODO document if (pq.getOptions().oplogReplay && !slaveReadTill.isNull()) { cc->slaveReadTill(slaveReadTill); } // TODO document if (pq.getOptions().exhaust) { curop.debug().exhaust = true; } // Set attributes for getMore. 
cc->setCollMetadata(collMetadata); cc->setPos(numResults); // If the query had a time limit, remaining time is "rolled over" to the cursor (for // use by future getmore ops). cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros()); } else { QLOG() << "Not caching executor but returning " << numResults << " results.\n"; } // Add the results from the query into the output buffer. result.appendData(bb.buf(), bb.len()); bb.decouple(); // Fill out the output buffer's header. QueryResult::View qr = result.header().view2ptr(); qr.setCursorId(ccId); curop.debug().cursorid = (0 == ccId ? -1 : ccId); qr.setResultFlagsToOk(); qr.msgdata().setOperation(opReply); qr.setStartingFrom(0); qr.setNReturned(numResults); // curop.debug().exhaust is set above. return curop.debug().exhaust ? pq.ns() : ""; }
/**
 * Recursively turns the query solution subtree rooted at 'root' into a tree of
 * PlanStages that share the working set 'ws'.
 *
 * @param txn         operation context handed to the stages that need one
 * @param collection  collection the plan runs over; may be NULL, in which case
 *                    node types that need the collection or its index catalog
 *                    produce a warning and NULL
 * @param qsol        the solution being built (only forwarded to recursive calls)
 * @param root        solution node to build a stage for
 * @param ws          working set shared by every stage in the tree
 * @return a newly allocated stage owned by the caller, or NULL when the stage (or
 *         any of its children) cannot be built
 */
PlanStage* buildStages(OperationContext* txn,
                       Collection* collection,
                       const QuerySolution& qsol,
                       const QuerySolutionNode* root,
                       WorkingSet* ws) {
    switch (root->getType()) {
    case STAGE_COLLSCAN: {
        const CollectionScanNode* csn = static_cast<const CollectionScanNode*>(root);
        CollectionScanParams params;
        params.collection = collection;
        params.tailable = csn->tailable;
        params.direction = (csn->direction == 1) ? CollectionScanParams::FORWARD
                                                 : CollectionScanParams::BACKWARD;
        params.maxScan = csn->maxScan;
        return new CollectionScan(txn, params, ws, csn->filter.get());
    }

    case STAGE_IXSCAN: {
        const IndexScanNode* ixn = static_cast<const IndexScanNode*>(root);

        if (NULL == collection) {
            warning() << "Can't ixscan null namespace";
            return NULL;
        }

        IndexScanParams params;
        params.descriptor =
            collection->getIndexCatalog()->findIndexByKeyPattern(txn, ixn->indexKeyPattern);
        if (params.descriptor == NULL) {
            warning() << "Can't find index " << ixn->indexKeyPattern.toString()
                      << " in namespace " << collection->ns() << endl;
            return NULL;
        }

        params.bounds = ixn->bounds;
        params.direction = ixn->direction;
        params.maxScan = ixn->maxScan;
        params.addKeyMetadata = ixn->addKeyMetadata;
        return new IndexScan(txn, params, ws, ixn->filter.get());
    }

    case STAGE_FETCH: {
        const FetchNode* fn = static_cast<const FetchNode*>(root);
        PlanStage* childStage = buildStages(txn, collection, qsol, fn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new FetchStage(txn, ws, childStage, fn->filter.get(), collection);
    }

    case STAGE_SORT: {
        const SortNode* sn = static_cast<const SortNode*>(root);
        PlanStage* childStage = buildStages(txn, collection, qsol, sn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        SortStageParams params;
        params.collection = collection;
        params.pattern = sn->pattern;
        params.query = sn->query;
        params.limit = sn->limit;
        return new SortStage(txn, params, ws, childStage);
    }

    case STAGE_PROJECTION: {
        const ProjectionNode* pn = static_cast<const ProjectionNode*>(root);

        // The where-callback needs the collection's db name. Check before building
        // the child so nothing is leaked on the failure path.
        if (NULL == collection) {
            warning() << "Can't build projection for null namespace";
            return NULL;
        }

        PlanStage* childStage = buildStages(txn, collection, qsol, pn->children[0], ws);
        if (NULL == childStage) { return NULL; }

        // Named local rather than a temporary so the callback stays alive while the
        // ProjectionStage is constructed.
        // NOTE(review): 'params' may keep a reference to the callback - confirm.
        const WhereCallbackReal whereCallback(txn, collection->ns().db());
        ProjectionStageParams params(whereCallback);
        params.projObj = pn->projection;

        // Stuff the right data into the params depending on what proj impl we use.
        if (ProjectionNode::DEFAULT == pn->projType) {
            params.fullExpression = pn->fullExpression;
            params.projImpl = ProjectionStageParams::NO_FAST_PATH;
        }
        else if (ProjectionNode::COVERED_ONE_INDEX == pn->projType) {
            params.projImpl = ProjectionStageParams::COVERED_ONE_INDEX;
            params.coveredKeyObj = pn->coveredKeyObj;
            invariant(!pn->coveredKeyObj.isEmpty());
        }
        else {
            invariant(ProjectionNode::SIMPLE_DOC == pn->projType);
            params.projImpl = ProjectionStageParams::SIMPLE_DOC;
        }

        return new ProjectionStage(params, ws, childStage);
    }

    case STAGE_LIMIT: {
        const LimitNode* ln = static_cast<const LimitNode*>(root);
        PlanStage* childStage = buildStages(txn, collection, qsol, ln->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new LimitStage(ln->limit, ws, childStage);
    }

    case STAGE_SKIP: {
        const SkipNode* sn = static_cast<const SkipNode*>(root);
        PlanStage* childStage = buildStages(txn, collection, qsol, sn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new SkipStage(sn->skip, ws, childStage);
    }

    case STAGE_AND_HASH: {
        const AndHashNode* ahn = static_cast<const AndHashNode*>(root);
        // The auto_ptr destroys the partially built stage if a child fails.
        auto_ptr<AndHashStage> ret(new AndHashStage(txn, ws, ahn->filter.get(), collection));
        for (size_t i = 0; i < ahn->children.size(); ++i) {
            PlanStage* childStage = buildStages(txn, collection, qsol, ahn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }

    case STAGE_OR: {
        const OrNode* orn = static_cast<const OrNode*>(root);
        auto_ptr<OrStage> ret(new OrStage(ws, orn->dedup, orn->filter.get()));
        for (size_t i = 0; i < orn->children.size(); ++i) {
            PlanStage* childStage = buildStages(txn, collection, qsol, orn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }

    case STAGE_AND_SORTED: {
        const AndSortedNode* asn = static_cast<const AndSortedNode*>(root);
        auto_ptr<AndSortedStage> ret(
            new AndSortedStage(txn, ws, asn->filter.get(), collection));
        for (size_t i = 0; i < asn->children.size(); ++i) {
            PlanStage* childStage = buildStages(txn, collection, qsol, asn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }

    case STAGE_SORT_MERGE: {
        const MergeSortNode* msn = static_cast<const MergeSortNode*>(root);
        MergeSortStageParams params;
        params.dedup = msn->dedup;
        params.pattern = msn->sort;
        auto_ptr<MergeSortStage> ret(new MergeSortStage(txn, params, ws, collection));
        for (size_t i = 0; i < msn->children.size(); ++i) {
            PlanStage* childStage = buildStages(txn, collection, qsol, msn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }

    case STAGE_GEO_NEAR_2D: {
        const GeoNear2DNode* node = static_cast<const GeoNear2DNode*>(root);

        // The index catalog lookup below needs a collection.
        if (NULL == collection) {
            warning() << "Can't 2D geo-near null namespace";
            return NULL;
        }

        GeoNearParams params;
        params.nearQuery = node->nq;
        params.baseBounds = node->baseBounds;
        params.filter = node->filter.get();
        params.addPointMeta = node->addPointMeta;
        params.addDistMeta = node->addDistMeta;

        IndexDescriptor* twoDIndex =
            collection->getIndexCatalog()->findIndexByKeyPattern(txn, node->indexKeyPattern);
        if (twoDIndex == NULL) {
            warning() << "Can't find 2D index " << node->indexKeyPattern.toString()
                      << " in namespace " << collection->ns() << endl;
            return NULL;
        }

        return new GeoNear2DStage(params, txn, ws, collection, twoDIndex);
    }

    case STAGE_GEO_NEAR_2DSPHERE: {
        const GeoNear2DSphereNode* node = static_cast<const GeoNear2DSphereNode*>(root);

        // The index catalog lookup below needs a collection.
        if (NULL == collection) {
            warning() << "Can't 2DSphere geo-near null namespace";
            return NULL;
        }

        GeoNearParams params;
        params.nearQuery = node->nq;
        params.baseBounds = node->baseBounds;
        params.filter = node->filter.get();
        params.addPointMeta = node->addPointMeta;
        params.addDistMeta = node->addDistMeta;

        IndexDescriptor* s2Index =
            collection->getIndexCatalog()->findIndexByKeyPattern(txn, node->indexKeyPattern);
        if (s2Index == NULL) {
            warning() << "Can't find 2DSphere index " << node->indexKeyPattern.toString()
                      << " in namespace " << collection->ns() << endl;
            return NULL;
        }

        return new GeoNear2DSphereStage(params, txn, ws, collection, s2Index);
    }

    case STAGE_TEXT: {
        const TextNode* node = static_cast<const TextNode*>(root);

        if (NULL == collection) {
            warning() << "Null collection for text";
            return NULL;
        }

        // Exactly one text index is expected.
        vector<IndexDescriptor*> idxMatches;
        collection->getIndexCatalog()->findIndexByType(txn, "text", idxMatches);
        if (1 != idxMatches.size()) {
            warning() << "No text index, or more than one text index";
            return NULL;
        }
        IndexDescriptor* index = idxMatches[0];
        const FTSAccessMethod* fam =
            static_cast<FTSAccessMethod*>(collection->getIndexCatalog()->getIndex(index));

        TextStageParams params(fam->getSpec());
        params.index = index;
        params.spec = fam->getSpec();
        params.indexPrefix = node->indexPrefix;

        // Fall back to the index's default language when the node names none.
        const std::string& language = ("" == node->language
                                       ? fam->getSpec().defaultLanguage().str()
                                       : node->language);

        Status parseStatus = params.query.parse(node->query,
                                                language,
                                                fam->getSpec().getTextIndexVersion());
        if (!parseStatus.isOK()) {
            warning() << "Can't parse text search query";
            return NULL;
        }

        return new TextStage(txn, params, ws, node->filter.get());
    }

    case STAGE_SHARDING_FILTER: {
        const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root);

        // The metadata lookup needs the collection's namespace. Check before building
        // the child so nothing is leaked on the failure path.
        if (NULL == collection) {
            warning() << "Can't shard-filter null namespace";
            return NULL;
        }

        PlanStage* childStage = buildStages(txn, collection, qsol, fn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new ShardFilterStage(shardingState.getCollectionMetadata(collection->ns()),
                                    ws,
                                    childStage);
    }

    case STAGE_KEEP_MUTATIONS: {
        const KeepMutationsNode* km = static_cast<const KeepMutationsNode*>(root);
        PlanStage* childStage = buildStages(txn, collection, qsol, km->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new KeepMutationsStage(km->filter.get(), ws, childStage);
    }

    case STAGE_DISTINCT: {
        const DistinctNode* dn = static_cast<const DistinctNode*>(root);

        if (NULL == collection) {
            warning() << "Can't distinct-scan null namespace";
            return NULL;
        }

        DistinctParams params;
        params.descriptor =
            collection->getIndexCatalog()->findIndexByKeyPattern(txn, dn->indexKeyPattern);
        // Same handling as the index scan case: no descriptor, no stage.
        if (params.descriptor == NULL) {
            warning() << "Can't find index " << dn->indexKeyPattern.toString()
                      << " in namespace " << collection->ns() << endl;
            return NULL;
        }

        params.direction = dn->direction;
        params.bounds = dn->bounds;
        params.fieldNo = dn->fieldNo;
        return new DistinctScan(txn, params, ws);
    }

    case STAGE_COUNT_SCAN: {
        const CountNode* cn = static_cast<const CountNode*>(root);

        if (NULL == collection) {
            warning() << "Can't fast-count null namespace (collection null)";
            return NULL;
        }

        CountScanParams params;
        params.descriptor =
            collection->getIndexCatalog()->findIndexByKeyPattern(txn, cn->indexKeyPattern);
        // Same handling as the index scan case: no descriptor, no stage.
        if (params.descriptor == NULL) {
            warning() << "Can't find index " << cn->indexKeyPattern.toString()
                      << " in namespace " << collection->ns() << endl;
            return NULL;
        }

        params.startKey = cn->startKey;
        params.startKeyInclusive = cn->startKeyInclusive;
        params.endKey = cn->endKey;
        params.endKeyInclusive = cn->endKeyInclusive;
        return new CountScan(txn, params, ws);
    }

    default: {
        // Unknown node type: report what it was and give up.
        mongoutils::str::stream ss;
        root->appendToString(&ss, 0);
        string nodeStr(ss);
        warning() << "Can't build exec tree for node " << nodeStr << endl;
        return NULL;
    }
    }
}
std::string runQuery(OperationContext* txn, QueryMessage& q, const NamespaceString& nss, CurOp& curop, Message &result) { // Validate the namespace. uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid()); invariant(!nss.isCommand()); // Set curop information. beginQueryOp(nss, q.query, q.ntoreturn, q.ntoskip, &curop); // Parse the qm into a CanonicalQuery. std::auto_ptr<CanonicalQuery> cq; { CanonicalQuery* cqRaw; Status canonStatus = CanonicalQuery::canonicalize(q, &cqRaw, WhereCallbackReal(txn, nss.db())); if (!canonStatus.isOK()) { uasserted(17287, str::stream() << "Can't canonicalize query: " << canonStatus.toString()); } cq.reset(cqRaw); } invariant(cq.get()); LOG(5) << "Running query:\n" << cq->toString(); LOG(2) << "Running query: " << cq->toStringShort(); // Parse, canonicalize, plan, transcribe, and get a plan executor. AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); const int dbProfilingLevel = ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile; // We have a parsed query. Time to get the execution plan for it. std::unique_ptr<PlanExecutor> exec; { PlanExecutor* rawExec; Status execStatus = getExecutorFind(txn, collection, nss, cq.release(), PlanExecutor::YIELD_AUTO, &rawExec); uassertStatusOK(execStatus); exec.reset(rawExec); } const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed(); // If it's actually an explain, do the explain and return rather than falling through // to the normal query execution loop. if (pq.isExplain()) { BufBuilder bb; bb.skip(sizeof(QueryResult::Value)); BSONObjBuilder explainBob; Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob); // Add the resulting object to the return buffer. BSONObj explainObj = explainBob.obj(); bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize()); // TODO: Does this get overwritten/do we really need to set this twice? 
curop.debug().query = q.query; // Set query result fields. QueryResult::View qr = bb.buf(); bb.decouple(); qr.setResultFlagsToOk(); qr.msgdata().setLen(bb.len()); curop.debug().responseLength = bb.len(); qr.msgdata().setOperation(opReply); qr.setCursorId(0); qr.setStartingFrom(0); qr.setNReturned(1); result.setData(qr.view2ptr(), true); return ""; } // We freak out later if this changes before we're done with the query. const ChunkVersion shardingVersionAtStart = shardingState.getVersion(nss.ns()); // Handle query option $maxTimeMS (not used with commands). curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000); txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point. // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set. bool slaveOK = pq.isSlaveOk() || pq.hasReadPref(); Status serveReadsStatus = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor( txn, nss, slaveOK); uassertStatusOK(serveReadsStatus); // Run the query. // bb is used to hold query results // this buffer should contain either requested documents per query or // explain information, but not both BufBuilder bb(32768); bb.skip(sizeof(QueryResult::Value)); // How many results have we obtained from the executor? int numResults = 0; // If we're replaying the oplog, we save the last time that we read. Timestamp slaveReadTill; BSONObj obj; PlanExecutor::ExecState state; // uint64_t numMisplacedDocs = 0; // Get summary info about which plan the executor is using. curop.debug().planSummary = Explain::getPlanSummary(exec.get()); while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // Add result to output buffer. bb.appendBuf((void*)obj.objdata(), obj.objsize()); // Count the result. ++numResults; // Possibly note slave's position in the oplog. 
if (pq.isOplogReplay()) { BSONElement e = obj["ts"]; if (Date == e.type() || bsonTimestamp == e.type()) { slaveReadTill = e.timestamp(); } } if (enoughForFirstBatch(pq, numResults, bb.len())) { LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore() << " numToReturn=" << pq.getNumToReturn() << " numResults=" << numResults << endl; break; } } // If we cache the executor later, we want to deregister it as it receives notifications // anyway by virtue of being cached. // // If we don't cache the executor later, we are deleting it, so it must be deregistered. // // So, no matter what, deregister the executor. exec->deregisterExec(); // Caller expects exceptions thrown in certain cases. if (PlanExecutor::FAILURE == state) { scoped_ptr<PlanStageStats> stats(exec->getStats()); error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats); uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj)); } // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the // chunk belonged on this node are gone. Separating chunk lifetime management from // ClientCursor should allow this check to go away. if (!shardingState.getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // if the version changed during the query we might be missing some data and its safe to // send this as mongos can resend at this point throw SendStaleConfigException(nss.ns(), "version changed during initial query", shardingVersionAtStart, shardingState.getVersion(nss.ns())); } // Fill out curop based on query results. If we have a cursorid, we will fill out curop with // this cursorid later. long long ccId = 0; if (shouldSaveCursor(txn, collection, state, exec.get())) { // We won't use the executor until it's getMore'd. exec->saveState(); // Allocate a new ClientCursor. We don't have to worry about leaking it as it's // inserted into a global map by its ctor. 
ClientCursor* cc = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), pq.getOptions(), pq.getFilter()); ccId = cc->cursorid(); if (txn->getClient()->isInDirectClient()) { cc->setUnownedRecoveryUnit(txn->recoveryUnit()); } else if (state == PlanExecutor::IS_EOF && pq.isTailable()) { // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their // next getMore. } else { // We stash away the RecoveryUnit in the ClientCursor. It's used for subsequent // getMore requests. The calling OpCtx gets a fresh RecoveryUnit. txn->recoveryUnit()->abandonSnapshot(); cc->setOwnedRecoveryUnit(txn->releaseRecoveryUnit()); StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine(); invariant(txn->setRecoveryUnit(storageEngine->newRecoveryUnit(), OperationContext::kNotInUnitOfWork) == OperationContext::kNotInUnitOfWork); } LOG(5) << "caching executor with cursorid " << ccId << " after returning " << numResults << " results" << endl; // TODO document if (pq.isOplogReplay() && !slaveReadTill.isNull()) { cc->slaveReadTill(slaveReadTill); } // TODO document if (pq.isExhaust()) { curop.debug().exhaust = true; } cc->setPos(numResults); // If the query had a time limit, remaining time is "rolled over" to the cursor (for // use by future getmore ops). cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros()); endQueryOp(cc->getExecutor(), dbProfilingLevel, numResults, ccId, &curop); } else { LOG(5) << "Not caching executor but returning " << numResults << " results.\n"; endQueryOp(exec.get(), dbProfilingLevel, numResults, ccId, &curop); } // Add the results from the query into the output buffer. result.appendData(bb.buf(), bb.len()); bb.decouple(); // Fill out the output buffer's header. QueryResult::View qr = result.header().view2ptr(); qr.setCursorId(ccId); qr.setResultFlagsToOk(); qr.msgdata().setOperation(opReply); qr.setStartingFrom(0); qr.setNReturned(numResults); // curop.debug().exhaust is set above. 
return curop.debug().exhaust ? nss.ns() : ""; }
/* * Runs the command object cmdobj on the db with name dbname and puts result in result. * @param dbname, name of db * @param cmdobj, object that contains entire command * @param options * @param errmsg, reference to error message * @param result, reference to builder for result * @param fromRepl * @return true if successful, false otherwise */ bool FTSCommand::_run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int cmdOptions, const string& ns, const string& searchString, string language, // "" for not-set int limit, BSONObj& filter, BSONObj& projection, string& errmsg, BSONObjBuilder& result ) { Timer comm; // Rewrite the cmd as a normal query. BSONObjBuilder queryBob; queryBob.appendElements(filter); BSONObjBuilder textBob; textBob.append("$search", searchString); if (!language.empty()) { textBob.append("$language", language); } queryBob.append("$text", textBob.obj()); // This is the query we exec. BSONObj queryObj = queryBob.obj(); // We sort by the score. BSONObj sortSpec = BSON("$s" << BSON("$meta" << LiteParsedQuery::metaTextScore)); // We also project the score into the document and strip it out later during the reformatting // of the results. 
BSONObjBuilder projBob; projBob.appendElements(projection); projBob.appendElements(sortSpec); BSONObj projObj = projBob.obj(); AutoGetCollectionForRead ctx(txn, ns); CanonicalQuery* cq; Status canonicalizeStatus = CanonicalQuery::canonicalize(ns, queryObj, sortSpec, projObj, 0, limit, BSONObj(), &cq, WhereCallbackReal(txn, dbname)); if (!canonicalizeStatus.isOK()) { errmsg = canonicalizeStatus.reason(); return false; } PlanExecutor* rawExec; Status getExecStatus = getExecutor(txn, ctx.getCollection(), cq, &rawExec); if (!getExecStatus.isOK()) { errmsg = getExecStatus.reason(); return false; } auto_ptr<PlanExecutor> exec(rawExec); BSONArrayBuilder resultBuilder(result.subarrayStart("results")); // Quoth: "leave a mb for other things" int resultSize = 1024 * 1024; int numReturned = 0; BSONObj obj; while (PlanExecutor::ADVANCED == exec->getNext(&obj, NULL)) { if ((resultSize + obj.objsize()) >= BSONObjMaxUserSize) { break; } // We return an array of results. Add another element. BSONObjBuilder oneResultBuilder(resultBuilder.subobjStart()); oneResultBuilder.append("score", obj["$s"].number()); // Strip out the score from the returned obj. BSONObjIterator resIt(obj); BSONObjBuilder resBob; while (resIt.more()) { BSONElement elt = resIt.next(); if (!mongoutils::str::equals("$s", elt.fieldName())) { resBob.append(elt); } } oneResultBuilder.append("obj", resBob.obj()); BSONObj addedArrayObj = oneResultBuilder.done(); resultSize += addedArrayObj.objsize(); numReturned++; } resultBuilder.done(); // returns some stats to the user BSONObjBuilder stats(result.subobjStart("stats")); // Fill in nscanned from the explain. PlanSummaryStats summary; Explain::getSummaryStats(exec.get(), &summary); stats.appendNumber("nscanned", summary.totalKeysExamined); stats.appendNumber("nscannedObjects", summary.totalDocsExamined); stats.appendNumber( "n" , numReturned ); stats.append( "timeMicros", (int)comm.micros() ); stats.done(); return true; }
/**
 * Recursively turns a BSON description of a plan node into a tree of PlanStage objects.
 *
 * 'obj' is expected to look like { <nodeName>: { filter: {...}, args: {...} } }. The optional
 * "filter" is parsed into a MatchExpression, which is appended to 'exprs' (the container that
 * ends up deleting it). The optional "args" object carries the node-specific arguments.
 *
 * Recognized node names: "ixscan", "andHash", "andSorted", "or", "fetch", "limit", "skip",
 * "cscan", "mergeSort", "text" and "delete" ("sort" is compiled out).
 *
 * @param txn         operation context handed to the stages and the $where callback
 * @param collection  collection the plan runs against; dereferenced without a NULL check
 * @param obj         description of the node to build
 * @param workingSet  working set handed to every stage that is created
 * @param exprs       receives every MatchExpression parsed from a "filter" field
 * @return a stage allocated with new, or NULL when the description cannot be parsed (unknown
 *         node name, non-object fields, unparseable filter, or an unparseable child of a
 *         fetch/limit/skip/delete node)
 *
 * Malformed arguments are otherwise reported through uassert / uasserted; BSON accessors such as
 * Obj(), Bool() and String() are called on unvalidated elements in several places.
 */
PlanStage* parseQuery(OperationContext* txn,
                      Collection* collection,
                      BSONObj obj,
                      WorkingSet* workingSet,
                      OwnedPointerVector<MatchExpression>* exprs) {

    BSONElement firstElt = obj.firstElement();
    if (!firstElt.isABSONObj()) { return NULL; }
    BSONObj paramObj = firstElt.Obj();

    MatchExpression* matcher = NULL;
    BSONObj nodeArgs;

    // Every node has these two fields.
    const string filterTag = "filter";
    const string argsTag = "args";

    BSONObjIterator it(paramObj);
    while (it.more()) {
        BSONElement e = it.next();
        if (!e.isABSONObj()) { return NULL; }
        BSONObj argObj = e.Obj();
        if (filterTag == e.fieldName()) {
            StatusWithMatchExpression swme = MatchExpressionParser::parse(
                    argObj, WhereCallbackReal(txn, collection->ns().db()));
            if (!swme.isOK()) { return NULL; }
            // exprs is what will wind up deleting this.
            matcher = swme.getValue();
            verify(NULL != matcher);
            exprs->mutableVector().push_back(matcher);
        }
        else if (argsTag == e.fieldName()) {
            nodeArgs = argObj;
        }
        else {
            uasserted(16910, "Unknown fieldname " + string(e.fieldName())
                             + " in query node " + obj.toString());
            return NULL;
        }
    }

    string nodeName = firstElt.fieldName();

    if ("ixscan" == nodeName) {
        // This'll throw if it's not an obj but that's OK.
        BSONObj keyPatternObj = nodeArgs["keyPattern"].Obj();

        IndexDescriptor* desc =
            collection->getIndexCatalog()->findIndexByKeyPattern(keyPatternObj);
        uassert(16890, "Can't find index: " + keyPatternObj.toString(), desc);

        IndexScanParams params;
        params.descriptor = desc;
        params.bounds.isSimpleRange = true;
        params.bounds.startKey = nodeArgs["startKey"].Obj();
        params.bounds.endKey = nodeArgs["endKey"].Obj();
        params.bounds.endKeyInclusive = nodeArgs["endKeyInclusive"].Bool();
        params.direction = nodeArgs["direction"].numberInt();

        return new IndexScan(txn, params, workingSet, matcher);
    }
    else if ("andHash" == nodeName) {
        uassert(16921, "Nodes argument must be provided to AND",
                nodeArgs["nodes"].isABSONObj());

        auto_ptr<AndHashStage> andStage(new AndHashStage(workingSet, matcher, collection));

        int nodesAdded = 0;
        BSONObjIterator childIt(nodeArgs["nodes"].Obj());
        while (childIt.more()) {
            BSONElement e = childIt.next();
            uassert(16922, "node of AND isn't an obj?: " + e.toString(),
                    e.isABSONObj());

            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16923, "Can't parse sub-node of AND: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            andStage->addChild(subNode);
            ++nodesAdded;
        }

        uassert(16927, "AND requires more than one child", nodesAdded >= 2);

        return andStage.release();
    }
    else if ("andSorted" == nodeName) {
        uassert(16924, "Nodes argument must be provided to AND",
                nodeArgs["nodes"].isABSONObj());

        auto_ptr<AndSortedStage> andStage(
            new AndSortedStage(workingSet, matcher, collection));

        int nodesAdded = 0;
        BSONObjIterator childIt(nodeArgs["nodes"].Obj());
        while (childIt.more()) {
            BSONElement e = childIt.next();
            uassert(16925, "node of AND isn't an obj?: " + e.toString(),
                    e.isABSONObj());

            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16926, "Can't parse sub-node of AND: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            andStage->addChild(subNode);
            ++nodesAdded;
        }

        uassert(16928, "AND requires more than one child", nodesAdded >= 2);

        return andStage.release();
    }
    else if ("or" == nodeName) {
        // The message used to say "AND" here, which was a copy-paste slip.
        uassert(16934, "Nodes argument must be provided to OR",
                nodeArgs["nodes"].isABSONObj());
        uassert(16935, "Dedup argument must be provided to OR",
                !nodeArgs["dedup"].eoo());

        BSONObjIterator childIt(nodeArgs["nodes"].Obj());
        auto_ptr<OrStage> orStage(new OrStage(workingSet, nodeArgs["dedup"].Bool(), matcher));
        while (childIt.more()) {
            BSONElement e = childIt.next();
            if (!e.isABSONObj()) { return NULL; }
            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16936, "Can't parse sub-node of OR: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            orStage->addChild(subNode);
        }

        return orStage.release();
    }
    else if ("fetch" == nodeName) {
        uassert(16929, "Node argument must be provided to fetch",
                nodeArgs["node"].isABSONObj());

        PlanStage* subNode = parseQuery(txn, collection, nodeArgs["node"].Obj(),
                                        workingSet, exprs);
        // Don't build a stage on top of a child that failed to parse.
        if (NULL == subNode) { return NULL; }

        return new FetchStage(workingSet, subNode, matcher, collection);
    }
    else if ("limit" == nodeName) {
        uassert(16937, "Limit stage doesn't have a filter (put it on the child)",
                NULL == matcher);
        uassert(16930, "Node argument must be provided to limit",
                nodeArgs["node"].isABSONObj());
        uassert(16931, "Num argument must be provided to limit",
                nodeArgs["num"].isNumber());

        PlanStage* subNode = parseQuery(txn, collection, nodeArgs["node"].Obj(),
                                        workingSet, exprs);
        // Don't build a stage on top of a child that failed to parse.
        if (NULL == subNode) { return NULL; }

        return new LimitStage(nodeArgs["num"].numberInt(), workingSet, subNode);
    }
    else if ("skip" == nodeName) {
        uassert(16938, "Skip stage doesn't have a filter (put it on the child)",
                NULL == matcher);
        uassert(16932, "Node argument must be provided to skip",
                nodeArgs["node"].isABSONObj());
        uassert(16933, "Num argument must be provided to skip",
                nodeArgs["num"].isNumber());

        PlanStage* subNode = parseQuery(txn, collection, nodeArgs["node"].Obj(),
                                        workingSet, exprs);
        // Don't build a stage on top of a child that failed to parse.
        if (NULL == subNode) { return NULL; }

        return new SkipStage(nodeArgs["num"].numberInt(), workingSet, subNode);
    }
    else if ("cscan" == nodeName) {
        CollectionScanParams params;
        params.collection = collection;

        // What direction?
        uassert(16963, "Direction argument must be specified and be a number",
                nodeArgs["direction"].isNumber());
        if (1 == nodeArgs["direction"].numberInt()) {
            params.direction = CollectionScanParams::FORWARD;
        }
        else {
            params.direction = CollectionScanParams::BACKWARD;
        }

        return new CollectionScan(txn, params, workingSet, matcher);
    }
    // sort is disabled for now.
    // NOTE: the disabled branch still refers to 'db', which is not a parameter of this
    // function; it needs updating before it can be re-enabled.
#if 0
    else if ("sort" == nodeName) {
        uassert(16969, "Node argument must be provided to sort",
                nodeArgs["node"].isABSONObj());
        uassert(16970, "Pattern argument must be provided to sort",
                nodeArgs["pattern"].isABSONObj());
        PlanStage* subNode = parseQuery(txn, db, nodeArgs["node"].Obj(), workingSet, exprs);
        SortStageParams params;
        params.pattern = nodeArgs["pattern"].Obj();
        return new SortStage(params, workingSet, subNode);
    }
#endif
    else if ("mergeSort" == nodeName) {
        uassert(16971, "Nodes argument must be provided to sort",
                nodeArgs["nodes"].isABSONObj());
        uassert(16972, "Pattern argument must be provided to sort",
                nodeArgs["pattern"].isABSONObj());

        MergeSortStageParams params;
        params.pattern = nodeArgs["pattern"].Obj();
        // Dedup is true by default.

        auto_ptr<MergeSortStage> mergeStage(
            new MergeSortStage(params, workingSet, collection));

        BSONObjIterator childIt(nodeArgs["nodes"].Obj());
        while (childIt.more()) {
            BSONElement e = childIt.next();
            uassert(16973, "node of mergeSort isn't an obj?: " + e.toString(),
                    e.isABSONObj());

            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16974, "Can't parse sub-node of mergeSort: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            mergeStage->addChild(subNode);
        }
        return mergeStage.release();
    }
    else if ("text" == nodeName) {
        string search = nodeArgs["search"].String();

        vector<IndexDescriptor*> idxMatches;
        collection->getIndexCatalog()->findIndexByType("text", idxMatches);
        uassert(17194, "Expected exactly one text index", idxMatches.size() == 1);

        IndexDescriptor* index = idxMatches[0];
        FTSAccessMethod* fam =
            dynamic_cast<FTSAccessMethod*>( collection->getIndexCatalog()->getIndex( index ) );
        // dynamic_cast yields NULL when the access method is not an FTSAccessMethod; treat that
        // as a parse failure rather than dereferencing it.
        if (NULL == fam) { return NULL; }

        TextStageParams params(fam->getSpec());
        params.index = index;

        // TODO: Deal with non-empty filters.  This is a hack to put in covering information
        // that can only be checked for equality.  We ignore this now.
        Status s = fam->getSpec().getIndexPrefix(BSONObj(), &params.indexPrefix);
        if (!s.isOK()) {
            // errmsg = s.toString();
            return NULL;
        }

        params.spec = fam->getSpec();

        if (!params.query.parse(search,
                                fam->getSpec().defaultLanguage().str().c_str()).isOK()) {
            return NULL;
        }

        return new TextStage(txn, params, workingSet, matcher);
    }
    else if ("delete" == nodeName) {
        uassert(18636, "Delete stage doesn't have a filter (put it on the child)",
                NULL == matcher);
        uassert(18637, "node argument must be provided to delete",
                nodeArgs["node"].isABSONObj());
        uassert(18638, "isMulti argument must be provided to delete",
                nodeArgs["isMulti"].type() == Bool);
        uassert(18639, "shouldCallLogOp argument must be provided to delete",
                nodeArgs["shouldCallLogOp"].type() == Bool);

        PlanStage* subNode = parseQuery(txn, collection, nodeArgs["node"].Obj(),
                                        workingSet, exprs);
        // Don't build a stage on top of a child that failed to parse.
        if (NULL == subNode) { return NULL; }

        DeleteStageParams params;
        params.isMulti = nodeArgs["isMulti"].Bool();
        params.shouldCallLogOp = nodeArgs["shouldCallLogOp"].Bool();
        return new DeleteStage(txn, params, workingSet, collection, subNode);
    }
    else {
        return NULL;
    }
}