Beispiel #1
        // Test body: builds a matcher from a $where JavaScript predicate and checks that it
        // accepts a document with a == 1 and rejects a document with a == 2.
        // NOTE(review): the closing brace of run() is not present in this excerpt.
        void run() {
            OperationContextImpl txn;
            // Read context on the test namespace; held for the duration of the assertions below.
            Client::ReadContext ctx(&txn, "unittests.matchertests");

            // The matcher is given a WhereCallbackReal bound to this operation context and the
            // "unittests" database name.
            M m(BSON("$where" << "function(){ return this.a == 1; }"),
                WhereCallbackReal(&txn, StringData("unittests")));
            ASSERT( m.matches( BSON( "a" << 1 ) ) );
            ASSERT( !m.matches( BSON( "a" << 2 ) ) );
Beispiel #2
    // Builds a PlanExecutor whose root data source is an IDHackStage.
    //
    // 'rawCanonicalQuery' is wrapped in an auto_ptr on entry, so this function owns it from
    // that point on; on the path shown it is released into the newly created PlanExecutor.
    //
    // The stage tree is assembled bottom-up:
    //   IDHackStage
    //   -> ShardFilterStage   (only if plannerParams.options has INCLUDE_SHARD_FILTER)
    //   -> ProjectionStage    (only if the canonical query has a projection)
    //
    // The executor is stored in '*out'. The visible code always returns Status::OK().
    //
    // NOTE(review): closing braces are missing from this excerpt, so the nesting of the
    // if/else blocks below has to be inferred from indentation.
    Status getExecutorIDHack(OperationContext* txn,
                             Collection* collection,
                             CanonicalQuery* rawCanonicalQuery,
                             const QueryPlannerParams& plannerParams,
                             PlanExecutor** out) {
        // Take ownership immediately so the query is freed on any early exit.
        auto_ptr<CanonicalQuery> canonicalQuery(rawCanonicalQuery);

        LOG(2) << "Using idhack: " << canonicalQuery->toStringShort();
        // Raw allocations: 'ws' and 'root' are handed to the PlanExecutor constructed at the end.
        WorkingSet* ws = new WorkingSet();
        PlanStage* root = new IDHackStage(txn, collection, canonicalQuery.get(), ws);

        // Might have to filter out orphaned docs.
        if (plannerParams.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) {
            root = new ShardFilterStage(shardingState.getCollectionMetadata(collection->ns()),
                                        ws, root);

        // There might be a projection. The idhack stage will always fetch the full document,
        // so we don't support covered projections. However, we might use the simple inclusion
        // fast path.
        if (NULL != canonicalQuery->getProj()) {
            // NOTE(review): WhereCallbackReal is constructed here with a single argument, while
            // the other call sites visible in this file pass (txn, db) — confirm the signature.
            ProjectionStageParams params(WhereCallbackReal(collection->ns().db()));
            params.projObj = canonicalQuery->getProj()->getProjObj();

            // Stuff the right data into the params depending on what proj impl we use.
            if (canonicalQuery->getProj()->requiresDocument()
                || canonicalQuery->getProj()->wantIndexKey()) {
                params.fullExpression = canonicalQuery->root();
                params.projImpl = ProjectionStageParams::NO_FAST_PATH;
            else {
                params.projImpl = ProjectionStageParams::SIMPLE_DOC;

            root = new ProjectionStage(params, ws, root);

        // Ownership of the canonical query passes to the executor via release().
        *out = new PlanExecutor(ws, root, canonicalQuery.release(), collection);
        return Status::OK();
Beispiel #3
    // Runs the query described by 'q' and writes the reply into 'result'.
    //
    // Visible flow:
    //   1. Validate the namespace and record ns / ntoreturn / query on curop.debug().
    //   2. If the namespace is a command namespace, run the command and return "".
    //   3. Canonicalize the query, acquire the collection for reading, and obtain a
    //      PlanExecutor (getOplogStartHack for oplogReplay queries on an existing collection,
    //      getExecutor otherwise).
    //   4. For explain queries, serialize the explain output into the reply and return "".
    //   5. Otherwise pull results from the executor into a buffer, decide whether to keep the
    //      executor in a ClientCursor for later getMore calls, and record statistics on
    //      curop.debug().
    //
    // Returns pq.ns() when curop.debug().exhaust is set, otherwise "".
    // Failures are raised through uassert/uasserted; a sharding version change during the
    // query raises SendStaleConfigException.
    //
    // 'm' is not referenced by the visible code. 'fromDBDirectClient' is only tested in the
    // cursor-saving section near the end.
    //
    // NOTE(review): this excerpt has lost lines — closing braces and a number of statements
    // and continuation lines are absent (individual spots are marked below). It must not be
    // treated as compilable as-is.
    std::string newRunQuery(OperationContext* txn,
                            Message& m,
                            QueryMessage& q,
                            CurOp& curop,
                            Message &result,
                            bool fromDBDirectClient) {
        // Validate the namespace.
        const char *ns = q.ns;
        uassert(16332, "can't have an empty ns", ns[0]);

        const NamespaceString nsString(ns);
        uassert(16256, str::stream() << "Invalid ns [" << ns << "]", nsString.isValid());

        // Set curop information.
        curop.debug().ns = ns;
        curop.debug().ntoreturn = q.ntoreturn;
        curop.debug().query = q.query;

        // If the query is really a command, run it.
        if (nsString.isCommand()) {
            int nToReturn = q.ntoreturn;
            uassert(16979, str::stream() << "bad numberToReturn (" << nToReturn
                                         << ") for $cmd type ns - can only be 1 or -1",
                    nToReturn == 1 || nToReturn == -1);


            BufBuilder bb;

            BSONObjBuilder cmdResBuf;
            if (!runCommands(txn, ns, q.query, curop, bb, cmdResBuf, false, q.queryOptions)) {
                uasserted(13530, "bad or malformed command request?");

            curop.debug().iscommand = true;
            // TODO: Does this get overwritten/do we really need to set this twice?
            curop.debug().query = q.query;

            // The command reply built in 'bb' becomes the whole response message.
            QueryResult::View qr = bb.buf();
            curop.debug().responseLength = bb.len();
            result.setData(qr.view2ptr(), true);
            return "";

        // NOTE(review): 'nss' is a second NamespaceString over the same q.ns as 'nsString' above.
        const NamespaceString nss(q.ns);

        // Parse the qm into a CanonicalQuery.
        CanonicalQuery* cq;
        Status canonStatus = CanonicalQuery::canonicalize(
                                    q, &cq, WhereCallbackReal(txn, StringData(nss.db())));
        if (!canonStatus.isOK()) {
            uasserted(17287, str::stream() << "Can't canonicalize query: " << canonStatus.toString());

        QLOG() << "Running query:\n" << cq->toString();
        LOG(2) << "Running query: " << cq->toStringShort();

        // Parse, canonicalize, plan, transcribe, and get a plan executor.
        PlanExecutor* rawExec = NULL;

        // We use this a lot below.
        const LiteParsedQuery& pq = cq->getParsed();

        AutoGetCollectionForRead ctx(txn, nss);

        // NOTE(review): the false branch of this conditional is missing from the excerpt.
        const int dbProfilingLevel = (ctx.getDb() != NULL) ? ctx.getDb()->getProfilingLevel() :

        Collection* collection = ctx.getCollection();

        // We'll now try to get the query executor that will execute this query for us. There
        // are a few cases in which we know upfront which executor we should get and, therefore,
        // we shortcut the selection process here.
        // (a) If the query is over a collection that doesn't exist, we use an EOFStage.
        // (b) if the query is a replication's initial sync one, we use a specifically designed
        // stage that skips extents faster (see details in exec/oplogstart.h).
        // Otherwise we go through the selection of which executor is most suited to the
        // query + run-time context at hand.
        Status status = Status::OK();
        if (NULL != collection && pq.getOptions().oplogReplay) {
            // Takes ownership of 'cq'.
            status = getOplogStartHack(txn, collection, cq, &rawExec);
        else {
            size_t options = QueryPlannerParams::DEFAULT;
            if (shardingState.needCollectionMetadata(pq.ns())) {
                options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
            // Takes ownership of 'cq'.
            status = getExecutor(txn, collection, cq, PlanExecutor::YIELD_AUTO, &rawExec, options);

        if (!status.isOK()) {
            // NOTE: Do not access cq as getExecutor has deleted it.
            uasserted(17007, "Unable to execute query: " + status.reason());

        verify(NULL != rawExec);
        // From here on the executor is owned by 'exec' unless explicitly released.
        auto_ptr<PlanExecutor> exec(rawExec);

        // If it's actually an explain, do the explain and return rather than falling through
        // to the normal query execution loop.
        if (pq.isExplain()) {
            BufBuilder bb;

            BSONObjBuilder explainBob;
            Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

            // Add the resulting object to the return buffer.
            BSONObj explainObj = explainBob.obj();
            bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

            curop.debug().iscommand = true;
            // TODO: Does this get overwritten/do we really need to set this twice?
            curop.debug().query = q.query;

            // Set query result fields.
            QueryResult::View qr = bb.buf();
            curop.debug().responseLength = bb.len();
            result.setData(qr.view2ptr(), true);
            return "";

        // We freak out later if this changes before we're done with the query.
        const ChunkVersion shardingVersionAtStart = shardingState.getVersion(cq->ns());

        // Handle query option $maxTimeMS (not used with commands).
        curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
        txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

        // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
        bool slaveOK = pq.getOptions().slaveOk || pq.hasReadPref();
        // NOTE(review): the argument list of this call, and whatever consumes 'status'
        // afterwards, are missing from the excerpt.
        status = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(

        // If this exists, the collection is sharded.
        // If it doesn't exist, we can assume we're not sharded.
        // If we're sharded, we might encounter data that is not consistent with our sharding state.
        // We must ignore this data.
        CollectionMetadataPtr collMetadata;
        if (!shardingState.needCollectionMetadata(pq.ns())) {
            collMetadata = CollectionMetadataPtr();
        else {
            collMetadata = shardingState.getCollectionMetadata(pq.ns());

        // Run the query.
        // bb is used to hold query results
        // this buffer should contain either requested documents per query or
        // explain information, but not both
        BufBuilder bb(32768);

        // How many results have we obtained from the executor?
        int numResults = 0;

        // If we're replaying the oplog, we save the last time that we read.
        OpTime slaveReadTill;

        // Do we save the PlanExecutor in a ClientCursor for getMore calls later?
        bool saveClientCursor = false;

        BSONObj obj;
        PlanExecutor::ExecState state;
        // uint64_t numMisplacedDocs = 0;

        // Get summary info about which plan the executor is using.
        curop.debug().planSummary = Explain::getPlanSummary(exec.get());

        // Main result loop: runs for as long as the executor keeps returning ADVANCED.
        while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
            // Add result to output buffer.
            bb.appendBuf((void*)obj.objdata(), obj.objsize());

            // Count the result.
            // NOTE(review): no increment of numResults is visible here in this excerpt.

            // Possibly note slave's position in the oplog.
            if (pq.getOptions().oplogReplay) {
                BSONElement e = obj["ts"];
                if (Date == e.type() || Timestamp == e.type()) {
                    slaveReadTill = e._opTime();

            // TODO: only one type of 2d search doesn't support this.  We need a way to pull it out
            // of CanonicalQuery. :(
            // Because supportsGetMore is a constant true, the first condition below is always false.
            const bool supportsGetMore = true;
            if (!supportsGetMore && (enough(pq, numResults)
                                     || bb.len() >= MaxBytesToReturnToClientAtOnce)) {
            else if (enoughForFirstBatch(pq, numResults, bb.len())) {
                QLOG() << "Enough for first batch, wantMore=" << pq.wantMore()
                       << " numToReturn=" << pq.getNumToReturn()
                       << " numResults=" << numResults
                       << endl;
                // If only one result requested assume it's a findOne() and don't save the cursor.
                if (pq.wantMore() && 1 != pq.getNumToReturn()) {
                    QLOG() << " executor EOF=" << exec->isEOF() << endl;
                    saveClientCursor = !exec->isEOF();

        // If we cache the executor later, we want to deregister it as it receives notifications
        // anyway by virtue of being cached.
        // If we don't cache the executor later, we are deleting it, so it must be deregistered.
        // So, no matter what, deregister the executor.
        // NOTE(review): the deregistration call this comment refers to is not visible here.

        // Caller expects exceptions thrown in certain cases.
        if (PlanExecutor::EXEC_ERROR == state) {
            scoped_ptr<PlanStageStats> stats(exec->getStats());
            error() << "Plan executor error, stats: "
                    << Explain::statsToBSON(*stats);
            uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));

        // Why save a dead executor?
        if (PlanExecutor::DEAD == state) {
            saveClientCursor = false;
        else if (pq.getOptions().tailable) {
            // If we're tailing a capped collection, we don't bother saving the cursor if the
            // collection is empty. Otherwise, the semantics of the tailable cursor is that the
            // client will keep trying to read from it. So we'll keep it around.
            if (collection && collection->numRecords(txn) != 0 && pq.getNumToReturn() != 1) {
                saveClientCursor = true;

        // TODO(greg): This will go away soon.
        if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
            // if the version changed during the query we might be missing some data and its safe to
            // send this as mongos can resend at this point
            // NOTE(review): the remaining constructor arguments are missing from the excerpt.
            throw SendStaleConfigException(pq.ns(), "version changed during initial query",

        const logger::LogComponent queryLogComponent = logger::LogComponent::kQuery;
        const logger::LogSeverity logLevelOne = logger::LogSeverity::Debug(1);

        PlanSummaryStats summaryStats;
        Explain::getSummaryStats(exec.get(), &summaryStats);

        // Copy the summary counters onto curop.debug().
        curop.debug().ntoskip = pq.getSkip();
        curop.debug().nreturned = numResults;
        curop.debug().scanAndOrder = summaryStats.hasSortStage;
        curop.debug().nscanned = summaryStats.totalKeysExamined;
        curop.debug().nscannedObjects = summaryStats.totalDocsExamined;
        curop.debug().idhack = summaryStats.isIdhack;

        // Set debug information for consumption by the profiler.
        // Full exec stats are gathered only when profiling is on, the op is slow, or query
        // logging at debug level 1 is enabled.
        if (dbProfilingLevel > 0 ||
            curop.elapsedMillis() > serverGlobalParams.slowMS ||
            logger::globalLogDomain()->shouldLog(queryLogComponent, logLevelOne)) {
            // Get BSON stats.
            scoped_ptr<PlanStageStats> execStats(exec->getStats());
            BSONObjBuilder statsBob;
            Explain::statsToBSON(*execStats, &statsBob);
            // NOTE(review): the statement storing statsBob's output into curop.debug().execStats
            // is not visible in this excerpt.

            // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj.
            if (curop.debug().execStats.tooBig() && !curop.debug().planSummary.empty()) {
                BSONObjBuilder bob;
                bob.append("summary", curop.debug().planSummary.toString());

        // ccId stays 0 when no cursor is saved; it is reported as -1 on curop.debug() below.
        long long ccId = 0;
        if (saveClientCursor) {
            // We won't use the executor until it's getMore'd.

            // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
            // inserted into a global map by its ctor.
            // NOTE(review): the remaining constructor arguments are missing from the excerpt.
            ClientCursor* cc = new ClientCursor(collection, exec.get(),
            ccId = cc->cursorid();

            // NOTE(review): the bodies of the three branches below are largely missing.
            if (fromDBDirectClient) {
            else if (state == PlanExecutor::IS_EOF && pq.getOptions().tailable) {
                // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their
                // next getMore.
            else {
                // We stash away the RecoveryUnit in the ClientCursor.  It's used for subsequent
                // getMore requests.  The calling OpCtx gets a fresh RecoveryUnit.
                StorageEngine* storageEngine = getGlobalEnvironment()->getGlobalStorageEngine();

            QLOG() << "caching executor with cursorid " << ccId
                   << " after returning " << numResults << " results" << endl;

            // ClientCursor takes ownership of executor.  Release to make sure it's not deleted.
            // NOTE(review): the release call described above is not visible in this excerpt.

            // TODO document
            if (pq.getOptions().oplogReplay && !slaveReadTill.isNull()) {

            // TODO document
            if (pq.getOptions().exhaust) {
                curop.debug().exhaust = true;

            // Set attributes for getMore.

            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).
        else {
            QLOG() << "Not caching executor but returning " << numResults << " results.\n";

        // Add the results from the query into the output buffer.
        result.appendData(bb.buf(), bb.len());

        // Fill out the output buffer's header.
        // NOTE(review): the assignments to the header fields through 'qr' are not visible here.
        QueryResult::View qr = result.header().view2ptr();
        curop.debug().cursorid = (0 == ccId ? -1 : ccId);

        // curop.debug().exhaust is set above.
        return curop.debug().exhaust ? pq.ns() : "";
Beispiel #4
    // Recursively translates the query solution subtree rooted at 'root' into a tree of
    // executable PlanStage objects that share the working set 'ws'.
    //
    // Dispatches on root->getType(); each branch casts 'root' to the matching node type,
    // builds any child stages via recursive calls, and returns a newly allocated stage.
    //
    // Returns NULL (after logging a warning in most cases) when a stage cannot be built:
    // a required collection or index is missing, a child failed to build, the text query
    // fails to parse, or the node type is not handled.
    //
    // 'qsol' is only forwarded to the recursive calls in the visible code.
    //
    // NOTE(review): this excerpt has lost lines — closing braces and some statements and
    // continuation lines are absent (individual spots are marked below). Where a branch
    // returns NULL after a child stage was already allocated, nothing visible frees it.
    PlanStage* buildStages(OperationContext* txn,
                           Collection* collection,
                           const QuerySolution& qsol,
                           const QuerySolutionNode* root,
                           WorkingSet* ws) {
        if (STAGE_COLLSCAN == root->getType()) {
            const CollectionScanNode* csn = static_cast<const CollectionScanNode*>(root);
            CollectionScanParams params;
            params.collection = collection;
            params.tailable = csn->tailable;
            // A direction of exactly 1 is a forward scan; any other value scans backward.
            params.direction = (csn->direction == 1) ? CollectionScanParams::FORWARD
                                                     : CollectionScanParams::BACKWARD;
            params.maxScan = csn->maxScan;
            return new CollectionScan(txn, params, ws, csn->filter.get());
        else if (STAGE_IXSCAN == root->getType()) {
            const IndexScanNode* ixn = static_cast<const IndexScanNode*>(root);

            if (NULL == collection) {
                warning() << "Can't ixscan null namespace";
                return NULL;

            IndexScanParams params;

            params.descriptor =
                collection->getIndexCatalog()->findIndexByKeyPattern( txn, ixn->indexKeyPattern );
            if ( params.descriptor == NULL ) {
                // NOTE(review): there is no space before "in namespace", so the key pattern and
                // that text run together in the log message.
                warning() << "Can't find index " << ixn->indexKeyPattern.toString()
                          << "in namespace " << collection->ns() << endl;
                return NULL;

            params.bounds = ixn->bounds;
            params.direction = ixn->direction;
            params.maxScan = ixn->maxScan;
            params.addKeyMetadata = ixn->addKeyMetadata;
            return new IndexScan(txn, params, ws, ixn->filter.get());
        else if (STAGE_FETCH == root->getType()) {
            const FetchNode* fn = static_cast<const FetchNode*>(root);
            PlanStage* childStage = buildStages(txn, collection, qsol, fn->children[0], ws);
            if (NULL == childStage) { return NULL; }
            return new FetchStage(txn, ws, childStage, fn->filter.get(), collection);
        else if (STAGE_SORT == root->getType()) {
            const SortNode* sn = static_cast<const SortNode*>(root);
            PlanStage* childStage = buildStages(txn, collection, qsol, sn->children[0], ws);
            if (NULL == childStage) { return NULL; }
            SortStageParams params;
            params.collection = collection;
            params.pattern = sn->pattern;
            params.query = sn->query;
            params.limit = sn->limit;
            return new SortStage(txn, params, ws, childStage);
        else if (STAGE_PROJECTION == root->getType()) {
            const ProjectionNode* pn = static_cast<const ProjectionNode*>(root);
            PlanStage* childStage = buildStages(txn, collection, qsol, pn->children[0], ws);
            if (NULL == childStage) { return NULL; }

            // NOTE(review): 'collection' is dereferenced here without a NULL check in this branch.
            ProjectionStageParams params(WhereCallbackReal(txn, collection->ns().db()));
            params.projObj = pn->projection;

            // Stuff the right data into the params depending on what proj impl we use.
            if (ProjectionNode::DEFAULT == pn->projType) {
                params.fullExpression = pn->fullExpression;
                params.projImpl = ProjectionStageParams::NO_FAST_PATH;
            else if (ProjectionNode::COVERED_ONE_INDEX == pn->projType) {
                params.projImpl = ProjectionStageParams::COVERED_ONE_INDEX;
                params.coveredKeyObj = pn->coveredKeyObj;
            else {
                invariant(ProjectionNode::SIMPLE_DOC == pn->projType);
                params.projImpl = ProjectionStageParams::SIMPLE_DOC;

            return new ProjectionStage(params, ws, childStage);
        else if (STAGE_LIMIT == root->getType()) {
            const LimitNode* ln = static_cast<const LimitNode*>(root);
            PlanStage* childStage = buildStages(txn, collection, qsol, ln->children[0], ws);
            if (NULL == childStage) { return NULL; }
            return new LimitStage(ln->limit, ws, childStage);
        else if (STAGE_SKIP == root->getType()) {
            const SkipNode* sn = static_cast<const SkipNode*>(root);
            PlanStage* childStage = buildStages(txn, collection, qsol, sn->children[0], ws);
            if (NULL == childStage) { return NULL; }
            return new SkipStage(sn->skip, ws, childStage);
        else if (STAGE_AND_HASH == root->getType()) {
            const AndHashNode* ahn = static_cast<const AndHashNode*>(root);
            // 'ret' is held in an auto_ptr so it is freed if a child fails to build.
            auto_ptr<AndHashStage> ret(new AndHashStage(txn, ws, ahn->filter.get(), collection));
            for (size_t i = 0; i < ahn->children.size(); ++i) {
                PlanStage* childStage = buildStages(txn, collection, qsol, ahn->children[i], ws);
                if (NULL == childStage) { return NULL; }
                // NOTE(review): no statement attaching childStage to 'ret' is visible in this
                // excerpt — confirm against the full source.
            return ret.release();
        else if (STAGE_OR == root->getType()) {
            const OrNode * orn = static_cast<const OrNode*>(root);
            auto_ptr<OrStage> ret(new OrStage(ws, orn->dedup, orn->filter.get()));
            for (size_t i = 0; i < orn->children.size(); ++i) {
                PlanStage* childStage = buildStages(txn, collection, qsol, orn->children[i], ws);
                if (NULL == childStage) { return NULL; }
                // NOTE(review): as above, attaching childStage to 'ret' is not visible here.
            return ret.release();
        else if (STAGE_AND_SORTED == root->getType()) {
            const AndSortedNode* asn = static_cast<const AndSortedNode*>(root);
            auto_ptr<AndSortedStage> ret(new AndSortedStage(txn, ws, asn->filter.get(), collection));
            for (size_t i = 0; i < asn->children.size(); ++i) {
                PlanStage* childStage = buildStages(txn, collection, qsol, asn->children[i], ws);
                if (NULL == childStage) { return NULL; }
                // NOTE(review): as above, attaching childStage to 'ret' is not visible here.
            return ret.release();
        else if (STAGE_SORT_MERGE == root->getType()) {
            const MergeSortNode* msn = static_cast<const MergeSortNode*>(root);
            MergeSortStageParams params;
            params.dedup = msn->dedup;
            params.pattern = msn->sort;
            auto_ptr<MergeSortStage> ret(new MergeSortStage(txn, params, ws, collection));
            for (size_t i = 0; i < msn->children.size(); ++i) {
                PlanStage* childStage = buildStages(txn, collection, qsol, msn->children[i], ws);
                if (NULL == childStage) { return NULL; }
                // NOTE(review): as above, attaching childStage to 'ret' is not visible here.
            return ret.release();
        else if (STAGE_GEO_NEAR_2D == root->getType()) {
            const GeoNear2DNode* node = static_cast<const GeoNear2DNode*>(root);

            GeoNearParams params;
            params.nearQuery = node->nq;
            params.baseBounds = node->baseBounds;
            params.filter = node->filter.get();
            params.addPointMeta = node->addPointMeta;
            params.addDistMeta = node->addDistMeta;

            // NOTE(review): 'collection' is dereferenced without a NULL check in this branch, and
            // the remaining arguments of this call are missing from the excerpt.
            IndexDescriptor* twoDIndex = collection->getIndexCatalog()->findIndexByKeyPattern(txn,

            if (twoDIndex == NULL) {
                warning() << "Can't find 2D index " << node->indexKeyPattern.toString()
                          << "in namespace " << collection->ns() << endl;
                return NULL;

            GeoNear2DStage* nearStage = new GeoNear2DStage(params, txn, ws, collection, twoDIndex);

            return nearStage;
        else if (STAGE_GEO_NEAR_2DSPHERE == root->getType()) {
            const GeoNear2DSphereNode* node = static_cast<const GeoNear2DSphereNode*>(root);

            GeoNearParams params;
            params.nearQuery = node->nq;
            params.baseBounds = node->baseBounds;
            params.filter = node->filter.get();
            params.addPointMeta = node->addPointMeta;
            params.addDistMeta = node->addDistMeta;

            // NOTE(review): same as the 2D branch — no NULL check on 'collection', and the rest
            // of this call is missing from the excerpt.
            IndexDescriptor* s2Index = collection->getIndexCatalog()->findIndexByKeyPattern(txn,

            if (s2Index == NULL) {
                warning() << "Can't find 2DSphere index " << node->indexKeyPattern.toString()
                          << "in namespace " << collection->ns() << endl;
                return NULL;

            return new GeoNear2DSphereStage(params, txn, ws, collection, s2Index);
        else if (STAGE_TEXT == root->getType()) {
            const TextNode* node = static_cast<const TextNode*>(root);

            if (NULL == collection) {
                warning() << "Null collection for text";
                return NULL;
            // Exactly one index of type "text" must exist on the collection.
            vector<IndexDescriptor*> idxMatches;
            collection->getIndexCatalog()->findIndexByType(txn, "text", idxMatches);
            if (1 != idxMatches.size()) {
                warning() << "No text index, or more than one text index";
                return NULL;
            IndexDescriptor* index = idxMatches[0];
            const FTSAccessMethod* fam =
                static_cast<FTSAccessMethod*>( collection->getIndexCatalog()->getIndex( index ) );
            TextStageParams params(fam->getSpec());

            //params.collection = collection;
            params.index = index;
            params.spec = fam->getSpec();
            params.indexPrefix = node->indexPrefix;

            // Fall back to the index spec's default language when the node does not name one.
            const std::string& language = ("" == node->language
                                           ? fam->getSpec().defaultLanguage().str()
                                           : node->language);

            // NOTE(review): the remaining arguments of parse() are missing from the excerpt.
            Status parseStatus = params.query.parse(node->query, language,
            if (!parseStatus.isOK()) {
                warning() << "Can't parse text search query";
                return NULL;

            return new TextStage(txn, params, ws, node->filter.get());
        else if (STAGE_SHARDING_FILTER == root->getType()) {
            const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root);
            PlanStage* childStage = buildStages(txn, collection, qsol, fn->children[0], ws);
            if (NULL == childStage) { return NULL; }
            // NOTE(review): 'collection' is dereferenced here without a NULL check in this branch.
            return new ShardFilterStage(shardingState.getCollectionMetadata(collection->ns()),
                                        ws, childStage);
        else if (STAGE_KEEP_MUTATIONS == root->getType()) {
            const KeepMutationsNode* km = static_cast<const KeepMutationsNode*>(root);
            PlanStage* childStage = buildStages(txn, collection, qsol, km->children[0], ws);
            if (NULL == childStage) { return NULL; }
            return new KeepMutationsStage(km->filter.get(), ws, childStage);
        else if (STAGE_DISTINCT == root->getType()) {
            const DistinctNode* dn = static_cast<const DistinctNode*>(root);

            if (NULL == collection) {
                warning() << "Can't distinct-scan null namespace";
                return NULL;

            DistinctParams params;

            // NOTE(review): unlike the IXSCAN branch, the descriptor is not checked for NULL here.
            params.descriptor =
                collection->getIndexCatalog()->findIndexByKeyPattern(txn, dn->indexKeyPattern);
            params.direction = dn->direction;
            params.bounds = dn->bounds;
            params.fieldNo = dn->fieldNo;
            return new DistinctScan(txn, params, ws);
        else if (STAGE_COUNT_SCAN == root->getType()) {
            const CountNode* cn = static_cast<const CountNode*>(root);

            if (NULL == collection) {
                warning() << "Can't fast-count null namespace (collection null)";
                return NULL;

            CountScanParams params;

            // NOTE(review): unlike the IXSCAN branch, the descriptor is not checked for NULL here.
            params.descriptor =
                collection->getIndexCatalog()->findIndexByKeyPattern(txn, cn->indexKeyPattern);
            params.startKey = cn->startKey;
            params.startKeyInclusive = cn->startKeyInclusive;
            params.endKey = cn->endKey;
            params.endKeyInclusive = cn->endKeyInclusive;

            return new CountScan(txn, params, ws);
        else {
            // Unhandled node type: log its string form and report failure to the caller.
            mongoutils::str::stream ss;
            root->appendToString(&ss, 0);
            string nodeStr(ss);
            warning() << "Can't build exec tree for node " << nodeStr << endl;
            return NULL;
Beispiel #5
    std::string runQuery(OperationContext* txn,
                         QueryMessage& q,
                         const NamespaceString& nss,
                         CurOp& curop,
                         Message &result) {
        // Validate the namespace.
        uassert(16256, str::stream() << "Invalid ns [" << nss.ns() << "]", nss.isValid());

        // Set curop information.
        beginQueryOp(nss, q.query, q.ntoreturn, q.ntoskip, &curop);

        // Parse the qm into a CanonicalQuery.
        std::auto_ptr<CanonicalQuery> cq;
            CanonicalQuery* cqRaw;
            Status canonStatus = CanonicalQuery::canonicalize(q,
                                                              WhereCallbackReal(txn, nss.db()));
            if (!canonStatus.isOK()) {
                uasserted(17287, str::stream() << "Can't canonicalize query: "
                                               << canonStatus.toString());

        LOG(5) << "Running query:\n" << cq->toString();
        LOG(2) << "Running query: " << cq->toStringShort();

        // Parse, canonicalize, plan, transcribe, and get a plan executor.
        AutoGetCollectionForRead ctx(txn, nss);
        Collection* collection = ctx.getCollection();

        const int dbProfilingLevel = ctx.getDb() ? ctx.getDb()->getProfilingLevel() :

        // We have a parsed query. Time to get the execution plan for it.
        std::unique_ptr<PlanExecutor> exec;
            PlanExecutor* rawExec;
            Status execStatus = getExecutorFind(txn,
        const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed();

        // If it's actually an explain, do the explain and return rather than falling through
        // to the normal query execution loop.
        if (pq.isExplain()) {
            BufBuilder bb;

            BSONObjBuilder explainBob;
            Explain::explainStages(exec.get(), ExplainCommon::EXEC_ALL_PLANS, &explainBob);

            // Add the resulting object to the return buffer.
            BSONObj explainObj = explainBob.obj();
            bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

            // TODO: Does this get overwritten/do we really need to set this twice?
            curop.debug().query = q.query;

            // Set query result fields.
            QueryResult::View qr = bb.buf();
            curop.debug().responseLength = bb.len();
            result.setData(qr.view2ptr(), true);
            return "";

        // We freak out later if this changes before we're done with the query.
        const ChunkVersion shardingVersionAtStart = shardingState.getVersion(nss.ns());

        // Handle query option $maxTimeMS (not used with commands).
        curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
        txn->checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

        // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
        bool slaveOK = pq.isSlaveOk() || pq.hasReadPref();
        Status serveReadsStatus = repl::getGlobalReplicationCoordinator()->checkCanServeReadsFor(

        // Run the query.
        // bb is used to hold query results
        // this buffer should contain either requested documents per query or
        // explain information, but not both
        BufBuilder bb(32768);

        // How many results have we obtained from the executor?
        int numResults = 0;

        // If we're replaying the oplog, we save the last time that we read.
        Timestamp slaveReadTill;

        BSONObj obj;
        PlanExecutor::ExecState state;
        // uint64_t numMisplacedDocs = 0;

        // Get summary info about which plan the executor is using.
        curop.debug().planSummary = Explain::getPlanSummary(exec.get());

        while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
            // Add result to output buffer.
            bb.appendBuf((void*)obj.objdata(), obj.objsize());

            // Count the result.

            // Possibly note slave's position in the oplog.
            if (pq.isOplogReplay()) {
                BSONElement e = obj["ts"];
                if (Date == e.type() || bsonTimestamp == e.type()) {
                    slaveReadTill = e.timestamp();

            if (enoughForFirstBatch(pq, numResults, bb.len())) {
                LOG(5) << "Enough for first batch, wantMore=" << pq.wantMore()
                       << " numToReturn=" << pq.getNumToReturn()
                       << " numResults=" << numResults
                       << endl;

        // If we cache the executor later, we want to deregister it as it receives notifications
        // anyway by virtue of being cached.
        // If we don't cache the executor later, we are deleting it, so it must be deregistered.
        // So, no matter what, deregister the executor.

        // Caller expects exceptions thrown in certain cases.
        if (PlanExecutor::FAILURE == state) {
            scoped_ptr<PlanStageStats> stats(exec->getStats());
            error() << "Plan executor error, stats: "
                    << Explain::statsToBSON(*stats);
            uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));

        // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the
        // chunk belonged on this node are gone. Separating chunk lifetime management from
        // ClientCursor should allow this check to go away.
        if (!shardingState.getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
            // if the version changed during the query we might be missing some data and its safe to
            // send this as mongos can resend at this point
            throw SendStaleConfigException(nss.ns(), "version changed during initial query",

        // Fill out curop based on query results. If we have a cursorid, we will fill out curop with
        // this cursorid later.
        long long ccId = 0;

        if (shouldSaveCursor(txn, collection, state, exec.get())) {
            // We won't use the executor until it's getMore'd.

            // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
            // inserted into a global map by its ctor.
            ClientCursor* cc = new ClientCursor(collection->getCursorManager(),
            ccId = cc->cursorid();

            if (txn->getClient()->isInDirectClient()) {
            else if (state == PlanExecutor::IS_EOF && pq.isTailable()) {
                // Don't stash the RU for tailable cursors at EOF, let them get a new RU on their
                // next getMore.
            else {
                // We stash away the RecoveryUnit in the ClientCursor.  It's used for subsequent
                // getMore requests.  The calling OpCtx gets a fresh RecoveryUnit.
                StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
                          == OperationContext::kNotInUnitOfWork);

            LOG(5) << "caching executor with cursorid " << ccId
                   << " after returning " << numResults << " results" << endl;

            // TODO document
            if (pq.isOplogReplay() && !slaveReadTill.isNull()) {

            // TODO document
            if (pq.isExhaust()) {
                curop.debug().exhaust = true;


            // If the query had a time limit, remaining time is "rolled over" to the cursor (for
            // use by future getmore ops).

            endQueryOp(cc->getExecutor(), dbProfilingLevel, numResults, ccId, &curop);
        else {
            LOG(5) << "Not caching executor but returning " << numResults << " results.\n";
            endQueryOp(exec.get(), dbProfilingLevel, numResults, ccId, &curop);

        // Add the results from the query into the output buffer.
        result.appendData(bb.buf(), bb.len());

        // Fill out the output buffer's header.
        QueryResult::View qr = result.header().view2ptr();

        // curop.debug().exhaust is set above.
        return curop.debug().exhaust ? nss.ns() : "";
Beispiel #6
        /**
         * Runs the command object cmdobj on the db with name dbname and puts result in result.
         * @param dbname, name of db
         * @param cmdobj, object that contains entire command
         * @param options
         * @param errmsg, reference to error message
         * @param result, reference to builder for result
         * @param fromRepl
         * @return true if successful, false otherwise
         */
        bool FTSCommand::_run(OperationContext* txn,
                              const string& dbname,
                              BSONObj& cmdObj,
                              int cmdOptions,
                              const string& ns,
                              const string& searchString,
                              string language, // "" for not-set
                              int limit,
                              BSONObj& filter,
                              BSONObj& projection,
                              string& errmsg,
                              BSONObjBuilder& result ) {
            // Runs a text search by rewriting the command as a regular $text query, executing it,
            // and reformatting the output into a "results" array plus a "stats" sub-object in
            // 'result'.  Returns true on success.  On failure, 'errmsg' receives the reason of the
            // failing Status and false is returned.
            //
            // NOTE(review): the previous text of this function had truncated statements and
            // unbalanced braces.  The statements marked "restored" below were reconstructed from
            // the surrounding comments and the function signature; confirm against upstream.

            Timer comm;

            // Rewrite the cmd as a normal query: the caller's filter plus a $text clause.
            BSONObjBuilder queryBob;
            queryBob.appendElements(filter);  // restored: 'filter' was otherwise unused

            BSONObjBuilder textBob;
            textBob.append("$search", searchString);
            if (!language.empty()) {
                textBob.append("$language", language);
            }
            queryBob.append("$text", textBob.obj());

            // This is the query we exec.
            BSONObj queryObj = queryBob.obj();

            // We sort by the score.
            BSONObj sortSpec = BSON("$s" << BSON("$meta" << LiteParsedQuery::metaTextScore));

            // We also project the score into the document and strip it out later during the
            // reformatting of the results.
            BSONObjBuilder projBob;
            projBob.appendElements(projection);  // restored: 'projection' was otherwise unused
            projBob.appendElements(sortSpec);    // restored: puts "$s" into every returned doc
            BSONObj projObj = projBob.obj();

            AutoGetCollectionForRead ctx(txn, ns);

            // NOTE(review): only the trailing WhereCallbackReal argument of this call survived;
            // the leading arguments are reconstructed -- verify against the declaration of
            // CanonicalQuery::canonicalize (no skip, the caller's limit, no hint).
            CanonicalQuery* cq;
            Status canonicalizeStatus =
                    CanonicalQuery::canonicalize(ns,
                                                 queryObj,
                                                 sortSpec,
                                                 projObj,
                                                 0,
                                                 limit,
                                                 BSONObj(),
                                                 &cq,
                                                 WhereCallbackReal(txn, dbname));
            if (!canonicalizeStatus.isOK()) {
                errmsg = canonicalizeStatus.reason();
                return false;
            }

            PlanExecutor* rawExec;
            Status getExecStatus = getExecutor(txn, ctx.getCollection(), cq, &rawExec);
            if (!getExecStatus.isOK()) {
                errmsg = getExecStatus.reason();
                return false;
            }

            auto_ptr<PlanExecutor> exec(rawExec);

            BSONArrayBuilder resultBuilder(result.subarrayStart("results"));

            // Quoth: "leave a mb for other things"
            int resultSize = 1024 * 1024;

            int numReturned = 0;

            BSONObj obj;
            while (PlanExecutor::ADVANCED == exec->getNext(&obj, NULL)) {
                // Stop before the reply would exceed the maximum user document size.
                if ((resultSize + obj.objsize()) >= BSONObjMaxUserSize) {
                    break;
                }

                // We return an array of results.  Add another element.
                BSONObjBuilder oneResultBuilder(resultBuilder.subobjStart());
                oneResultBuilder.append("score", obj["$s"].number());

                // Strip out the score from the returned obj.
                BSONObjIterator resIt(obj);
                BSONObjBuilder resBob;
                while (resIt.more()) {
                    BSONElement elt = resIt.next();
                    if (!mongoutils::str::equals("$s", elt.fieldName())) {
                        resBob.append(elt);
                    }
                }
                oneResultBuilder.append("obj", resBob.obj());
                BSONObj addedArrayObj = oneResultBuilder.done();
                resultSize += addedArrayObj.objsize();

                // Without this the "n" stat below would always report 0.
                ++numReturned;
            }

            resultBuilder.done();

            // returns some stats to the user
            BSONObjBuilder stats(result.subobjStart("stats"));

            // Fill in nscanned from the explain.
            PlanSummaryStats summary;
            Explain::getSummaryStats(exec.get(), &summary);
            stats.appendNumber("nscanned", summary.totalKeysExamined);
            stats.appendNumber("nscannedObjects", summary.totalDocsExamined);

            stats.appendNumber( "n" , numReturned );
            stats.append( "timeMicros", static_cast<int>(comm.micros()) );
            stats.done();

            return true;
        }
Beispiel #7
        /**
         * Recursively builds a PlanStage from a BSON description of a single plan node.
         *
         * The first element of 'obj' names the stage (its field name, e.g. "ixscan", "andHash",
         * "andSorted", "or", "fetch", "limit", "skip", "cscan", "text", "delete") and its value is
         * an object that may carry a "filter" sub-object and an "args" sub-object.
         *
         * Returns a newly allocated stage, or NULL when the description is not shaped as expected
         * or names an unknown stage.  Several malformed inputs raise a uassert instead.
         *
         * NOTE(review): a number of statements in this listing are truncated (initializers such as
         * "BSONElement e =;", uassert calls without their condition, recursive calls cut after
         * "txn,") and closing braces are missing.  The comments below describe only what is
         * visible and flag the gaps; restore the missing text from upstream before compiling.
         */
        PlanStage* parseQuery(OperationContext* txn,
                              Collection* collection,
                              BSONObj obj,
                              WorkingSet* workingSet,
                              OwnedPointerVector<MatchExpression>* exprs) {

            // The value of the first element must itself be an object holding the parameters.
            BSONElement firstElt = obj.firstElement();
            if (!firstElt.isABSONObj()) { return NULL; }
            BSONObj paramObj = firstElt.Obj();

            MatchExpression* matcher = NULL;
            BSONObj nodeArgs;

            // Every node has these two fields.
            const string filterTag = "filter";
            const string argsTag = "args";

            // Walk the parameters, picking out the optional filter and the stage arguments.
            BSONObjIterator it(paramObj);
            while (it.more()) {
                // NOTE(review): initializer missing in this listing; presumably it.next().
                BSONElement e =;
                if (!e.isABSONObj()) { return NULL; }
                BSONObj argObj = e.Obj();
                if (filterTag == e.fieldName()) {
                    // Parse the filter, passing a WhereCallbackReal built from the collection's
                    // database name.
                    StatusWithMatchExpression swme = MatchExpressionParser::parse(
                                        argObj, WhereCallbackReal(txn, collection->ns().db()));
                    if (!swme.isOK()) { return NULL; }
                    // exprs is what will wind up deleting this.
                    // NOTE(review): the statement that stores the matcher in 'exprs' is not
                    // visible in this listing.
                    matcher = swme.getValue();
                    verify(NULL != matcher);
                else if (argsTag == e.fieldName()) {
                    nodeArgs = argObj;
                else {
                    // Any other field name is rejected; uasserted throws, so the return that
                    // follows is not expected to be reached.
                    uasserted(16910, "Unknown fieldname " + string(e.fieldName())
                                     + " in query node " + obj.toString());
                    return NULL;

            // Dispatch on the stage name and build the corresponding stage.
            string nodeName = firstElt.fieldName();

            if ("ixscan" == nodeName) {
                // This'll throw if it's not an obj but that's OK.
                BSONObj keyPatternObj = nodeArgs["keyPattern"].Obj();

                // NOTE(review): the index lookup expression is missing from this listing.
                IndexDescriptor* desc =
                uassert(16890, "Can't find index: " + keyPatternObj.toString(), desc);

                // Simple range scan between startKey and endKey with the requested inclusivity
                // and direction.
                IndexScanParams params;
                params.descriptor = desc;
                params.bounds.isSimpleRange = true;
                params.bounds.startKey = nodeArgs["startKey"].Obj();
                params.bounds.endKey = nodeArgs["endKey"].Obj();
                params.bounds.endKeyInclusive = nodeArgs["endKeyInclusive"].Bool();
                params.direction = nodeArgs["direction"].numberInt();

                return new IndexScan(txn, params, workingSet, matcher);
            else if ("andHash" == nodeName) {
                // NOTE(review): the condition argument of this uassert is missing.
                uassert(16921, "Nodes argument must be provided to AND",

                auto_ptr<AndHashStage> andStage(new AndHashStage(workingSet, matcher, collection));

                // Each entry of args.nodes is parsed recursively as a child node.
                int nodesAdded = 0;
                BSONObjIterator it(nodeArgs["nodes"].Obj());
                while (it.more()) {
                    // NOTE(review): initializer missing; presumably it.next().
                    BSONElement e =;
                    // NOTE(review): the condition argument of this uassert is missing.
                    uassert(16922, "node of AND isn't an obj?: " + e.toString(),

                    PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
                    uassert(16923, "Can't parse sub-node of AND: " + e.Obj().toString(),
                            NULL != subNode);
                    // takes ownership
                    // NOTE(review): the statements that hand subNode to andStage and update
                    // nodesAdded are not visible in this listing.

                uassert(16927, "AND requires more than one child", nodesAdded >= 2);

                return andStage.release();
            else if ("andSorted" == nodeName) {
                // NOTE(review): the condition argument of this uassert is missing.
                uassert(16924, "Nodes argument must be provided to AND",

                auto_ptr<AndSortedStage> andStage(
                                            new AndSortedStage(workingSet, matcher, collection));

                // Same child-parsing loop as the andHash branch.
                int nodesAdded = 0;
                BSONObjIterator it(nodeArgs["nodes"].Obj());
                while (it.more()) {
                    // NOTE(review): initializer missing; presumably it.next().
                    BSONElement e =;
                    // NOTE(review): the condition argument of this uassert is missing.
                    uassert(16925, "node of AND isn't an obj?: " + e.toString(),

                    PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
                    uassert(16926, "Can't parse sub-node of AND: " + e.Obj().toString(),
                            NULL != subNode);
                    // takes ownership
                    // NOTE(review): the statements that hand subNode to andStage and update
                    // nodesAdded are not visible in this listing.

                uassert(16928, "AND requires more than one child", nodesAdded >= 2);

                return andStage.release();
            else if ("or" == nodeName) {
                // NOTE(review): the message below says "AND" although this is the OR branch; the
                // conditions of both uasserts are missing from this listing.
                uassert(16934, "Nodes argument must be provided to AND",
                uassert(16935, "Dedup argument must be provided to OR",
                BSONObjIterator it(nodeArgs["nodes"].Obj());
                // NOTE(review): the remaining OrStage constructor arguments are missing.
                auto_ptr<OrStage> orStage(new OrStage(workingSet, nodeArgs["dedup"].Bool(),
                while (it.more()) {
                    // NOTE(review): initializer missing; presumably it.next().
                    BSONElement e =;
                    // Unlike the AND branches, a non-object child returns NULL here.
                    if (!e.isABSONObj()) { return NULL; }
                    PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
                    uassert(16936, "Can't parse sub-node of OR: " + e.Obj().toString(),
                            NULL != subNode);
                    // takes ownership
                    // NOTE(review): the statement that hands subNode to orStage is not visible.

                return orStage.release();
            else if ("fetch" == nodeName) {
                // NOTE(review): the uassert condition and the rest of the recursive call are
                // missing from this listing.
                uassert(16929, "Node argument must be provided to fetch",
                PlanStage* subNode = parseQuery(txn,
                return new FetchStage(workingSet, subNode, matcher, collection);
            else if ("limit" == nodeName) {
                // A filter on this node is an error.
                uassert(16937, "Limit stage doesn't have a filter (put it on the child)",
                        NULL == matcher);
                // NOTE(review): the conditions of the next two uasserts and the rest of the
                // recursive call are missing from this listing.
                uassert(16930, "Node argument must be provided to limit",
                uassert(16931, "Num argument must be provided to limit",
                PlanStage* subNode = parseQuery(txn,
                return new LimitStage(nodeArgs["num"].numberInt(), workingSet, subNode);
            else if ("skip" == nodeName) {
                // A filter on this node is an error.
                uassert(16938, "Skip stage doesn't have a filter (put it on the child)",
                        NULL == matcher);
                // NOTE(review): the conditions of the next two uasserts and the rest of the
                // recursive call are missing from this listing.
                uassert(16932, "Node argument must be provided to skip",
                uassert(16933, "Num argument must be provided to skip",
                PlanStage* subNode = parseQuery(txn,
                return new SkipStage(nodeArgs["num"].numberInt(), workingSet, subNode);
            else if ("cscan" == nodeName) {
                CollectionScanParams params;
                params.collection = collection;

                // What direction?
                // A direction of 1 scans forward; any other number scans backward.
                // NOTE(review): the condition argument of this uassert is missing.
                uassert(16963, "Direction argument must be specified and be a number",
                if (1 == nodeArgs["direction"].numberInt()) {
                    params.direction = CollectionScanParams::FORWARD;
                else {
                    params.direction = CollectionScanParams::BACKWARD;

                return new CollectionScan(txn, params, workingSet, matcher);
            // sort is disabled for now.
            // NOTE(review): no matching #endif is visible in this listing, so it is unclear where
            // the disabled region ends.  The disabled "sort" branch also passes 'db', which is not
            // a parameter of this function.
#if 0
            else if ("sort" == nodeName) {
                uassert(16969, "Node argument must be provided to sort",
                uassert(16970, "Pattern argument must be provided to sort",
                PlanStage* subNode = parseQuery(txn, db, nodeArgs["node"].Obj(), workingSet, exprs);
                SortStageParams params;
                params.pattern = nodeArgs["pattern"].Obj();
                return new SortStage(params, workingSet, subNode);
            else if ("mergeSort" == nodeName) {
                // NOTE(review): the conditions of these two uasserts are missing.
                uassert(16971, "Nodes argument must be provided to sort",
                uassert(16972, "Pattern argument must be provided to sort",

                MergeSortStageParams params;
                params.pattern = nodeArgs["pattern"].Obj();
                // Dedup is true by default.

                auto_ptr<MergeSortStage> mergeStage(
                                            new MergeSortStage(params, workingSet, collection));

                BSONObjIterator it(nodeArgs["nodes"].Obj());
                while (it.more()) {
                    // NOTE(review): initializer missing; presumably it.next().
                    BSONElement e =;
                    // NOTE(review): the condition argument of this uassert is missing.
                    uassert(16973, "node of mergeSort isn't an obj?: " + e.toString(),

                    PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
                    uassert(16974, "Can't parse sub-node of mergeSort: " + e.Obj().toString(),
                            NULL != subNode);
                    // takes ownership
                    // NOTE(review): the statement that hands subNode to mergeStage is not visible.
                return mergeStage.release();
            else if ("text" == nodeName) {
                string search = nodeArgs["search"].String();

                // The collection must have exactly one index of type "text".
                vector<IndexDescriptor*> idxMatches;
                collection->getIndexCatalog()->findIndexByType("text", idxMatches);
                uassert(17194, "Expected exactly one text index", idxMatches.size() == 1);

                IndexDescriptor* index = idxMatches[0];
                // NOTE(review): 'fam' is dereferenced below without checking the dynamic_cast
                // result for NULL.
                FTSAccessMethod* fam =
                    dynamic_cast<FTSAccessMethod*>( collection->getIndexCatalog()->getIndex( index ) );
                TextStageParams params(fam->getSpec());
                params.index = index;

                // TODO: Deal with non-empty filters.  This is a hack to put in covering information
                // that can only be checked for equality.  We ignore this now.
                Status s = fam->getSpec().getIndexPrefix(BSONObj(), &params.indexPrefix);
                if (!s.isOK()) {
                    // errmsg = s.toString();
                    return NULL;

                params.spec = fam->getSpec();

                // Parse the search string using the index spec's default language; a parse
                // failure yields NULL.
                if (!params.query.parse(search,
                                        fam->getSpec().defaultLanguage().str().c_str()).isOK()) {
                    return NULL;

                return new TextStage(txn, params, workingSet, matcher);
            else if ("delete" == nodeName) {
                // A filter on this node is an error.
                uassert(18636, "Delete stage doesn't have a filter (put it on the child)",
                        NULL == matcher);
                // NOTE(review): the condition argument of this uassert is missing.
                uassert(18637, "node argument must be provided to delete",
                // Both flags must be present and of BSON type Bool.
                uassert(18638, "isMulti argument must be provided to delete",
                        nodeArgs["isMulti"].type() == Bool);
                uassert(18639, "shouldCallLogOp argument must be provided to delete",
                        nodeArgs["shouldCallLogOp"].type() == Bool);
                // NOTE(review): the rest of this recursive call is missing from the listing.
                PlanStage* subNode = parseQuery(txn,
                DeleteStageParams params;
                params.isMulti = nodeArgs["isMulti"].Bool();
                params.shouldCallLogOp = nodeArgs["shouldCallLogOp"].Bool();
                return new DeleteStage(txn, params, workingSet, collection, subNode);
            else {
                // Unknown stage name.
                return NULL;