예제 #1
    // static
    //
    // Builds a query solution for 'query' and, when one can be produced, stores it in '*out'.
    //
    // In the lines visible here 'cachedSoln' is not consulted yet (see the XXX note below): the
    // expression tree is cloned and passed to QueryPlannerAccess::buildIndexedDataAccess()
    // together with params.indices.  '*out' is assigned only when both that call and
    // QueryPlannerAnalysis::analyzeDataAccess() return non-NULL.  Judging by indentation, the
    // final return yields Status::OK() whether or not a solution was produced.
    //
    // NOTE(review): this excerpt appears to omit the closing braces of the nested 'if' blocks
    // and of the function itself.
    Status QueryPlanner::planFromCache(const CanonicalQuery& query,
                                       const QueryPlannerParams& params,
                                       CachedSolution* cachedSoln,
                                       QuerySolution** out) {

        // Create a copy of the expression tree.  We use cachedSoln to annotate this with indices.
        MatchExpression* clone = query.root()->shallowClone();

        // XXX: Use data in cachedSoln to tag 'clone' with the indices used.  The tags use an index
        // ID which is an index into some vector of IndexEntry(s).  How do we maintain this across
        // calls to plan?  Do we want to store in the soln the keypatterns of the indices and just
        // map those to an index into params.indices?  Might be easiest thing to do, and certainly
        // most intelligible for debugging.

        // Use the cached index assignments to build solnRoot.  Takes ownership of clone.
        QuerySolutionNode* solnRoot =
            QueryPlannerAccess::buildIndexedDataAccess(query, clone, false, params.indices);

        // XXX: are the NULL cases an error/when does this happen / can this happen?
        if (NULL != solnRoot) {
            // Turn the data-access tree into a full solution; only a non-NULL result is
            // logged and handed back through 'out'.
            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
            if (NULL != soln) {
                QLOG() << "Planner: adding cached solution:\n" << soln->toString() << endl;
                *out = soln;

        // XXX: if any NULLs return error status?
        return Status::OK();
예제 #2
        // Test body: calls insert() with two documents per loop iteration (N iterations), adds
        // indices on 'a' and 'b', and asks pickBestPlan() for the winning solution of a query
        // over a, b and c.
        //
        // NOTE(review): this excerpt appears to omit lines -- the loop's closing brace, the call
        // that populates 'cq' (only its fromjson() argument is visible), and the statement that
        // consumes the expected-plan string on the last line.
        void run() {
            for (int i = 0; i < N; ++i) {
                // One document with only 'a', and one with 'a', 'b' and a per-iteration 'c'.
                insert(BSON("a" << 1));
                insert(BSON("a" << 1 << "b" << 1 << "c" << i));

            // Indices on 'a' and 'b'.
            addIndex(BSON("a" << 1));
            addIndex(BSON("b" << 1));

            // Solutions using either 'a' or 'b' will take a long time to start producing
            // results. However, an index scan on 'b' will start producing results sooner
            // than an index scan on 'a'.
            CanonicalQuery* cq;
                                                fromjson("{a: 1, b: 1, c: {$gte: 5000}}"),
            ASSERT(NULL != cq);

            // Use index on 'b'.
            QuerySolution* soln = pickBestPlan(cq);
            // Print the chosen solution to stderr for debugging.
            std::cerr << "PlanRankingWorkPlansLongEnough: soln=" << soln->toString() << std::endl;
                        "{fetch: {node: {ixscan: {pattern: {b: 1}}}}}",
예제 #3
    // Works the candidate plans for a trial period, selects the winner through
    // PlanRanker::pickBestPlan(), optionally records a backup plan without a blocking stage, and
    // adds the result to the plan cache when the caching conditions documented below hold.
    //
    // 'yieldPolicy' is forwarded to workAllPlans() on every trial iteration.
    //
    // Returns the status read from the '_statusMemberId' working-set member when '_failure' is
    // set after the trial period; otherwise the visible return path yields Status::OK().
    //
    // NOTE(review): this excerpt appears to omit closing braces and some statements (for
    // example the bodies of a few 'if' and 'for' blocks), so nesting is inferred from
    // indentation.
    Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
        // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
        // execution work that happens here, so this is needed for the time accounting to
        // make sense.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        // Run each plan some number of times. This number is at least as great as
        // 'internalQueryPlanEvaluationWorks', but may be larger for big collections.
        size_t numWorks = internalQueryPlanEvaluationWorks;
        if (NULL != _collection) {
            // For large collections, the number of works is set to be this
            // fraction of the collection size.
            double fraction = internalQueryPlanEvaluationCollFraction;

            numWorks = std::max(size_t(internalQueryPlanEvaluationWorks),
                                size_t(fraction * _collection->numRecords(_txn)));

        // We treat ntoreturn as though it is a limit during plan ranking.
        // This means that ranking might not be great for sort + batchSize.
        // But it also means that we don't buffer too much data for sort + limit.
        // See SERVER-14174 for details.
        size_t numToReturn = _query->getParsed().getNumToReturn();

        // Determine the number of results which we will produce during the plan
        // ranking phase before stopping.
        size_t numResults = (size_t)internalQueryPlanEvaluationMaxResults;
        if (numToReturn > 0) {
            numResults = std::min(numToReturn, numResults);

        // Work the plans, stopping when a plan hits EOF or returns some
        // fixed number of results.
        for (size_t ix = 0; ix < numWorks; ++ix) {
            // A false return from workAllPlans() ends the trial period early.
            bool moreToDo = workAllPlans(numResults, yieldPolicy);
            if (!moreToDo) { break; }

        if (_failure) {
            // The failure status is read from the first candidate's working set.
            invariant(WorkingSet::INVALID_ID != _statusMemberId);
            WorkingSetMember* member = _candidates[0].ws->get(_statusMemberId);
            return WorkingSetCommon::getMemberStatus(*member);

        // After picking best plan, ranking will own plan stats from
        // candidate solutions (winner and losers).
        std::auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision);
        _bestPlanIdx = PlanRanker::pickBestPlan(_candidates, ranking.get());
        verify(_bestPlanIdx >= 0 && _bestPlanIdx < static_cast<int>(_candidates.size()));

        // Copy candidate order. We will need this to sort candidate stats for explain
        // after transferring ownership of 'ranking' to plan cache.
        std::vector<size_t> candidateOrder = ranking->candidateOrder;

        CandidatePlan& bestCandidate = _candidates[_bestPlanIdx];
        std::list<WorkingSetID>& alreadyProduced = bestCandidate.results;
        QuerySolution* bestSolution = bestCandidate.solution;

        LOG(5) << "Winning solution:\n" << bestSolution->toString() << endl;
        LOG(2) << "Winning plan: " << Explain::getPlanSummary(bestCandidate.root);

        // A backup plan is only looked for when the winner has a blocking stage and has not
        // produced any results yet.
        _backupPlanIdx = kNoSuchPlan;
        if (bestSolution->hasBlockingStage && (0 == alreadyProduced.size())) {
            LOG(5) << "Winner has blocking stage, looking for backup plan...\n";
            for (size_t ix = 0; ix < _candidates.size(); ++ix) {
                if (!_candidates[ix].solution->hasBlockingStage) {
                    LOG(5) << "Candidate " << ix << " is backup child\n";
                    _backupPlanIdx = ix;

        // Logging for tied plans.
        if (ranking->tieForBest && NULL != _collection) {
            // These arrays having two or more entries is implied by 'tieForBest'.
            invariant(ranking->scores.size() > 1);
            invariant(ranking->candidateOrder.size() > 1);

            size_t winnerIdx = ranking->candidateOrder[0];
            size_t runnerUpIdx = ranking->candidateOrder[1];

            LOG(1) << "Winning plan tied with runner-up."
                   << " ns: " << _collection->ns()
                   << " " << _query->toStringShort()
                   << " winner score: " << ranking->scores[0]
                   << " winner summary: "
                   << Explain::getPlanSummary(_candidates[winnerIdx].root)
                   << " runner-up score: " << ranking->scores[1]
                   << " runner-up summary: "
                   << Explain::getPlanSummary(_candidates[runnerUpIdx].root);

            // There could be more than a 2-way tie, so log the stats for the remaining plans
            // involved in the tie.
            static const double epsilon = 1e-10;
            for (size_t i = 2; i < ranking->scores.size(); i++) {
                // Scores at least 'epsilon' away from the winner's score are not treated as
                // tied.  NOTE(review): the body of this 'if' is not visible in this excerpt.
                if (fabs(ranking->scores[i] - ranking->scores[0]) >= epsilon) {

                size_t planIdx = ranking->candidateOrder[i];

                LOG(1) << "Plan " << i << " involved in multi-way tie."
                       << " ns: " << _collection->ns()
                       << " " << _query->toStringShort()
                       << " score: " << ranking->scores[i]
                       << " summary: "
                       << Explain::getPlanSummary(_candidates[planIdx].root);

        // If the winning plan produced no results during the ranking period (and, therefore, no
        // plan produced results during the ranking period), then we will not create a plan cache
        // entry.
        if (alreadyProduced.empty() && NULL != _collection) {
            size_t winnerIdx = ranking->candidateOrder[0];
            LOG(1) << "Winning plan had zero results. Not caching."
                   << " ns: " << _collection->ns()
                   << " " << _query->toStringShort()
                   << " winner score: " << ranking->scores[0]
                   << " winner summary: "
                   << Explain::getPlanSummary(_candidates[winnerIdx].root);

        // Store the choice we just made in the cache. In order to do so,
        //   1) the query must be of a type that is safe to cache,
        //   2) two or more plans cannot have tied for the win. Caching in the case of ties can
        //   cause successive queries of the same shape to use a bad index.
        //   3) Furthermore, the winning plan must have returned at least one result. Plans which
        //   return zero results cannot be reliably ranked. Such query shapes are generally
        //   existence type queries, and a winning plan should get cached once the query finds a
        //   result.
        if (PlanCache::shouldCacheQuery(*_query)
            && !ranking->tieForBest
            && !alreadyProduced.empty()) {
            // Create list of candidate solutions for the cache with
            // the best solution at the front.
            std::vector<QuerySolution*> solutions;

            // Generate solutions and ranking decisions sorted by score.
            for (size_t orderingIndex = 0;
                 orderingIndex < candidateOrder.size(); ++orderingIndex) {
                // index into candidates/ranking
                size_t ix = candidateOrder[orderingIndex];

            // Check solution cache data. Do not add to cache if
            // we have any invalid SolutionCacheData data.
            // XXX: One known example is 2D queries
            bool validSolutions = true;
            for (size_t ix = 0; ix < solutions.size(); ++ix) {
                if (NULL == solutions[ix]->cacheData.get()) {
                    LOG(5) << "Not caching query because this solution has no cache data: "
                           << solutions[ix]->toString();
                    validSolutions = false;

            if (validSolutions) {
                // release() gives up this function's ownership of 'ranking' as it is passed to
                // the plan cache.
                _collection->infoCache()->getPlanCache()->add(*_query, solutions, ranking.release());

        return Status::OK();
예제 #4
// static
//
// Plans 'query' against the indices in 'params', producing candidate QuerySolutions in 'out'.
//
// The visible flow is, in order:
//   - tailable cursors and a $natural hint/sort: build a collection scan and return;
//   - resolve the hinted index (by key pattern or by name; snapshot selects the _id index),
//     returning BadValue "bad hint" when no index matches;
//   - min()/max(): pick a compatible index, build a single index scan, and return;
//   - rate the relevant indices against the predicates and validate the index requirements
//     of GEO_NEAR and TEXT;
//   - enumerate indexed plans while out->size() < params.maxIndexedSolutions;
//   - fall back to a whole-index scan for a hint, an index that provides the sort, or a
//     collection scan.
//
// Returns Status::OK() on the visible success paths and ErrorCodes::BadValue for the error
// cases.
//
// NOTE(review): this excerpt appears to omit closing braces and some statements (for example,
// no line that adds a solution to 'out' is visible), so nesting is inferred from indentation.
Status QueryPlanner::plan(const CanonicalQuery& query,
                          const QueryPlannerParams& params,
                          std::vector<QuerySolution*>* out) {
    LOG(5) << "Beginning planning..." << endl
           << "=============================" << endl
           << "Options = " << optionString(params.options) << endl
           << "Canonical query:" << endl
           << query.toString() << "=============================" << endl;

    for (size_t i = 0; i < params.indices.size(); ++i) {
        LOG(5) << "Index " << i << " is " << params.indices[i].toString() << endl;

    // Collection scans are allowed unless the caller set NO_TABLE_SCAN.
    bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

    // If the query requests a tailable cursor, the only solution is a collscan + filter with
    // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
    // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
    // can't provide one.  Is this what we want?
    if (query.getParsed().isTailable()) {
        if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) {
            QuerySolution* soln = buildCollscanSoln(query, true, params);
            if (NULL != soln) {
        return Status::OK();

    // The hint or sort can be $natural: 1.  If this happens, output a collscan. If both
    // a $natural hint and a $natural sort are specified, then the direction of the collscan
    // is determined by the sign of the sort (not the sign of the hint).
    if (!query.getParsed().getHint().isEmpty() || !query.getParsed().getSort().isEmpty()) {
        BSONObj hintObj = query.getParsed().getHint();
        BSONObj sortObj = query.getParsed().getSort();
        BSONElement naturalHint = hintObj.getFieldDotted("$natural");
        BSONElement naturalSort = sortObj.getFieldDotted("$natural");

        // A hint overrides a $natural sort. This means that we don't force a table
        // scan if there is a $natural sort with a non-$natural hint.
        if (!naturalHint.eoo() || (!naturalSort.eoo() && hintObj.isEmpty())) {
            LOG(5) << "Forcing a table scan due to hinted $natural\n";
            // min/max are incompatible with $natural.
            if (canTableScan && query.getParsed().getMin().isEmpty() &&
                query.getParsed().getMax().isEmpty()) {
                QuerySolution* soln = buildCollscanSoln(query, false, params);
                if (NULL != soln) {
            return Status::OK();

    // Figure out what fields we care about.
    unordered_set<string> fields;
    QueryPlannerIXSelect::getFields(query.root(), "", &fields);

    for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
        LOG(5) << "Predicate over field '" << *it << "'" << endl;

    // Filter our indices so we only look at indices that are over our predicates.
    vector<IndexEntry> relevantIndices;

    // Hints require us to only consider the hinted index.
    // If index filters in the query settings were used to override
    // the allowed indices for planning, we should not use the hinted index
    // requested in the query.
    BSONObj hintIndex;
    if (!params.indexFiltersApplied) {
        hintIndex = query.getParsed().getHint();

    // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
    // plan.  If that fails, just scan the _id index.
    if (query.getParsed().isSnapshot()) {
        // Find the ID index in indexKeyPatterns.  It's our hint.
        for (size_t i = 0; i < params.indices.size(); ++i) {
            if (isIdIndex(params.indices[i].keyPattern)) {
                hintIndex = params.indices[i].keyPattern;

    // Sentinel meaning "no hinted index resolved"; replaced by a position in params.indices
    // once the hint is matched below.
    size_t hintIndexNumber = numeric_limits<size_t>::max();

    if (hintIndex.isEmpty()) {
        QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);
    } else {
        // Sigh.  If the hint is specified it might be using the index name.
        BSONElement firstHintElt = hintIndex.firstElement();
        if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
            // Hint by name: look for an index whose name matches and switch 'hintIndex' to
            // that index's key pattern.
            string hintName = firstHintElt.String();
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (params.indices[i].name == hintName) {
                    LOG(5) << "Hint by name specified, restricting indices to "
                           << params.indices[i].keyPattern.toString() << endl;
                    hintIndexNumber = i;
                    hintIndex = params.indices[i].keyPattern;
        } else {
            // Hint by key pattern: look for an index whose key pattern compares equal.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                    LOG(5) << "Hint specified, restricting indices to " << hintIndex.toString()
                           << endl;
                    hintIndexNumber = i;

        // The sentinel is still in place, so the hint matched no index.
        if (hintIndexNumber == numeric_limits<size_t>::max()) {
            return Status(ErrorCodes::BadValue, "bad hint");

    // Deal with the .min() and .max() query options.  If either exist we can only use an index
    // that matches the object inside.
    if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) {
        BSONObj minObj = query.getParsed().getMin();
        BSONObj maxObj = query.getParsed().getMax();

        // The unfinished siblings of these objects may not be proper index keys because they
        // may be empty objects or have field names. When an index is picked to use for the
        // min/max query, these "finished" objects will always be valid index keys for the
        // index's key pattern.
        BSONObj finishedMinObj;
        BSONObj finishedMaxObj;

        // This is the index into params.indices[...] that we use.
        size_t idxNo = numeric_limits<size_t>::max();

        // If there's an index hinted we need to be able to use it.
        if (!hintIndex.isEmpty()) {
            if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) {
                LOG(5) << "Minobj doesn't work with hint";
                return Status(ErrorCodes::BadValue, "hint provided does not work with min query");

            if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) {
                LOG(5) << "Maxobj doesn't work with hint";
                return Status(ErrorCodes::BadValue, "hint provided does not work with max query");

            const BSONObj& kp = params.indices[hintIndexNumber].keyPattern;
            finishedMinObj = finishMinObj(kp, minObj, maxObj);
            finishedMaxObj = finishMaxObj(kp, minObj, maxObj);

            // The min must be less than the max for the hinted index ordering.
            if (0 <= finishedMinObj.woCompare(finishedMaxObj, kp, false)) {
                LOG(5) << "Minobj/Maxobj don't work with hint";
                return Status(ErrorCodes::BadValue,
                              "hint provided does not work with min/max query");

            idxNo = hintIndexNumber;
        } else {
            // No hinted index, look for one that is compatible (has same field names and
            // ordering thereof).
            for (size_t i = 0; i < params.indices.size(); ++i) {
                const BSONObj& kp = params.indices[i].keyPattern;

                // Compatibility is checked against min when it is given, otherwise against max.
                BSONObj toUse = minObj.isEmpty() ? maxObj : minObj;
                if (indexCompatibleMaxMin(toUse, kp)) {
                    // In order to be fully compatible, the min has to be less than the max
                    // according to the index key pattern ordering. The first step in verifying
                    // this is "finish" the min and max by replacing empty objects and stripping
                    // field names.
                    finishedMinObj = finishMinObj(kp, minObj, maxObj);
                    finishedMaxObj = finishMaxObj(kp, minObj, maxObj);

                    // Now we have the final min and max. This index is only relevant for
                    // the min/max query if min < max.
                    // NOTE(review): '0 >=' also accepts a comparison result of 0 (min equal to
                    // max), whereas the hinted branch above rejects that case with '0 <=' --
                    // confirm which behavior is intended.
                    if (0 >= finishedMinObj.woCompare(finishedMaxObj, kp, false)) {
                        // Found a relevant index.
                        idxNo = i;

                    // This index is not relevant; move on to the next.

        if (idxNo == numeric_limits<size_t>::max()) {
            LOG(5) << "Can't find relevant index to use for max/min query";
            // Can't find an index to use, bail out.
            return Status(ErrorCodes::BadValue, "unable to find relevant index for max/min query");

        LOG(5) << "Max/min query using index " << params.indices[idxNo].toString() << endl;

        // Make our scan and output.
        QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan(
            params.indices[idxNo], query, params, finishedMinObj, finishedMaxObj);

        QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
        if (NULL != soln) {

        return Status::OK();

    for (size_t i = 0; i < relevantIndices.size(); ++i) {
        LOG(2) << "Relevant index " << i << " is " << relevantIndices[i].toString() << endl;

    // Figure out how useful each index is to each predicate.
    QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);
    QueryPlannerIXSelect::stripInvalidAssignments(query.root(), relevantIndices);

    // Unless we have GEO_NEAR, TEXT, or a projection, we may be able to apply an optimization
    // in which we strip unnecessary index assignments.
    // Disallowed with projection because assignment to a non-unique index can allow the plan
    // to be covered.
    // TEXT and GEO_NEAR are special because they require the use of a text/geo index in order
    // to be evaluated correctly. Stripping these "mandatory assignments" is therefore invalid.
    if (query.getParsed().getProj().isEmpty() &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {
        QueryPlannerIXSelect::stripUnneededAssignments(query.root(), relevantIndices);

    // query.root() is now annotated with RelevantTag(s).
    LOG(5) << "Rated tree:" << endl
           << query.root()->toString();

    // If there is a GEO_NEAR it must have an index it can use directly.
    MatchExpression* gnNode = NULL;
    if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
        // No index for GEO_NEAR?  No query.
        RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
        if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
            LOG(5) << "Unable to find index for $geoNear query." << endl;
            // Don't leave tags on query tree.
            return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query");

        LOG(5) << "Rated tree after geonear processing:" << query.root()->toString();

    // Likewise, if there is a TEXT it must have an index it can use directly.
    MatchExpression* textNode = NULL;
    if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
        RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());

        // Exactly one text index required for TEXT.  We need to check this explicitly because
        // the text stage can't be built if no text index exists or there is an ambiguity as to
        // which one to use.
        size_t textIndexCount = 0;
        for (size_t i = 0; i < params.indices.size(); i++) {
            if (INDEX_TEXT == params.indices[i].type) {
        if (textIndexCount != 1) {
            // Don't leave tags on query tree.
            return Status(ErrorCodes::BadValue, "need exactly one text index for $text query");

        // Error if the text node is tagged with zero indices.
        if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
            // Don't leave tags on query tree.
            return Status(ErrorCodes::BadValue,
                          "failed to use text index to satisfy $text query (if text index is "
                          "compound, are equality predicates given for all prefix fields?)");

        // At this point, we know that there is only one text index and that the TEXT node is
        // assigned to it.
        invariant(1 == tag->first.size() + tag->notFirst.size());

        LOG(5) << "Rated tree after text processing:" << query.root()->toString();

    // If we have any relevant indices, we try to create indexed plans.
    if (0 < relevantIndices.size()) {
        // The enumerator spits out trees tagged with IndexTag(s).
        PlanEnumeratorParams enumParams;
        enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION;
        enumParams.root = query.root();
        enumParams.indices = &relevantIndices;

        PlanEnumerator isp(enumParams);

        // Keep pulling tagged trees until the enumerator is exhausted or 'out' holds
        // params.maxIndexedSolutions entries.
        MatchExpression* rawTree;
        while (isp.getNext(&rawTree) && (out->size() < params.maxIndexedSolutions)) {
            LOG(5) << "About to build solntree from tagged tree:" << endl
                   << rawTree->toString();

            // The tagged tree produced by the plan enumerator is not guaranteed
            // to be canonically sorted. In order to be compatible with the cached
            // data, sort the tagged tree according to CanonicalQuery ordering.
            std::unique_ptr<MatchExpression> clone(rawTree->shallowClone());

            // A failure here is only logged ("not cachable"); planning of this tree continues.
            PlanCacheIndexTree* cacheData;
            Status indexTreeStatus =
                cacheDataFromTaggedTree(clone.get(), relevantIndices, &cacheData);
            if (!indexTreeStatus.isOK()) {
                LOG(5) << "Query is not cachable: " << indexTreeStatus.reason() << endl;
            unique_ptr<PlanCacheIndexTree> autoData(cacheData);

            // This can fail if enumeration makes a mistake.
            QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
                query, rawTree, false, relevantIndices, params);

            if (NULL == solnRoot) {

            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
            if (NULL != soln) {
                LOG(5) << "Planner: adding solution:" << endl
                       << soln->toString();
                if (indexTreeStatus.isOK()) {
                    SolutionCacheData* scd = new SolutionCacheData();

    // Don't leave tags on query tree.

    LOG(5) << "Planner: outputted " << out->size() << " indexed solutions.\n";

    // Produce legible error message for failed OR planning with a TEXT child.
    // TODO: support collection scan for non-TEXT children of OR.
    if (out->size() == 0 && textNode != NULL && MatchExpression::OR == query.root()->matchType()) {
        MatchExpression* root = query.root();
        for (size_t i = 0; i < root->numChildren(); ++i) {
            if (textNode == root->getChild(i)) {
                return Status(ErrorCodes::BadValue,
                              "Failed to produce a solution for TEXT under OR - "
                              "other non-TEXT clauses under OR have to be indexed as well.");

    // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
    // scan the entire index to provide results and output that as our plan.  This is the
    // desired behavior when an index is hinted that is not relevant to the query.
    if (!hintIndex.isEmpty()) {
        if (0 == out->size()) {
            QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
            verify(NULL != soln);
            LOG(5) << "Planner: outputting soln that uses hinted index as scan." << endl;
        return Status::OK();

    // If a sort order is requested, there may be an index that provides it, even if that
    // index is not over any predicates in the query.
    if (!query.getParsed().getSort().isEmpty() &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {
        // See if we have a sort provided from an index already.
        // This is implied by the presence of a non-blocking solution.
        bool usingIndexToSort = false;
        for (size_t i = 0; i < out->size(); ++i) {
            QuerySolution* soln = (*out)[i];
            if (!soln->hasBlockingStage) {
                usingIndexToSort = true;

        if (!usingIndexToSort) {
            for (size_t i = 0; i < params.indices.size(); ++i) {
                const IndexEntry& index = params.indices[i];
                // Only regular (non-plugin) indexes can be used to provide a sort, and only
                // non-sparse indexes can be used to provide a sort.
                // TODO: Sparse indexes can't normally provide a sort, because non-indexed
                // documents could potentially be missing from the result set.  However, if the
                // query predicate can be used to guarantee that all documents to be returned
                // are indexed, then the index should be able to provide the sort.
                // For example:
                // - Sparse index {a: 1, b: 1} should be able to provide a sort for
                //   find({b: 1}).sort({a: 1}).  SERVER-13908.
                // - Index {a: 1, b: "2dsphere"} (which is "geo-sparse", if
                //   2dsphereIndexVersion=2) should be able to provide a sort for
                //   find({b: GEO}).sort({a:1}).  SERVER-10801.
                if (index.type != INDEX_BTREE) {
                if (index.sparse) {

                // Partial indexes can only be used to provide a sort only if the query predicate is
                // compatible.
                if (index.filterExpr && !expression::isSubsetOf(query.root(), index.filterExpr)) {

                // Try the index in its forward direction first, then reversed.
                const BSONObj kp = QueryPlannerAnalysis::getSortPattern(index.keyPattern);
                if (providesSort(query, kp)) {
                    LOG(5) << "Planner: outputting soln that uses index to provide sort." << endl;
                    QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                    if (NULL != soln) {
                        PlanCacheIndexTree* indexTree = new PlanCacheIndexTree();
                        SolutionCacheData* scd = new SolutionCacheData();
                        scd->solnType = SolutionCacheData::WHOLE_IXSCAN_SOLN;
                        scd->wholeIXSolnDir = 1;

                if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                    LOG(5) << "Planner: outputting soln that uses (reverse) index "
                           << "to provide sort." << endl;
                    QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                    if (NULL != soln) {
                        PlanCacheIndexTree* indexTree = new PlanCacheIndexTree();
                        SolutionCacheData* scd = new SolutionCacheData();
                        scd->solnType = SolutionCacheData::WHOLE_IXSCAN_SOLN;
                        scd->wholeIXSolnDir = -1;


    // geoNear and text queries *require* an index.
    // Also, if a hint is specified it indicates that we MUST use it.
    bool possibleToCollscan =
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && hintIndex.isEmpty();

    // The caller can explicitly ask for a collscan.
    bool collscanRequested = (params.options & QueryPlannerParams::INCLUDE_COLLSCAN);

    // No indexed plans?  We must provide a collscan if possible or else we can't run the query.
    bool collscanNeeded = (0 == out->size() && canTableScan);

    if (possibleToCollscan && (collscanRequested || collscanNeeded)) {
        QuerySolution* collscan = buildCollscanSoln(query, false, params);
        if (NULL != collscan) {
            SolutionCacheData* scd = new SolutionCacheData();
            scd->solnType = SolutionCacheData::COLLSCAN_SOLN;
            LOG(5) << "Planner: outputting a collscan:" << endl
                   << collscan->toString();

    return Status::OK();
예제 #5
// Reconstructs a query solution for 'query' from previously cached planner data.
//
// Only the first entry of cachedSoln.plannerData is consulted. Depending on its solnType, the
// solution is built as a scan over a whole index, as a collection scan, or by tagging a clone
// of the query's match expression with the cached index assignments and then running the
// regular data-access and analysis steps on the tagged tree.
//
// On success the new solution is stored in '*out' and Status::OK() is returned. The failure
// paths visible here return ErrorCodes::BadValue, except that a failed tagAccordingToCache()
// call has its Status returned unchanged.
//
// NOTE(review): this excerpt has unbalanced braces and at least one truncated statement, so
// some lines of the original function appear to be elided.
// static
Status QueryPlanner::planFromCache(const CanonicalQuery& query,
                                   const QueryPlannerParams& params,
                                   const CachedSolution& cachedSoln,
                                   QuerySolution** out) {

    // A query not suitable for caching should not have made its way into the cache.
    // NOTE(review): no statement enforcing this is visible in the excerpt -- confirm.

    // Look up winning solution in cached solution's array.
    const SolutionCacheData& winnerCacheData = *cachedSoln.plannerData[0];

    if (SolutionCacheData::WHOLE_IXSCAN_SOLN == winnerCacheData.solnType) {
        // The solution can be constructed by a scan over the entire index.
        // The index entry and the scan direction both come from the cache data.
        QuerySolution* soln = buildWholeIXSoln(
            *winnerCacheData.tree->entry, query, params, winnerCacheData.wholeIXSolnDir);
        if (soln == NULL) {
            return Status(ErrorCodes::BadValue,
                          "plan cache error: soln that uses index to provide sort");
        } else {
            *out = soln;
            return Status::OK();
    } else if (SolutionCacheData::COLLSCAN_SOLN == winnerCacheData.solnType) {
        // The cached solution is a collection scan. We don't cache collscans
        // with tailable==true, hence the false below.
        QuerySolution* soln = buildCollscanSoln(query, false, params);
        if (soln == NULL) {
            return Status(ErrorCodes::BadValue, "plan cache error: collection scan soln");
        } else {
            *out = soln;
            return Status::OK();

    // Remaining case: winnerCacheData.solnType is neither WHOLE_IXSCAN_SOLN nor COLLSCAN_SOLN.
    // If we're here then this is neither the whole index scan nor the collection scan
    // case, and we proceed by using the PlanCacheIndexTree to tag the query tree.

    // Create a copy of the expression tree.  We use cachedSoln to annotate this with indices.
    // NOTE(review): if shallowClone() returns a unique_ptr by value, the std::move below is
    // redundant -- confirm against its declaration.
    unique_ptr<MatchExpression> clone = std::move(query.root()->shallowClone());

    LOG(5) << "Tagging the match expression according to cache data: " << endl
           << "Filter:" << endl
           << clone->toString() << "Cache data:" << endl
           << winnerCacheData.toString();

    // Map from index key pattern to the position of that index in params.indices.
    // TODO: can we assume that the index numbering has the same lifetime
    // as the cache state?
    map<BSONObj, size_t> indexMap;
    for (size_t i = 0; i < params.indices.size(); ++i) {
        const IndexEntry& ie = params.indices[i];
        indexMap[ie.keyPattern] = i;
        LOG(5) << "Index " << i << ": " << ie.keyPattern.toString() << endl;

    // Tag the cloned tree using the cached index tree; a failure is propagated to the caller.
    Status s = tagAccordingToCache(clone.get(), winnerCacheData.tree.get(), indexMap);
    if (!s.isOK()) {
        return s;

    // The planner requires a defined sort order.
    // NOTE(review): the statement that establishes that order is not visible in this excerpt.

    LOG(5) << "Tagged tree:" << endl
           << clone->toString();

    // Use the cached index assignments to build solnRoot.
    // clone.release() gives up ownership of the tagged tree; the callee presumably takes it
    // over -- confirm against buildIndexedDataAccess().
    QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
        query, clone.release(), false, params.indices, params);

    if (!solnRoot) {
        return Status(ErrorCodes::BadValue,
                      str::stream() << "Failed to create data access plan from cache. Query: "
                                    << query.toStringShort());

    // Takes ownership of 'solnRoot'.
    QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
    // NOTE(review): the message expression in the return below begins with '<<'. The analogous
    // error above starts its message with str::stream(), so a line appears to be missing here.
    if (!soln) {
        return Status(ErrorCodes::BadValue,
                          << "Failed to analyze plan from cache. Query: " << query.toStringShort());

    LOG(5) << "Planner: solution constructed from the cache:\n" << soln->toString();
    *out = soln;
    return Status::OK();
예제 #6
    // Plans 'query' against the indices and options in 'params'.
    //
    // The visible stages are, in order: an _id-index shortcut, tailable cursors, a $natural
    // hint, NOT/NOR queries, selection and rating of relevant indices (restricted to the hinted
    // index when a hint or snapshot is given), special handling for GEO_NEAR and TEXT
    // predicates, enumeration of indexed plans, a whole-index scan over a hinted index, an
    // index scan that provides the requested sort, and finally a collection scan.
    //
    // NOTE(review): this excerpt has unbalanced braces, and no statement that appends to 'out'
    // or returns early is visible even though 'out' is read (out->size(), (*out)[0]). Those
    // lines appear to be elided; the comments below describe only what is visible.
    // static
    void QueryPlanner::plan(const CanonicalQuery& query,
                            const QueryPlannerParams& params,
                            vector<QuerySolution*>* out) {
        QLOG() << "=============================\n"
               << "Beginning planning, options = " << optionString(params.options) << endl
               << "Canonical query:\n" << query.toString() << endl
               << "============================="
               << endl;

        // The shortcut formerly known as IDHACK.  See if it's a simple _id query.  If so we might
        // just make an ixscan over the _id index and bypass the rest of planning entirely.
        // The shortcut is not taken for explain, showDiskLoc or tailable queries.
        if (!query.getParsed().isExplain() && !query.getParsed().showDiskLoc()
            && isSimpleIdQuery(query.getParsed().getFilter())
            && !query.getParsed().hasOption(QueryOption_CursorTailable)) {

            // See if we can find an _id index.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    const IndexEntry& index = params.indices[i];
                    QLOG() << "IDHACK using index " << index.toString() << endl;

                    // If so, we make a simple scan to find the doc.
                    // Start and end key are the same key and the end is inclusive, so the
                    // bounds describe a single point.
                    IndexScanNode* isn = new IndexScanNode();
                    isn->indexKeyPattern = index.keyPattern;
                    isn->indexIsMultiKey = index.multikey;
                    isn->direction = 1;
                    isn->bounds.isSimpleRange = true;
                    BSONObj key = getKeyFromQuery(index.keyPattern, query.getParsed().getFilter());
                    isn->bounds.startKey = isn->bounds.endKey = key;
                    isn->bounds.endKeyInclusive = true;

                    QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, isn);

                    if (NULL != soln) {
                        // NOTE(review): this logs (*out)[0] rather than 'soln'; it presumably
                        // relies on 'soln' having just been added to an empty 'out' -- confirm.
                        QLOG() << "IDHACK solution is:\n" << (*out)[0]->toString() << endl;
                        // And that's it.

        for (size_t i = 0; i < params.indices.size(); ++i) {
            QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl;

        // Collection scans are allowed unless the NO_TABLE_SCAN option bit is set.
        bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

        // If the query requests a tailable cursor, the only solution is a collscan + filter with
        // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
        // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
        // can't provide one.  Is this what we want?
        if (query.getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
                && canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, true, params);
                if (NULL != soln) {

        // The hint can be $natural: 1.  If this happens, output a collscan.  It's a weird way of
        // saying "table scan for two, please."
        if (!query.getParsed().getHint().isEmpty()) {
            BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural");
            if (!natural.eoo()) {
                QLOG() << "forcing a table scan due to hinted $natural\n";
                if (canTableScan) {
                    QuerySolution* soln = buildCollscanSoln(query, false, params);
                    if (NULL != soln) {

        // NOR and NOT we can't handle well with indices.  If we see them here, they weren't
        // rewritten to remove the negation.  Just output a collscan for those.
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOT)
            || QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOR)) {

            // If there's a near predicate, we can't handle this.
            // TODO: Should canonicalized query detect this?
            if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)) {
                warning() << "Can't handle NOT/NOR with GEO_NEAR";
            QLOG() << "NOT/NOR in plan, just outtping a collscan\n";
            if (canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, false, params);
                if (NULL != soln) {

        // Figure out what fields we care about.
        unordered_set<string> fields;
        QueryPlannerIXSelect::getFields(query.root(), "", &fields);

        for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
            QLOG() << "predicate over field " << *it << endl;

        // Filter our indices so we only look at indices that are over our predicates.
        vector<IndexEntry> relevantIndices;

        // Hints require us to only consider the hinted index.
        BSONObj hintIndex = query.getParsed().getHint();

        // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
        // plan.  If that fails, just scan the _id index.
        if (query.getParsed().isSnapshot()) {
            // Find the ID index in indexKeyPatterns.  It's our hint.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    hintIndex = params.indices[i].keyPattern;

        // Sentinel: max() means that no index matching the hint has been found (yet).
        size_t hintIndexNumber = numeric_limits<size_t>::max();

        if (!hintIndex.isEmpty()) {
            // Sigh.  If the hint is specified it might be using the index name.
            // A hint by name is recognized as a string-typed first element named "$hint".
            BSONElement firstHintElt = hintIndex.firstElement();
            if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
                string hintName = firstHintElt.String();
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (params.indices[i].name == hintName) {
                        QLOG() << "hint by name specified, restricting indices to "
                             << params.indices[i].keyPattern.toString() << endl;
                        hintIndexNumber = i;
                        // From here on the hint is represented by its key pattern.
                        hintIndex = params.indices[i].keyPattern;
            else {
                // Otherwise the hint is compared against each index's key pattern.
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                        QLOG() << "hint specified, restricting indices to " << hintIndex.toString()
                             << endl;
                        hintIndexNumber = i;

            if (hintIndexNumber == numeric_limits<size_t>::max()) {
                // This is supposed to be an error.
                warning() << "Can't find hint for " << hintIndex.toString();
        else {
            QLOG() << "Finding relevant indices\n";
            QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);

        for (size_t i = 0; i < relevantIndices.size(); ++i) {
            QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl;

        // Figure out how useful each index is to each predicate.
        // query.root() is now annotated with RelevantTag(s).
        QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);

        QLOG() << "rated tree" << endl;
        QLOG() << query.root()->toString() << endl;

        // If there is a GEO_NEAR it must have an index it can use directly.
        // XXX: move into data access?
        MatchExpression* gnNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
            // No index for GEO_NEAR?  No query.
            RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {

            GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode);

            vector<size_t> newFirst;

            // 2d + GEO_NEAR is annoying.  Because 2d's GEO_NEAR isn't streaming we have to embed
            // the full query tree inside it as a matcher.
            for (size_t i = 0; i < tag->first.size(); ++i) {
                // GEO_NEAR has a non-2d index it can use.  We can deal w/that in normal planning.
                if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) {

                // If we're here, GEO_NEAR has a 2d index.  We create a 2dgeonear plan with the
                // entire tree as a filter, if possible.

                GeoNear2DNode* solnRoot = new GeoNear2DNode();
                solnRoot->nq = gnme->getData();

                if (MatchExpression::GEO_NEAR != query.root()->matchType()) {
                    // root is an AND, clone and delete the GEO_NEAR child.
                    MatchExpression* filterTree = query.root()->shallowClone();
                    verify(MatchExpression::AND == filterTree->matchType());

                    bool foundChild = false;
                    // NOTE(review): this 'i' shadows the index of the enclosing loop over
                    // tag->first.
                    for (size_t i = 0; i < filterTree->numChildren(); ++i) {
                        if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) {
                            foundChild = true;
                            filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i);

                // When getNumToReturn() is 0, fall back to asking for 100 results.
                solnRoot->numWanted = query.getParsed().getNumToReturn();
                if (0 == solnRoot->numWanted) {
                    solnRoot->numWanted = 100;
                solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern;

                // Remove the 2d index.  2d can only be the first field, and we know there is
                // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll
                // only be first for gnNode.
                tag->first.erase(tag->first.begin() + i);

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);

                if (NULL != soln) {

            // Continue planning w/non-2d indices tagged for this pred.

            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {

        // Likewise, if there is a TEXT it must have an index it can use directly.
        // NOTE(review): unlike gnNode above, textNode is not initialized before hasNode() runs.
        MatchExpression* textNode;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
            RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {

        // If we have any relevant indices, we try to create indexed plans.
        if (0 < relevantIndices.size()) {
            // The enumerator spits out trees tagged with IndexTag(s).
            PlanEnumerator isp(query.root(), &relevantIndices);

            MatchExpression* rawTree;
            while (isp.getNext(&rawTree)) {
                QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString()
                       << endl;

                // This can fail if enumeration makes a mistake.
                QuerySolutionNode* solnRoot =
                    QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices);

                // Skip tagged trees for which no data access plan could be built.
                if (NULL == solnRoot) { continue; }

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
                if (NULL != soln) {
                    QLOG() << "Planner: adding solution:\n" << soln->toString() << endl;

        QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n";

        // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
        // scan the entire index to provide results and output that as our plan.  This is the
        // desired behavior when an index is hinted that is not relevant to the query.
        // NOTE(review): if hintIndexNumber still holds the max() sentinel here, the subscript
        // below is out of range. Confirm that the "can't find hint" branch above leaves the
        // function (no early return is visible in this excerpt).
        if (!hintIndex.isEmpty() && (0 == out->size())) {
            QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
            if (NULL != soln) {
                QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl;

        // If a sort order is requested, there may be an index that provides it, even if that
        // index is not over any predicates in the query.
        // XXX XXX: Can we do this even if the index is sparse?  Might we miss things?
        if (!query.getParsed().getSort().isEmpty()
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {

            // See if we have a sort provided from an index already.
            // A solution without a sort stage counts as already providing the sort.
            bool usingIndexToSort = false;
            for (size_t i = 0; i < out->size(); ++i) {
                QuerySolution* soln = (*out)[i];
                if (!soln->hasSortStage) {
                    usingIndexToSort = true;

            if (!usingIndexToSort) {
                // Try each index in both directions: first its key pattern as given, then the
                // reversed pattern with a scan direction of -1.
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const BSONObj& kp = params.indices[i].keyPattern;
                    if (providesSort(query, kp)) {
                        QLOG() << "Planner: outputting soln that uses index to provide sort."
                               << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                        if (NULL != soln) {
                    if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                        QLOG() << "Planner: outputting soln that uses (reverse) index "
                               << "to provide sort." << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                        if (NULL != soln) {

        // TODO: Do we always want to offer a collscan solution?
        // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs.
        // A collscan is considered only when the query has no GEO_NEAR or TEXT node, and either
        // INCLUDE_COLLSCAN is set or there are no solutions so far and table scans are allowed.
        if (    !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
             && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)
             && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan)))
            QuerySolution* collscan = buildCollscanSoln(query, false, params);
            if (NULL != collscan) {
                QLOG() << "Planner: outputting a collscan:\n";
                QLOG() << collscan->toString() << endl;
예제 #7
    // Runs the candidate plans for a trial period, picks the winner via PlanRanker and, when
    // permitted, stores the result in the collection's plan cache.
    //
    // 'yieldPolicy' is forwarded to workAllPlans() on every round.
    //
    // Returns the status stored in the working set member '_statusMemberId' when '_failure' is
    // set after the trial period, and Status::OK() otherwise.
    //
    // NOTE(review): this excerpt has unbalanced braces; some lines of the original function
    // appear to be elided.
    Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
        // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
        // execution work that happens here, so this is needed for the time accounting to
        // make sense.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        // Upper bounds for the trial period: number of work rounds and number of results.
        size_t numWorks = getTrialPeriodWorks(_txn, _collection);
        size_t numResults = getTrialPeriodNumToReturn(*_query);

        // Work the plans, stopping when a plan hits EOF or returns some
        // fixed number of results.
        for (size_t ix = 0; ix < numWorks; ++ix) {
            bool moreToDo = workAllPlans(numResults, yieldPolicy);
            if (!moreToDo) { break; }

        if (_failure) {
            invariant(WorkingSet::INVALID_ID != _statusMemberId);
            // The status member is looked up in the first candidate's working set.
            // NOTE(review): presumably all candidates share one WorkingSet -- confirm.
            WorkingSetMember* member = _candidates[0].ws->get(_statusMemberId);
            return WorkingSetCommon::getMemberStatus(*member);

        // After picking best plan, ranking will own plan stats from
        // candidate solutions (winner and losers).
        std::auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision);
        _bestPlanIdx = PlanRanker::pickBestPlan(_candidates, ranking.get());
        // The returned index must refer to one of the candidates.
        verify(_bestPlanIdx >= 0 && _bestPlanIdx < static_cast<int>(_candidates.size()));

        // Copy candidate order. We will need this to sort candidate stats for explain
        // after transferring ownership of 'ranking' to plan cache.
        std::vector<size_t> candidateOrder = ranking->candidateOrder;

        CandidatePlan& bestCandidate = _candidates[_bestPlanIdx];
        std::list<WorkingSetID>& alreadyProduced = bestCandidate.results;
        QuerySolution* bestSolution = bestCandidate.solution;

        LOG(5) << "Winning solution:\n" << bestSolution->toString() << endl;
        LOG(2) << "Winning plan: " << Explain::getPlanSummary(bestCandidate.root);

        // A backup plan is looked for only when the winner has a blocking stage and has not
        // produced any results yet; the backup is a candidate without a blocking stage.
        _backupPlanIdx = kNoSuchPlan;
        if (bestSolution->hasBlockingStage && (0 == alreadyProduced.size())) {
            LOG(5) << "Winner has blocking stage, looking for backup plan...\n";
            for (size_t ix = 0; ix < _candidates.size(); ++ix) {
                if (!_candidates[ix].solution->hasBlockingStage) {
                    LOG(5) << "Candidate " << ix << " is backup child\n";
                    _backupPlanIdx = ix;

        // Store the choice we just made in the cache, if the query is of a type that is safe to
        // cache.
        if (PlanCache::shouldCacheQuery(*_query) && _shouldCache) {
            // Create list of candidate solutions for the cache with
            // the best solution at the front.
            std::vector<QuerySolution*> solutions;

            // Generate solutions and ranking decisions sorted by score.
            // NOTE(review): the statements that use 'ix' to fill 'solutions' are not visible in
            // this excerpt.
            for (size_t orderingIndex = 0;
                 orderingIndex < candidateOrder.size(); ++orderingIndex) {
                // index into candidates/ranking
                size_t ix = candidateOrder[orderingIndex];

            // Check solution cache data. Do not add to cache if
            // we have any invalid SolutionCacheData data.
            // XXX: One known example is 2D queries
            bool validSolutions = true;
            for (size_t ix = 0; ix < solutions.size(); ++ix) {
                if (NULL == solutions[ix]->cacheData.get()) {
                    LOG(5) << "Not caching query because this solution has no cache data: "
                           << solutions[ix]->toString();
                    validSolutions = false;

            if (validSolutions) {
                // ranking.release() gives up the auto_ptr's ownership of the ranking decision
                // as it is passed to the plan cache.
                _collection->infoCache()->getPlanCache()->add(*_query, solutions, ranking.release());

        return Status::OK();
예제 #8
    // Runs the candidate plans for a trial period, picks the winner via PlanRanker and, when
    // permitted, stores the result in the collection's plan cache.
    //
    // Returns early, without picking a plan, when '_failure' is set after the trial period.
    //
    // NOTE(review): this excerpt has unbalanced braces; some lines of the original function
    // appear to be elided.
    void MultiPlanStage::pickBestPlan() {
        // Run each plan some number of times. This number is at least as great as
        // 'internalQueryPlanEvaluationWorks', but may be larger for big collections.
        size_t numWorks = internalQueryPlanEvaluationWorks;
        if (NULL != _collection) {
            // For large collections, the number of works is set to be this
            // fraction of the collection size.
            double fraction = internalQueryPlanEvaluationCollFraction;

            // Take the larger of the fixed minimum and the collection-size-based value.
            numWorks = std::max(size_t(internalQueryPlanEvaluationWorks),
                                size_t(fraction * _collection->numRecords()));

        // We treat ntoreturn as though it is a limit during plan ranking.
        // This means that ranking might not be great for sort + batchSize.
        // But it also means that we don't buffer too much data for sort + limit.
        // See SERVER-14174 for details.
        size_t numToReturn = _query->getParsed().getNumToReturn();

        // Determine the number of results which we will produce during the plan
        // ranking phase before stopping.
        // A positive numToReturn can only lower this bound, never raise it.
        size_t numResults = (size_t)internalQueryPlanEvaluationMaxResults;
        if (numToReturn > 0) {
            numResults = std::min(numToReturn, numResults);

        // Work the plans, stopping when a plan hits EOF or returns some
        // fixed number of results.
        for (size_t ix = 0; ix < numWorks; ++ix) {
            bool moreToDo = workAllPlans(numResults);
            if (!moreToDo) { break; }

        if (_failure) { return; }

        // After picking best plan, ranking will own plan stats from
        // candidate solutions (winner and losers).
        std::auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision);
        _bestPlanIdx = PlanRanker::pickBestPlan(_candidates, ranking.get());
        // The returned index must refer to one of the candidates.
        verify(_bestPlanIdx >= 0 && _bestPlanIdx < static_cast<int>(_candidates.size()));

        // Copy candidate order. We will need this to sort candidate stats for explain
        // after transferring ownership of 'ranking' to plan cache.
        std::vector<size_t> candidateOrder = ranking->candidateOrder;

        CandidatePlan& bestCandidate = _candidates[_bestPlanIdx];
        std::list<WorkingSetID>& alreadyProduced = bestCandidate.results;
        QuerySolution* bestSolution = bestCandidate.solution;

        QLOG() << "Winning solution:\n" << bestSolution->toString() << endl;
        LOG(2) << "Winning plan: " << getPlanSummary(*bestSolution);

        // A backup plan is looked for only when the winner has a blocking stage and has not
        // produced any results yet; the backup is a candidate without a blocking stage.
        _backupPlanIdx = kNoSuchPlan;
        if (bestSolution->hasBlockingStage && (0 == alreadyProduced.size())) {
            QLOG() << "Winner has blocking stage, looking for backup plan...\n";
            for (size_t ix = 0; ix < _candidates.size(); ++ix) {
                if (!_candidates[ix].solution->hasBlockingStage) {
                    QLOG() << "Candidate " << ix << " is backup child\n";
                    _backupPlanIdx = ix;

        // Store the choice we just made in the cache. In order to do so,
        //   1) the query must be of a type that is safe to cache, and
        //   2) two or more plans cannot have tied for the win. Caching in the
        //   case of ties can cause successive queries of the same shape to
        //   use a bad index.
        if (PlanCache::shouldCacheQuery(*_query) && !ranking->tieForBest) {
            // Create list of candidate solutions for the cache with
            // the best solution at the front.
            std::vector<QuerySolution*> solutions;

            // Generate solutions and ranking decisions sorted by score.
            // NOTE(review): the statements that use 'ix' to fill 'solutions' are not visible in
            // this excerpt.
            for (size_t orderingIndex = 0;
                 orderingIndex < candidateOrder.size(); ++orderingIndex) {
                // index into candidates/ranking
                size_t ix = candidateOrder[orderingIndex];

            // Check solution cache data. Do not add to cache if
            // we have any invalid SolutionCacheData data.
            // XXX: One known example is 2D queries
            bool validSolutions = true;
            for (size_t ix = 0; ix < solutions.size(); ++ix) {
                if (NULL == solutions[ix]->cacheData.get()) {
                    QLOG() << "Not caching query because this solution has no cache data: "
                           << solutions[ix]->toString();
                    validSolutions = false;

            if (validSolutions) {
                // ranking.release() gives up the auto_ptr's ownership of the ranking decision
                // as it is passed to the plan cache.
                _collection->infoCache()->getPlanCache()->add(*_query, solutions, ranking.release());
예제 #9
    // Plans each child of an OR expression, tags the child with the index assignments of its
    // chosen solution, and then builds one composite solution for the whole query from the
    // tagged tree.
    //
    // Returns false on every failure path visible here (no usable cache data for a child,
    // tagging failure, no best plan chosen, the runner being killed, or failure to build or
    // analyze the composite solution) and true at the end.
    //
    // NOTE(review): this excerpt has unbalanced braces and several statements that stop in the
    // middle of an argument list, so lines of the original function appear to be elided. The
    // comments below describe only what is visible.
    bool SubplanRunner::runSubplans() {
        // This is what we annotate with the index selections and then turn into a solution.
        // NOTE(review): the constructor argument of 'theOr' is cut off in this excerpt.
        auto_ptr<OrMatchExpression> theOr(

        // This is the skeleton of index selections that is inserted into the cache.
        auto_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());

        for (size_t i = 0; i < theOr->numChildren(); ++i) {
            MatchExpression* orChild = theOr->getChild(i);

            // The auto_ptr takes ownership of the query at the front of '_cqs'.
            auto_ptr<CanonicalQuery> orChildCQ(_cqs.front());

            // 'solutions' is owned by the SubplanRunner instance until
            // it is popped from the queue.
            vector<QuerySolution*> solutions = _solutions.front();

            // We already checked for zero solutions in planSubqueries(...).

            if (1 == solutions.size()) {
                // There is only one solution. Transfer ownership to an auto_ptr.
                auto_ptr<QuerySolution> autoSoln(solutions[0]);

                // We want a well-formed *indexed* solution.
                if (NULL == autoSoln->cacheData.get()) {
                    // For example, we don't cache things for 2d indices.
                    QLOG() << "Subplanner: No cache data for subchild " << orChild->toString();
                    return false;

                // Only solutions whose cache data is of the index-tags kind can be used here.
                if (SolutionCacheData::USE_INDEX_TAGS_SOLN != autoSoln->cacheData->solnType) {
                    QLOG() << "Subplanner: No indexed cache data for subchild "
                           << orChild->toString();
                    return false;

                // Add the index assignments to our original query.
                Status tagStatus = QueryPlanner::tagAccordingToCache(
                    orChild, autoSoln->cacheData->tree.get(), _indexMap);

                if (!tagStatus.isOK()) {
                    QLOG() << "Subplanner: Failed to extract indices from subchild "
                           << orChild->toString();
                    return false;

                // Add the child's cache data to the cache data we're creating for the main query.
            else {
                // N solutions, rank them.  Takes ownership of orChildCQ.

                // the working set will be shared by the candidate plans and owned by the runner
                WorkingSet* sharedWorkingSet = new WorkingSet();

                // NOTE(review): the argument list of this constructor call is cut off in this
                // excerpt.
                MultiPlanStage* multiPlanStage = new MultiPlanStage(_collection,

                // Dump all the solutions into the MPR.
                for (size_t ix = 0; ix < solutions.size(); ++ix) {
                    // NOTE(review): nothing visible assigns 'nextPlanRoot' before it is passed
                    // to addPlan(); the statement that builds it appears to be elided.
                    PlanStage* nextPlanRoot;

                    // Owns first two arguments
                    multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet);

                if (! multiPlanStage->bestPlanChosen()) {
                    QLOG() << "Subplanner: Failed to pick best plan for subchild "
                           << orChildCQ->toString();
                    return false;

                // NOTE(review): the argument list of this constructor call is cut off in this
                // excerpt.
                Runner* mpr = new SingleSolutionRunner(_collection,


                if (_killed) {
                    QLOG() << "Subplanner: Killed while picking best plan for subchild "
                           << orChild->toString();
                    return false;

                QuerySolution* bestSoln = multiPlanStage->bestSolution();

                // Only solutions whose cache data is of the index-tags kind can be used here.
                // NOTE(review): unlike the single-solution branch, no NULL check of
                // bestSoln->cacheData is visible before this dereference.
                if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) {
                    QLOG() << "Subplanner: No indexed cache data for subchild "
                           << orChild->toString();
                    return false;

                // Add the index assignments to our original query.
                Status tagStatus = QueryPlanner::tagAccordingToCache(
                    orChild, bestSoln->cacheData->tree.get(), _indexMap);

                if (!tagStatus.isOK()) {
                    QLOG() << "Subplanner: Failed to extract indices from subchild "
                           << orChild->toString();
                    return false;


        // Must do this before using the planner functionality.
        // NOTE(review): the statement this comment refers to is not visible in this excerpt.

        // Use the cached index assignments to build solnRoot.  Takes ownership of 'theOr'
        QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
            *_query, theOr.release(), false, _plannerParams.indices);

        if (NULL == solnRoot) {
            QLOG() << "Subplanner: Failed to build indexed data path for subplanned query\n";
            return false;

        QLOG() << "Subplanner: fully tagged tree is " << solnRoot->toString();

        // Takes ownership of 'solnRoot'
        // NOTE(review): the argument list of this call is cut off in this excerpt.
        QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*_query,

        if (NULL == soln) {
            QLOG() << "Subplanner: Failed to analyze subplanned query";
            return false;

        // We want our franken-solution to be cached.
        // NOTE(review): nothing visible fills in 'scd' or attaches it to 'soln'.
        SolutionCacheData* scd = new SolutionCacheData();

        QLOG() << "Subplanner: Composite solution is " << soln->toString() << endl;

        // We use one of these even if there is one plan.  We do this so that the entry is cached
        // with stats obtained in the same fashion as a competitive ranking would have obtained
        // them.
        MultiPlanStage* multiPlanStage = new MultiPlanStage(_collection, _query.get());
        WorkingSet* ws = new WorkingSet();
        PlanStage* root;
        // StageBuilder::build() receives '&root' as its output argument; its result is checked
        // with verify().
        verify(StageBuilder::build(_txn, _collection, *soln, ws, &root));
        multiPlanStage->addPlan(soln, root, ws); // Takes ownership first two arguments.

        if (! multiPlanStage->bestPlanChosen()) {
            QLOG() << "Subplanner: Failed to pick best plan for subchild "
                   << _query->toString();
            return false;

        // NOTE(review): the argument list of this constructor call is cut off in this excerpt.
        Runner* mpr = new SingleSolutionRunner(_collection,

        return true;
예제 #10
    /**
     * Plans 'query' against the indices and option bits carried in 'params'.
     *
     * Stages visible in this block, in order:
     *   1. Tailable cursor requested: only a collection scan is considered, then return.
     *   2. Hint containing $natural: only a collection scan is considered, then return.
     *   3. Hint / snapshot handling: resolves the hinted index (by name or by key pattern);
     *      without a hint, the indices relevant to the query's fields are selected.
     *   4. min()/max() options: a single index scan over a compatible index, then return.
     *   5. GEO_NEAR and TEXT predicates: each must have been assigned a usable index.
     *   6. Enumeration of indexed plans over the relevant indices.
     *   7. Fallbacks: whole-index scan of a hinted index, whole-index scan that provides the
     *      requested sort, and finally a collection scan.
     *
     * @param query   the canonical query being planned; its match tree is tagged in place by
     *                rateIndices().
     * @param params  planner options (NO_TABLE_SCAN, INDEX_INTERSECTION, INCLUDE_COLLSCAN are
     *                read here) and the candidate indices.
     * @param out     vector of solutions; this block reads out->size() and (*out)[i].
     * @return Status::OK() on the normal paths; a BadValue Status for a hint that matches no
     *         index, a hint incompatible with min/max, no usable index for min/max, or no
     *         index for a $geoNear predicate.
     *
     * NOTE(review): this excerpt has lost lines -- closing braces, the bodies of several
     * 'if' branches (presumably where each 'soln' is appended to 'out'), and the tails of some
     * multi-line calls. Restore them from the upstream source before relying on the control
     * flow described here.
     */
    // static
    Status QueryPlanner::plan(const CanonicalQuery& query,
                              const QueryPlannerParams& params,
                              std::vector<QuerySolution*>* out) {

        QLOG() << "=============================\n"
               << "Beginning planning, options = " << optionString(params.options) << endl
               << "Canonical query:\n" << query.toString() << endl
               << "============================="
               << endl;

        // Log every index the caller made available to the planner.
        for (size_t i = 0; i < params.indices.size(); ++i) {
            QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl;

        // A collection scan is allowed unless the NO_TABLE_SCAN option bit is set.
        bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

        // If the query requests a tailable cursor, the only solution is a collscan + filter with
        // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
        // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
        // can't provide one.  Is this what we want?
        if (query.getParsed().hasOption(QueryOption_CursorTailable)) {
            // The collscan is only built when the tree has no GEO_NEAR node and table scans
            // are permitted; either way planning stops here with an OK status.
            if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
                && canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, true, params);
                // NOTE(review): the body of this branch is missing from the excerpt.
                if (NULL != soln) {
            return Status::OK();

        // The hint can be $natural: 1.  If this happens, output a collscan.  It's a weird way of
        // saying "table scan for two, please."
        if (!query.getParsed().getHint().isEmpty()) {
            BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural");
            if (!natural.eoo()) {
                QLOG() << "forcing a table scan due to hinted $natural\n";
                // min/max are incompatible with $natural.
                if (canTableScan && query.getParsed().getMin().isEmpty()
                                 && query.getParsed().getMax().isEmpty()) {
                    QuerySolution* soln = buildCollscanSoln(query, false, params);
                    // NOTE(review): the body of this branch is missing from the excerpt.
                    if (NULL != soln) {
                // A $natural hint ends planning whether or not a collscan was produced.
                return Status::OK();

        // Figure out what fields we care about.
        unordered_set<string> fields;
        QueryPlannerIXSelect::getFields(query.root(), "", &fields);

        for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
            QLOG() << "predicate over field " << *it << endl;

        // Filter our indices so we only look at indices that are over our predicates.
        vector<IndexEntry> relevantIndices;

        // Hints require us to only consider the hinted index.
        BSONObj hintIndex = query.getParsed().getHint();

        // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
        // plan.  If that fails, just scan the _id index.
        if (query.getParsed().isSnapshot()) {
            // Find the ID index in indexKeyPatterns.  It's our hint.
            // Note that this overwrites any hint the query itself carried.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    hintIndex = params.indices[i].keyPattern;

        // Position of the hinted index within params.indices.  numeric_limits<size_t>::max()
        // is the "not found" sentinel tested below.
        size_t hintIndexNumber = numeric_limits<size_t>::max();

        if (hintIndex.isEmpty()) {
            // No hint: let index selection pick the indices relevant to the query's fields.
            QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);
        else {
            // Sigh.  If the hint is specified it might be using the index name.
            BSONElement firstHintElt = hintIndex.firstElement();
            if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
                // Hint by name: the first element is a string field called "$hint".  Resolve
                // the name to a key pattern and remember its position.
                string hintName = firstHintElt.String();
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (params.indices[i].name == hintName) {
                        QLOG() << "hint by name specified, restricting indices to "
                             << params.indices[i].keyPattern.toString() << endl;
                        hintIndexNumber = i;
                        hintIndex = params.indices[i].keyPattern;
            else {
                // Hint by key pattern: match it against each index's key pattern.
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                        QLOG() << "hint specified, restricting indices to " << hintIndex.toString()
                             << endl;
                        hintIndexNumber = i;

            // A hint that matches none of the available indices is a user error.
            if (hintIndexNumber == numeric_limits<size_t>::max()) {
                return Status(ErrorCodes::BadValue, "bad hint");

        // Deal with the .min() and .max() query options.  If either exist we can only use an index
        // that matches the object inside.
        if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) {
            BSONObj minObj = query.getParsed().getMin();
            BSONObj maxObj = query.getParsed().getMax();

            // This is the index into params.indices[...] that we use.
            size_t idxNo = numeric_limits<size_t>::max();

            // If there's an index hinted we need to be able to use it.
            if (!hintIndex.isEmpty()) {
                // Each bound that was supplied must be compatible with the hinted key pattern.
                if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) {
                    QLOG() << "minobj doesnt work w hint";
                    return Status(ErrorCodes::BadValue,
                                  "hint provided does not work with min query");

                if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) {
                    QLOG() << "maxobj doesnt work w hint";
                    return Status(ErrorCodes::BadValue,
                                  "hint provided does not work with max query");

                idxNo = hintIndexNumber;
            else {
                // No hinted index, look for one that is compatible (has same field names and
                // ordering thereof).
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const BSONObj& kp = params.indices[i].keyPattern;

                    // Only one bound is checked: minObj when present, otherwise maxObj.
                    BSONObj toUse = minObj.isEmpty() ? maxObj : minObj;
                    if (indexCompatibleMaxMin(toUse, kp)) {
                        idxNo = i;
            if (idxNo == numeric_limits<size_t>::max()) {
                QLOG() << "Can't find relevant index to use for max/min query";
                // Can't find an index to use, bail out.
                return Status(ErrorCodes::BadValue,
                              "unable to find relevant index for max/min query");

            // maxObj can be empty; the index scan just goes until the end.  minObj can't be empty
            // though, so if it is, we make a minKey object.
            if (minObj.isEmpty()) {
                // NOTE(review): as shown, 'bob' is never appended to, so this yields whatever
                // an untouched builder produces; the statements that add the minKey fields
                // are not visible in this excerpt.
                BSONObjBuilder bob;
                minObj = bob.obj();
            else {
                // Must strip off the field names to make an index key.
                minObj = stripFieldNames(minObj);

            if (!maxObj.isEmpty()) {
                // Must strip off the field names to make an index key.
                maxObj = stripFieldNames(maxObj);

            QLOG() << "max/min query using index " << params.indices[idxNo].toString() << endl;

            // Make our scan and output.
            // NOTE(review): the remaining arguments of this makeIndexScan() call are missing
            // from the excerpt.
            QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan(params.indices[idxNo],

            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
            if (NULL != soln) {

            // min/max planning never falls through to the general planning path below.
            return Status::OK();

        for (size_t i = 0; i < relevantIndices.size(); ++i) {
            QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl;

        // Figure out how useful each index is to each predicate.
        // query.root() is now annotated with RelevantTag(s).
        QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);

        QLOG() << "rated tree" << endl;
        QLOG() << query.root()->toString() << endl;

        // If there is a GEO_NEAR it must have an index it can use directly.
        // XXX: move into data access?
        MatchExpression* gnNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
            // No index for GEO_NEAR?  No query.
            RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                QLOG() << "unable to find index for $geoNear query" << endl;
                return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query");

            GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode);

            // NOTE(review): 'newFirst' is declared but never used in the visible lines.
            vector<size_t> newFirst;

            // 2d + GEO_NEAR is annoying.  Because 2d's GEO_NEAR isn't streaming we have to embed
            // the full query tree inside it as a matcher.
            for (size_t i = 0; i < tag->first.size(); ++i) {
                // GEO_NEAR has a non-2d index it can use.  We can deal w/that in normal planning.
                // NOTE(review): the body of this branch is missing from the excerpt.
                if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) {

                // If we're here, GEO_NEAR has a 2d index.  We create a 2dgeonear plan with the
                // entire tree as a filter, if possible.

                GeoNear2DNode* solnRoot = new GeoNear2DNode();
                solnRoot->nq = gnme->getData();
                // Carry over the projection's geoNear point/distance metadata requests.
                if (NULL != query.getProj()) {
                    solnRoot->addPointMeta = query.getProj()->wantGeoNearPoint();
                    solnRoot->addDistMeta = query.getProj()->wantGeoNearDistance();

                if (MatchExpression::GEO_NEAR != query.root()->matchType()) {
                    // root is an AND, clone and delete the GEO_NEAR child.
                    MatchExpression* filterTree = query.root()->shallowClone();
                    verify(MatchExpression::AND == filterTree->matchType());

                    // NOTE(review): this inner loop reuses the name 'i', which by indentation
                    // shadows the index of the enclosing loop over tag->first.
                    bool foundChild = false;
                    for (size_t i = 0; i < filterTree->numChildren(); ++i) {
                        if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) {
                            foundChild = true;
                            filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i);

                // A requested count of zero is replaced with 100.
                solnRoot->numWanted = query.getParsed().getNumToReturn();
                if (0 == solnRoot->numWanted) {
                    solnRoot->numWanted = 100;
                solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern;

                // Remove the 2d index.  2d can only be the first field, and we know there is
                // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll
                // only be first for gnNode.
                // NOTE(review): this erases from tag->first inside an index-based loop over
                // it; how the loop index is handled afterwards is not visible here.
                tag->first.erase(tag->first.begin() + i);

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);

                if (NULL != soln) {

            // Continue planning w/non-2d indices tagged for this pred.

            // If removing the 2d index left the GEO_NEAR predicate with no index at all,
            // planning ends here.
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return Status::OK();

        // Likewise, if there is a TEXT it must have an index it can use directly.
        // Unlike gnNode above, textNode is not initialized at declaration; it is only read
        // after hasNode() returns true (presumably hasNode() fills it in -- confirm).
        MatchExpression* textNode;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
            RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
            // As shown, a TEXT predicate with no assigned index returns OK without planning
            // any further.
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return Status::OK();

        // If we have any relevant indices, we try to create indexed plans.
        if (0 < relevantIndices.size()) {
            // The enumerator spits out trees tagged with IndexTag(s).
            PlanEnumeratorParams enumParams;
            // Intersection plans are enumerated only when the INDEX_INTERSECTION bit is set.
            enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION;
            enumParams.root = query.root();
            enumParams.indices = &relevantIndices;

            PlanEnumerator isp(enumParams);

            MatchExpression* rawTree;
            // XXX: have limit on # of indexed solns we'll consider.  We could have a perverse
            // query and index that could make n^2 very unpleasant.
            while (isp.getNext(&rawTree)) {
                QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString()
                       << endl;

                // This can fail if enumeration makes a mistake.
                QuerySolutionNode* solnRoot =
                    QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices);

                // Skip tagged trees for which no data-access plan could be built.
                if (NULL == solnRoot) { continue; }

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
                if (NULL != soln) {
                    QLOG() << "Planner: adding solution:\n" << soln->toString() << endl;

        QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n";

        // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
        // scan the entire index to provide results and output that as our plan.  This is the
        // desired behavior when an index is hinted that is not relevant to the query.
        if (!hintIndex.isEmpty()) {
            if (0 == out->size()) {
                QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
                verify(NULL != soln);
                QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl;
            // With a hint, neither the sort fallback nor the collscan fallback below is tried.
            return Status::OK();

        // If a sort order is requested, there may be an index that provides it, even if that
        // index is not over any predicates in the query.
        if (!query.getParsed().getSort().isEmpty()
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {

            // See if we have a sort provided from an index already.
            // A solution without a sort stage is taken to mean its index supplies the order.
            bool usingIndexToSort = false;
            for (size_t i = 0; i < out->size(); ++i) {
                QuerySolution* soln = (*out)[i];
                if (!soln->hasSortStage) {
                    usingIndexToSort = true;

            if (!usingIndexToSort) {
                // Consider every available index (not just the relevant ones) and test its
                // normalized key pattern in both directions against the requested sort.
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const IndexEntry& index = params.indices[i];
                    // NOTE(review): the body of this branch is missing from the excerpt;
                    // presumably sparse indices are skipped -- confirm against upstream.
                    if (index.sparse) {
                    const BSONObj kp = LiteParsedQuery::normalizeSortOrder(index.keyPattern);
                    if (providesSort(query, kp)) {
                        QLOG() << "Planner: outputting soln that uses index to provide sort."
                               << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                        if (NULL != soln) {
                    if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                        QLOG() << "Planner: outputting soln that uses (reverse) index "
                               << "to provide sort." << endl;
                        // The trailing -1 requests the reverse scan direction (presumably; the
                        // forward call above omits the argument).
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                        if (NULL != soln) {

        // TODO: Do we always want to offer a collscan solution?
        // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs.
        // A collscan is considered only when the query has no GEO_NEAR or TEXT node, no index
        // was hinted, and either INCLUDE_COLLSCAN is set or there are no solutions yet and
        // table scans are allowed.
        // NOTE(review): the opening brace of this 'if' is missing from the excerpt.
        if (    !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
             && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)
             && hintIndex.isEmpty()
             && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan)))
            QuerySolution* collscan = buildCollscanSoln(query, false, params);
            if (NULL != collscan) {
                QLOG() << "Planner: outputting a collscan:\n";
                QLOG() << collscan->toString() << endl;

        return Status::OK();