/**
 * Collects iterators to every document of 'collectionName' that satisfies 'query'.
 *
 * Matching iterators are appended to '*result' in the collection's iteration order.
 *
 * Returns the parser's error status when 'query' cannot be parsed, NoMatchingDocument when the
 * mock holds no collection with that name, and Status::OK() otherwise (including when no
 * document matched).
 */
Status AuthzManagerExternalStateMock::_queryVector(
            const NamespaceString& collectionName,
            const BSONObj& query,
            std::vector<BSONObjCollection::iterator>* result) {

        StatusWithMatchExpression parseResult = MatchExpressionParser::parse(query);
        if (!parseResult.isOK()) {
            return parseResult.getStatus();
        }

        // The parser hands back a raw expression tree that this function is responsible for.
        // Bind its lifetime to this scope so it is freed on every exit path (including an
        // exception escaping the matcher) instead of being leaked.
        struct MatcherOwner {
            explicit MatcherOwner(MatchExpression* parsed) : expr(parsed) {}
            ~MatcherOwner() { delete expr; }
            MatchExpression* const expr;
        } matcher(parseResult.getValue());

        NamespaceDocumentMap::iterator mapIt = _documents.find(collectionName);
        if (mapIt == _documents.end())
            return Status(ErrorCodes::NoMatchingDocument,
                          "No collection named " + collectionName.ns());

        for (BSONObjCollection::iterator vecIt = mapIt->second.begin();
             vecIt != mapIt->second.end();
             ++vecIt) {

            if (matcher.expr->matchesBSON(*vecIt)) {
                result->push_back(vecIt);
            }
        }
        return Status::OK();
    }
Example #2
0
// static
//
// Simplifies the expression tree rooted at 'root' and returns the root of the simplified tree.
// The input node may be deleted along the way, so callers must continue with the returned
// pointer rather than with 'root'.
//
//  - AND / OR: children are normalized, children of the same operator are flattened into the
//    parent, and a node left with exactly one child is replaced by that child.
//  - NOT: the single child is normalized in place.
//  - ELEM_MATCH_VALUE: every child is normalized in place.
//  - Anything else is returned untouched.
MatchExpression* CanonicalQuery::normalizeTree(MatchExpression* root) {
    switch (root->matchType()) {
        case MatchExpression::AND:
        case MatchExpression::OR: {
            std::vector<MatchExpression*>* const kids = root->getChildVector();

            // Clean up the subtrees first (there may be AND of AND of AND) so that any
            // flattening below operates on already-normalized children.
            // UNITTEST 11738048
            for (size_t pos = 0; pos < kids->size(); ++pos) {
                (*kids)[pos] = normalizeTree(root->getChild(pos));
            }

            // Grandchildren taken over from same-operator children. They are appended only
            // after the scan so that the scan itself never revisits them.
            std::vector<MatchExpression*> adopted;

            size_t pos = 0;
            while (pos < root->numChildren()) {
                MatchExpression* kid = root->getChild(pos);
                if (kid->matchType() != root->matchType()) {
                    ++pos;
                    continue;
                }

                // AND of an AND, or OR of an OR: take over the child's children.
                std::vector<MatchExpression*>* grandkids = kid->getChildVector();
                adopted.insert(adopted.end(), grandkids->begin(), grandkids->end());

                // TODO(opt): this is possibly n^2-ish
                kids->erase(kids->begin() + pos);

                // Empty the child before deleting it; deleting is only safe because the child
                // no longer references the nodes we just adopted.
                grandkids->clear();
                delete kid;
                // 'pos' is deliberately not advanced: the next child has shifted into it.
            }

            kids->insert(kids->end(), adopted.begin(), adopted.end());

            // An AND or OR over a single expression is just that expression.
            if (1 == root->numChildren()) {
                MatchExpression* only = root->getChild(0);
                kids->clear();
                delete root;
                return only;
            }
            break;
        }

        case MatchExpression::NOT: {
            // Detach the child, normalize it (which takes ownership of it), and hand whatever
            // comes back to the NOT node again.
            NotMatchExpression* notNode = static_cast<NotMatchExpression*>(root);
            notNode->resetChild(normalizeTree(notNode->releaseChild()));
            break;
        }

        case MatchExpression::ELEM_MATCH_VALUE: {
            // Nothing to merge here; just normalize each child in place.
            std::vector<MatchExpression*>* const kids = root->getChildVector();
            for (size_t pos = 0; pos < kids->size(); ++pos) {
                (*kids)[pos] = normalizeTree(root->getChild(pos));
            }
            break;
        }

        default:
            // NOR and all other node types are left alone.
            break;
    }

    return root;
}
    // Verifies that a $text predicate parses into a TEXT node that carries the requested search
    // string and language.
    TEST( MatchExpressionParserText, Parse1 ) {
        BSONObj query = fromjson( "{$text:{$search:\"awesome\", $language:\"english\"}}" );

        StatusWithMatchExpression result = MatchExpressionParser::parse( query );
        ASSERT_TRUE( result.isOK() );

        MatchExpression* exp = result.getValue();
        ASSERT_EQUALS( MatchExpression::TEXT, exp->matchType() );

        TextMatchExpression* textExp = static_cast<TextMatchExpression*>( exp );
        ASSERT_EQUALS( textExp->getQuery(), "awesome" );
        ASSERT_EQUALS( textExp->getLanguage(), "english" );

        // The parser returns a raw pointer that this test is responsible for; release it so the
        // expression tree is not leaked. 'textExp' aliases 'exp' and must not be used after this.
        delete exp;
    }
    // Verifies that a $near predicate written with $geometry parses into a GEO_NEAR node whose
    // data records the supplied $maxDistance.
    TEST( MatchExpressionParserGeoNear, ParseNear ) {
        BSONObj query = fromjson("{loc:{$near:{$maxDistance:100, "
                                 "$geometry:{type:\"Point\", coordinates:[0,0]}}}}");

        StatusWithMatchExpression result = MatchExpressionParser::parse( query );
        ASSERT_TRUE( result.isOK() );

        MatchExpression* exp = result.getValue();
        ASSERT_EQUALS(MatchExpression::GEO_NEAR, exp->matchType());

        GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp);
        ASSERT_EQUALS(gnexp->getData().maxDistance, 100);

        // The parser returns a raw pointer that this test is responsible for; release it so the
        // expression tree is not leaked. 'gnexp' aliases 'exp' and must not be used after this.
        delete exp;
    }
// Covers the legacy coordinate-pair form shared by $near, $nearSphere and $geoNear:
// {
//   $near/$nearSphere/$geoNear: [ <x>, <y> ],
//   $minDistance: <distance in radians>,
//   $maxDistance: <distance in radians>
// }
// A valid $near of this shape must parse into a GEO_NEAR node that keeps both distance bounds.
TEST(MatchExpressionParserGeoNear, ParseValidNear) {
    BSONObj nearQuery = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}");

    StatusWithMatchExpression parsed = MatchExpressionParser::parse(nearQuery);
    ASSERT_TRUE(parsed.isOK());

    // The parse result keeps ownership; we only borrow the root for inspection.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, root->matchType());

    GeoNearMatchExpression* geoNear = static_cast<GeoNearMatchExpression*>(root);
    ASSERT_EQ(geoNear->getData().maxDistance, 100);
    ASSERT_EQ(geoNear->getData().minDistance, 50);
}
// Covers the legacy coordinate-pair form shared by $near, $nearSphere and $geoNear:
// {
//   $near/$nearSphere/$geoNear: [ <x>, <y> ],
//   $minDistance: <distance in radians>,
//   $maxDistance: <distance in radians>
// }
// A valid $near of this shape, parsed without a collator and with extensions disallowed, must
// yield a GEO_NEAR node that keeps both distance bounds.
TEST(MatchExpressionParserGeoNear, ParseValidNear) {
    BSONObj nearQuery = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}");

    const CollatorInterface* noCollator = nullptr;
    StatusWithMatchExpression parsed = MatchExpressionParser::parse(
        nearQuery, ExtensionsCallbackDisallowExtensions(), noCollator);
    ASSERT_TRUE(parsed.isOK());

    // The parse result keeps ownership; we only borrow the root for inspection.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, root->matchType());

    GeoNearMatchExpression* geoNear = static_cast<GeoNearMatchExpression*>(root);
    ASSERT_EQ(geoNear->getData().maxDistance, 100);
    ASSERT_EQ(geoNear->getData().minDistance, 50);
}
Example #7
0
    // static
    //
    // Simplifies the expression tree rooted at 'root' and returns the root of the simplified
    // tree. Only AND and OR nodes are rewritten: their children are normalized, children of the
    // same operator are flattened into the parent, and a node left with exactly one child is
    // replaced by that child. The input node may be deleted, so callers must continue with the
    // returned pointer rather than with 'root'.
    MatchExpression* CanonicalQuery::normalizeTree(MatchExpression* root) {
        // Negations and every other node type are returned untouched.
        const bool isAndOrOr = MatchExpression::AND == root->matchType()
                            || MatchExpression::OR == root->matchType();
        if (!isAndOrOr) {
            return root;
        }

        vector<MatchExpression*>* const kids = root->getChildVector();

        // Clean up the subtrees first (there may be AND of AND of AND) so that the flattening
        // below operates on already-normalized children.
        // UNITTEST 11738048
        for (size_t pos = 0; pos < kids->size(); ++pos) {
            (*kids)[pos] = normalizeTree(root->getChild(pos));
        }

        // Grandchildren taken over from same-operator children. They are appended only after
        // the scan so that the scan itself never revisits them.
        vector<MatchExpression*> adopted;

        size_t pos = 0;
        while (pos < root->numChildren()) {
            MatchExpression* kid = root->getChild(pos);
            if (kid->matchType() != root->matchType()) {
                ++pos;
                continue;
            }

            // AND of an AND, or OR of an OR: take over the child's children.
            for (size_t g = 0; g < kid->numChildren(); ++g) {
                adopted.push_back(kid->getChild(g));
            }

            // TODO(opt): this is possibly n^2-ish
            kids->erase(kids->begin() + pos);

            // Empty the child before deleting it; deleting is only safe because the child no
            // longer references the nodes we just adopted.
            kid->getChildVector()->clear();
            delete kid;
            // 'pos' is deliberately not advanced: the next child has shifted into it.
        }

        kids->insert(kids->end(), adopted.begin(), adopted.end());

        // An AND or OR over a single expression is just that expression.
        if (1 != root->numChildren()) {
            return root;
        }

        MatchExpression* only = root->getChild(0);
        kids->clear();
        delete root;
        return only;
    }
/**
 * Reports whether 'expr' is independent of every path in 'pathSet'.
 *
 * A logical node is independent exactly when all of its children are (so a logical node without
 * children is independent). A non-logical node is treated as a leaf: an elemMatch is always
 * reported as dependent, and any other leaf is judged by its path via isLeafIndependentOf().
 */
bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
    if (!expr.isLogical()) {
        // Leaf case. No attempt is made to analyze an elemMatch; conservatively answer "no".
        if (isElemMatch(expr)) {
            return false;
        }
        return isLeafIndependentOf(expr.path(), pathSet);
    }

    // Logical case: a single dependent child makes the whole node dependent.
    for (size_t idx = 0; idx < expr.numChildren(); ++idx) {
        const bool childIndependent = isIndependentOf(*expr.getChild(idx), pathSet);
        if (!childIndependent) {
            return false;
        }
    }
    return true;
}
Example #9
0
 // Recursively tags the nodes of 'tree' that cannot use an index on their own field: each such
 // node receives a new IndexTag holding the smallest index value found among its children's
 // tags. Nodes that can use an index on their own field, and nodes none of whose children
 // carry a tag, are left as they are.
 void tagForSort(MatchExpression* tree) {
     if (Indexability::nodeCanUseIndexOnOwnField(tree)) {
         return;
     }

     size_t lowestIndex = IndexTag::kNoIndex;
     const size_t childTotal = tree->numChildren();
     for (size_t pos = 0; pos < childTotal; ++pos) {
         MatchExpression* kid = tree->getChild(pos);

         // Tag the subtree first so that the child's own tag (if any) is available below.
         tagForSort(kid);

         IndexTag* kidTag = static_cast<IndexTag*>(kid->getTag());
         if (NULL == kidTag) {
             continue;
         }
         if (kidTag->index < lowestIndex) {
             lowestIndex = kidTag->index;
         }
     }

     // kNoIndex still in place means no child contributed a tag.
     if (IndexTag::kNoIndex == lowestIndex) {
         return;
     }
     tree->setTag(new IndexTag(lowestIndex));
 }
// Verifies that a $near predicate written with $geometry, parsed without a collator and with
// extensions disallowed, yields a GEO_NEAR node whose data records the supplied $maxDistance.
TEST(MatchExpressionParserGeoNear, ParseNear) {
    BSONObj nearQuery = fromjson(
        "{loc:{$near:{$maxDistance:100, "
        "$geometry:{type:\"Point\", coordinates:[0,0]}}}}");

    const CollatorInterface* noCollator = nullptr;
    StatusWithMatchExpression parsed = MatchExpressionParser::parse(
        nearQuery, ExtensionsCallbackDisallowExtensions(), noCollator);
    ASSERT_TRUE(parsed.isOK());

    // The parse result keeps ownership; we only borrow the root for inspection.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQUALS(MatchExpression::GEO_NEAR, root->matchType());

    GeoNearMatchExpression* geoNear = static_cast<GeoNearMatchExpression*>(root);
    ASSERT_EQUALS(geoNear->getData().maxDistance, 100);
}
Example #11
0
    // static
    // XXX TODO: This does not belong here at all.
    //
    // Pulls a lone OR child of a root AND up to the root: AND(a, OR(b, c)) becomes
    // OR(AND(b, a), AND(c, a)), and the result is then normalized. Any tree that is not an AND
    // with exactly one OR child is returned unchanged. When the rewrite happens, 'tree' is
    // deleted and the returned pointer is the new root.
    MatchExpression* CanonicalQuery::logicalRewrite(MatchExpression* tree) {
        // The rewrite only applies when the root is an AND.
        if (MatchExpression::AND != tree->matchType()) {
            return tree;
        }

        // Count the OR children up front so we can leave early when there is nothing to do.
        size_t orCount = 0;
        for (size_t pos = 0; pos < tree->numChildren(); ++pos) {
            if (MatchExpression::OR == tree->getChild(pos)->matchType()) {
                ++orCount;
            }
        }

        // Only the single-OR case is handled right now.
        if (1 != orCount) {
            return tree;
        }

        // Unhook the OR from the AND, leaving the AND with its remaining children.
        invariant(NULL != tree->getChildVector());
        std::vector<MatchExpression*>& andChildren = *tree->getChildVector();
        MatchExpression* theOr = NULL;
        for (std::vector<MatchExpression*>::iterator it = andChildren.begin();
             it != andChildren.end();
             ++it) {
            if (MatchExpression::OR == (*it)->matchType()) {
                theOr = *it;
                andChildren.erase(it);
                break;
            }
        }

        // Replace every OR branch with AND(branch, copy of the remaining root).
        invariant(NULL != theOr);
        invariant(NULL != theOr->getChildVector());
        std::vector<MatchExpression*>& branches = *theOr->getChildVector();
        for (std::vector<MatchExpression*>::iterator it = branches.begin();
             it != branches.end();
             ++it) {
            AndMatchExpression* conjunction = new AndMatchExpression();
            conjunction->add(*it);
            conjunction->add(tree->shallowClone());
            *it = conjunction;
        }

        // Each branch now has its own clone, so the original AND is no longer needed.
        delete tree;

        // Clean up any consequences from this tomfoolery.
        return normalizeTree(theOr);
    }
Example #12
0
    /**
     * Gathers the equality predicates reachable from 'root' through $and nodes into
     * '*equalities', keyed by the predicate's path.
     *
     * When 'fullPathsToExtract' is non-NULL, an equality whose path conflicts with none of those
     * paths is skipped, and one that does conflict must pass checkPathIsPrefixOf() against the
     * conflicting paths. Every recorded equality must also pass checkEqualityConflicts() against
     * what has been collected so far. The first failing check's status is returned; nodes that
     * are neither EQ nor AND are ignored.
     */
    static Status _extractFullEqualityMatches(const MatchExpression& root,
                                              const FieldRefSet* fullPathsToExtract,
                                              EqualityMatches* equalities) {

        if (root.matchType() == MatchExpression::AND) {
            // Descend into each conjunct, stopping at the first failure.
            for (size_t idx = 0; idx < root.numChildren(); ++idx) {
                Status childStatus = _extractFullEqualityMatches(*root.getChild(idx),
                                                                 fullPathsToExtract,
                                                                 equalities);
                if (!childStatus.isOK())
                    return childStatus;
            }
            return Status::OK();
        }

        if (root.matchType() != MatchExpression::EQ) {
            // Not an equality and not an $and: nothing to extract here.
            return Status::OK();
        }

        const EqualityMatchExpression& eqNode =
            static_cast<const EqualityMatchExpression&>(root);
        FieldRef eqPath(eqNode.path());

        if (fullPathsToExtract) {
            FieldRefSet conflicting;
            fullPathsToExtract->findConflicts(&eqPath, &conflicting);

            // A path unrelated to all of the requested full paths is simply skipped.
            if (conflicting.empty())
                return Status::OK();

            // The equality's path has to be a prefix of every path it conflicts with.
            Status prefixStatus = checkPathIsPrefixOf(eqPath, conflicting);
            if (!prefixStatus.isOK())
                return prefixStatus;
        }

        Status conflictStatus = checkEqualityConflicts(*equalities, eqPath);
        if (!conflictStatus.isOK())
            return conflictStatus;

        equalities->insert(make_pair(eqNode.path(), &eqNode));
        return Status::OK();
    }
Example #13
0
    // static
    //
    // Looks for an AND node with a tagged EQ child that can be answered by the leading field of
    // a unique, single-field index. When one is found, all assignments beneath that AND are
    // cleared and the EQ child is re-assigned to just that index. Otherwise the search continues
    // recursively in every child of 'node'.
    void QueryPlannerIXSelect::stripUnneededAssignments(MatchExpression* node,
                                                        const std::vector<IndexEntry>& indices) {
        if (MatchExpression::AND == node->matchType()) {
            for (size_t childPos = 0; childPos < node->numChildren(); ++childPos) {
                MatchExpression* child = node->getChild(childPos);

                // Only tagged equality children are candidates.
                if (MatchExpression::EQ != child->matchType() || !child->getTag()) {
                    continue;
                }

                RelevantTag* rt = static_cast<RelevantTag*>(child->getTag());

                // Walk the indices whose leading field can answer this predicate.
                for (size_t k = 0; k < rt->first.size(); ++k) {
                    const size_t index = rt->first[k];
                    const IndexEntry& candidate = indices[index];

                    if (!candidate.unique || 1 != candidate.keyPattern.nFields()) {
                        continue;
                    }

                    // This EQ predicate can use a single-field unique index. Wipe the
                    // assignments of the whole subtree under 'node', then give 'child' back
                    // exactly one assignment: the unique index.
                    clearAssignments(node);
                    RelevantTag* resetTag = static_cast<RelevantTag*>(child->getTag());
                    resetTag->first.push_back(index);

                    // Tag state has been reset for the entire subtree at 'node'; nothing is
                    // left to do here.
                    return;
                }
            }
        }

        for (size_t childPos = 0; childPos < node->numChildren(); ++childPos) {
            stripUnneededAssignments(node->getChild(childPos), indices);
        }
    }
// Verifies that the legacy coordinate-pair form of $nearSphere, parsed with all special features
// allowed, yields a GEO_NEAR node that keeps both the $maxDistance and $minDistance bounds.
TEST(MatchExpressionParserGeoNear, ParseValidNearSphere) {
    BSONObj sphereQuery =
        fromjson("{loc: {$nearSphere: [0,0], $maxDistance: 100, $minDistance: 50}}");

    const CollatorInterface* noCollator = nullptr;
    const boost::intrusive_ptr<ExpressionContextForTest> testCtx(new ExpressionContextForTest());
    StatusWithMatchExpression parsed =
        MatchExpressionParser::parse(sphereQuery,
                                     noCollator,
                                     testCtx,
                                     ExtensionsCallbackNoop(),
                                     MatchExpressionParser::kAllowAllSpecialFeatures);
    ASSERT_TRUE(parsed.isOK());

    // The parse result keeps ownership; we only borrow the root for inspection.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, root->matchType());

    GeoNearMatchExpression* geoNear = static_cast<GeoNearMatchExpression*>(root);
    ASSERT_EQ(geoNear->getData().maxDistance, 100);
    ASSERT_EQ(geoNear->getData().minDistance, 50);
}
Example #15
0
    /**
     * Plans every child of the root of '_query' as a query of its own.
     *
     * Each child is canonicalized and handed to the query planner. The resulting canonical
     * queries and solution lists are pushed onto '_cqs' and '_solutions' for later use. Returns
     * BadValue as soon as a child cannot be canonicalized, cannot be planned, or yields no
     * solutions; returns Status::OK() once every child has been planned.
     */
    Status SubplanRunner::planSubqueries() {
        MatchExpression* theOr = _query->root();

        // Remember the position of each available index, keyed by its key pattern.
        size_t indexPos = 0;
        while (indexPos < _plannerParams.indices.size()) {
            const IndexEntry& entry = _plannerParams.indices[indexPos];
            _indexMap[entry.keyPattern] = indexPos;
            QLOG() << "Subplanner: index " << indexPos << " is " << entry.toString() << endl;
            ++indexPos;
        }

        const WhereCallbackReal whereCallback(_collection->ns().db());

        for (size_t childNo = 0; childNo < theOr->numChildren(); ++childNo) {
            // Build a standalone canonical query out of this branch.
            MatchExpression* branch = theOr->getChild(childNo);
            CanonicalQuery* branchCQ;
            Status canonStatus = CanonicalQuery::canonicalize(*_query,
                                                              branch,
                                                              &branchCQ,
                                                              whereCallback);
            if (!canonStatus.isOK()) {
                mongoutils::str::stream msg;
                msg << "Subplanner: Can't canonicalize subchild " << branch->toString()
                    << " " << canonStatus.reason();
                return Status(ErrorCodes::BadValue, msg);
            }

            // From here on the canonical query is released automatically on early return.
            auto_ptr<CanonicalQuery> branchOwner(branchCQ);

            vector<QuerySolution*> branchSolutions;

            // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from
            // considering any plan that's a collscan.
            QLOG() << "Subplanner: planning child " << childNo << " of " << theOr->numChildren();
            Status planStatus = QueryPlanner::plan(*branchOwner, _plannerParams, &branchSolutions);

            if (!planStatus.isOK()) {
                mongoutils::str::stream msg;
                msg << "Subplanner: Can't plan for subchild " << branchCQ->toString()
                    << " " << planStatus.reason();
                return Status(ErrorCodes::BadValue, msg);
            }
            QLOG() << "Subplanner: got " << branchSolutions.size() << " solutions";

            if (branchSolutions.empty()) {
                // A single branch without an indexed solution means we give up entirely.
                mongoutils::str::stream msg;
                msg << "Subplanner: No solutions for subchild " << branchCQ->toString();
                return Status(ErrorCodes::BadValue, msg);
            }

            // Keep the canonicalized subquery and its solutions around so that they can be
            // used when the subplans are run later on.
            _cqs.push(branchOwner.release());
            _solutions.push(branchSolutions);
        }

        return Status::OK();
    }
    /**
     * Finds the first stored user document in the database of 'usersNamespace' that satisfies
     * 'query' and copies it into '*result'.
     *
     * Returns the parser's error status when 'query' cannot be parsed, Status::OK() when a
     * document was found, and UserNotFound otherwise ('*result' is left untouched in that case).
     */
    Status AuthzManagerExternalStateMock::_findUser(const std::string& usersNamespace,
                                                    const BSONObj& query,
                                                    BSONObj* result) const {
        StatusWithMatchExpression parseResult = MatchExpressionParser::parse(query);
        if (!parseResult.isOK()) {
            return parseResult.getStatus();
        }

        // The parser hands back a raw expression tree that this function is responsible for.
        // Bind its lifetime to this scope so it is freed on every exit path (including an
        // exception escaping the matcher) instead of being leaked.
        struct MatcherOwner {
            explicit MatcherOwner(MatchExpression* parsed) : expr(parsed) {}
            ~MatcherOwner() { delete expr; }
            MatchExpression* const expr;
        } matcher(parseResult.getValue());

        unordered_map<std::string, std::vector<BSONObj> >::const_iterator mapIt;
        for (mapIt = _userDocuments.begin(); mapIt != _userDocuments.end(); ++mapIt) {
            // Only documents stored under the requested database are candidates. Checking this
            // once per map entry avoids recomputing the database name for every document.
            if (!(nsToDatabase(usersNamespace) == mapIt->first)) {
                continue;
            }

            for (std::vector<BSONObj>::const_iterator vecIt = mapIt->second.begin();
                    vecIt != mapIt->second.end(); ++vecIt) {
                if (matcher.expr->matchesBSON(*vecIt)) {
                    *result = *vecIt;
                    return Status::OK();
                }
            }
        }
        return Status(ErrorCodes::UserNotFound, "User not found");
    }
Example #17
0
    void PlanEnumerator::tagMemo(size_t id) {
        QLOG() << "Tagging memoID " << id << endl;
        NodeAssignment* assign = _memo[id];
        verify(NULL != assign);

        if (NULL != assign->pred) {
            PredicateAssignment* pa = assign->pred.get();
            verify(NULL == pa->expr->getTag());
            verify(pa->indexToAssign < pa->first.size());
            pa->expr->setTag(new IndexTag(pa->first[pa->indexToAssign]));
        }
        else if (NULL != assign->orAssignment) {
            OrAssignment* oa = assign->orAssignment.get();
            for (size_t i = 0; i < oa->subnodes.size(); ++i) {
                tagMemo(oa->subnodes[i]);
            }
        }
        else if (NULL != assign->newAnd) {
            AndAssignment* aa = assign->newAnd.get();

            if (AndAssignment::MANDATORY == aa->state) {
                verify(aa->counter < aa->mandatory.size());
                const OneIndexAssignment& assign = aa->mandatory[aa->counter];
                for (size_t i = 0; i < assign.preds.size(); ++i) {
                    MatchExpression* pred = assign.preds[i];
                    verify(NULL == pred->getTag());
                    pred->setTag(new IndexTag(assign.index, assign.positions[i]));
                }
            }
            else if (AndAssignment::PRED_CHOICES == aa->state) {
                verify(aa->counter < aa->predChoices.size());
                const OneIndexAssignment& assign = aa->predChoices[aa->counter];
                for (size_t i = 0; i < assign.preds.size(); ++i) {
                    MatchExpression* pred = assign.preds[i];
                    verify(NULL == pred->getTag());
                    pred->setTag(new IndexTag(assign.index, assign.positions[i]));
                }
            }
            else {
                verify(AndAssignment::SUBNODES == aa->state);
                verify(aa->counter < aa->subnodes.size());
                tagMemo(aa->subnodes[aa->counter]);
            }
        }
        else {
            verify(0);
        }
    }
Example #18
0
    // static
    Status QueryPlanner::plan(const CanonicalQuery& query,
                              const QueryPlannerParams& params,
                              std::vector<QuerySolution*>* out) {

        QLOG() << "=============================\n"
               << "Beginning planning, options = " << optionString(params.options) << endl
               << "Canonical query:\n" << query.toString() << endl
               << "============================="
               << endl;

        for (size_t i = 0; i < params.indices.size(); ++i) {
            QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl;
        }

        bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

        // If the query requests a tailable cursor, the only solution is a collscan + filter with
        // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
        // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
        // can't provide one.  Is this what we want?
        if (query.getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
                && canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, true, params);
                if (NULL != soln) {
                    out->push_back(soln);
                }
            }
            return Status::OK();
        }

        // The hint can be $natural: 1.  If this happens, output a collscan.  It's a weird way of
        // saying "table scan for two, please."
        if (!query.getParsed().getHint().isEmpty()) {
            BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural");
            if (!natural.eoo()) {
                QLOG() << "forcing a table scan due to hinted $natural\n";
                // min/max are incompatible with $natural.
                if (canTableScan && query.getParsed().getMin().isEmpty()
                                 && query.getParsed().getMax().isEmpty()) {
                    QuerySolution* soln = buildCollscanSoln(query, false, params);
                    if (NULL != soln) {
                        out->push_back(soln);
                    }
                }
                return Status::OK();
            }
        }

        // Figure out what fields we care about.
        unordered_set<string> fields;
        QueryPlannerIXSelect::getFields(query.root(), "", &fields);

        for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
            QLOG() << "predicate over field " << *it << endl;
        }

        // Filter our indices so we only look at indices that are over our predicates.
        vector<IndexEntry> relevantIndices;

        // Hints require us to only consider the hinted index.
        BSONObj hintIndex = query.getParsed().getHint();

        // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
        // plan.  If that fails, just scan the _id index.
        if (query.getParsed().isSnapshot()) {
            // Find the ID index in indexKeyPatterns.  It's our hint.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    hintIndex = params.indices[i].keyPattern;
                    break;
                }
            }
        }

        size_t hintIndexNumber = numeric_limits<size_t>::max();

        if (hintIndex.isEmpty()) {
            QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);
        }
        else {
            // Sigh.  If the hint is specified it might be using the index name.
            BSONElement firstHintElt = hintIndex.firstElement();
            if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
                string hintName = firstHintElt.String();
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (params.indices[i].name == hintName) {
                        QLOG() << "hint by name specified, restricting indices to "
                             << params.indices[i].keyPattern.toString() << endl;
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        hintIndexNumber = i;
                        hintIndex = params.indices[i].keyPattern;
                        break;
                    }
                }
            }
            else {
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        QLOG() << "hint specified, restricting indices to " << hintIndex.toString()
                             << endl;
                        hintIndexNumber = i;
                        break;
                    }
                }
            }

            if (hintIndexNumber == numeric_limits<size_t>::max()) {
                return Status(ErrorCodes::BadValue, "bad hint");
            }
        }

        // Deal with the .min() and .max() query options.  If either exist we can only use an index
        // that matches the object inside.
        if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) {
            BSONObj minObj = query.getParsed().getMin();
            BSONObj maxObj = query.getParsed().getMax();

            // This is the index into params.indices[...] that we use.
            size_t idxNo = numeric_limits<size_t>::max();

            // If there's an index hinted we need to be able to use it.
            if (!hintIndex.isEmpty()) {
                if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) {
                    QLOG() << "minobj doesnt work w hint";
                    return Status(ErrorCodes::BadValue,
                                  "hint provided does not work with min query");
                }

                if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) {
                    QLOG() << "maxobj doesnt work w hint";
                    return Status(ErrorCodes::BadValue,
                                  "hint provided does not work with max query");
                }

                idxNo = hintIndexNumber;
            }
            else {
                // No hinted index, look for one that is compatible (has same field names and
                // ordering thereof).
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const BSONObj& kp = params.indices[i].keyPattern;

                    BSONObj toUse = minObj.isEmpty() ? maxObj : minObj;
                    if (indexCompatibleMaxMin(toUse, kp)) {
                        idxNo = i;
                        break;
                    }
                }
            }
            
            if (idxNo == numeric_limits<size_t>::max()) {
                QLOG() << "Can't find relevant index to use for max/min query";
                // Can't find an index to use, bail out.
                return Status(ErrorCodes::BadValue,
                              "unable to find relevant index for max/min query");
            }

            // maxObj can be empty; the index scan just goes until the end.  minObj can't be empty
            // though, so if it is, we make a minKey object.
            if (minObj.isEmpty()) {
                BSONObjBuilder bob;
                bob.appendMinKey("");
                minObj = bob.obj();
            }
            else {
                // Must strip off the field names to make an index key.
                minObj = stripFieldNames(minObj);
            }

            if (!maxObj.isEmpty()) {
                // Must strip off the field names to make an index key.
                maxObj = stripFieldNames(maxObj);
            }

            QLOG() << "max/min query using index " << params.indices[idxNo].toString() << endl;

            // Make our scan and output.
            QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan(params.indices[idxNo],
                                                                            query,
                                                                            params,
                                                                            minObj,
                                                                            maxObj);

            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
            if (NULL != soln) {
                out->push_back(soln);
            }

            return Status::OK();
        }

        for (size_t i = 0; i < relevantIndices.size(); ++i) {
            QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl;
        }

        // Figure out how useful each index is to each predicate.
        // query.root() is now annotated with RelevantTag(s).
        QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);

        QLOG() << "rated tree" << endl;
        QLOG() << query.root()->toString() << endl;

        // If there is a GEO_NEAR it must have an index it can use directly.
        // XXX: move into data access?
        MatchExpression* gnNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
            // No index for GEO_NEAR?  No query.
            RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                QLOG() << "unable to find index for $geoNear query" << endl;
                return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query");
            }

            GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode);

            vector<size_t> newFirst;

            // 2d + GEO_NEAR is annoying.  Because 2d's GEO_NEAR isn't streaming we have to embed
            // the full query tree inside it as a matcher.
            for (size_t i = 0; i < tag->first.size(); ++i) {
                // GEO_NEAR has a non-2d index it can use.  We can deal w/that in normal planning.
                if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) {
                    newFirst.push_back(i);
                    continue;
                }

                // If we're here, GEO_NEAR has a 2d index.  We create a 2dgeonear plan with the
                // entire tree as a filter, if possible.

                GeoNear2DNode* solnRoot = new GeoNear2DNode();
                solnRoot->nq = gnme->getData();
                if (NULL != query.getProj()) {
                    solnRoot->addPointMeta = query.getProj()->wantGeoNearPoint();
                    solnRoot->addDistMeta = query.getProj()->wantGeoNearDistance();
                }

                if (MatchExpression::GEO_NEAR != query.root()->matchType()) {
                    // root is an AND, clone and delete the GEO_NEAR child.
                    MatchExpression* filterTree = query.root()->shallowClone();
                    verify(MatchExpression::AND == filterTree->matchType());

                    bool foundChild = false;
                    for (size_t i = 0; i < filterTree->numChildren(); ++i) {
                        if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) {
                            foundChild = true;
                            filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i);
                            break;
                        }
                    }
                    verify(foundChild);
                    solnRoot->filter.reset(filterTree);
                }

                solnRoot->numWanted = query.getParsed().getNumToReturn();
                if (0 == solnRoot->numWanted) {
                    solnRoot->numWanted = 100;
                }
                solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern;

                // Remove the 2d index.  2d can only be the first field, and we know there is
                // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll
                // only be first for gnNode.
                tag->first.erase(tag->first.begin() + i);

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);

                if (NULL != soln) {
                    out->push_back(soln);
                }
            }

            // Continue planning w/non-2d indices tagged for this pred.
            tag->first.swap(newFirst);

            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return Status::OK();
            }
        }

        // Likewise, if there is a TEXT it must have an index it can use directly.
        MatchExpression* textNode;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
            RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return Status::OK();
            }
        }

        // If we have any relevant indices, we try to create indexed plans.
        if (0 < relevantIndices.size()) {
            // The enumerator spits out trees tagged with IndexTag(s).
            PlanEnumeratorParams enumParams;
            enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION;
            enumParams.root = query.root();
            enumParams.indices = &relevantIndices;

            PlanEnumerator isp(enumParams);
            isp.init();

            MatchExpression* rawTree;
            // XXX: have limit on # of indexed solns we'll consider.  We could have a perverse
            // query and index that could make n^2 very unpleasant.
            while (isp.getNext(&rawTree)) {
                QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString()
                       << endl;

                // This can fail if enumeration makes a mistake.
                QuerySolutionNode* solnRoot =
                    QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices);

                if (NULL == solnRoot) { continue; }

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
                if (NULL != soln) {
                    QLOG() << "Planner: adding solution:\n" << soln->toString() << endl;
                    out->push_back(soln);
                }
            }
        }

        QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n";

        // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
        // scan the entire index to provide results and output that as our plan.  This is the
        // desired behavior when an index is hinted that is not relevant to the query.
        if (!hintIndex.isEmpty()) {
            if (0 == out->size()) {
                QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
                verify(NULL != soln);
                QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl;
                out->push_back(soln);
            }
            return Status::OK();
        }

        // If a sort order is requested, there may be an index that provides it, even if that
        // index is not over any predicates in the query.
        //
        if (!query.getParsed().getSort().isEmpty()
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {

            // See if we have a sort provided from an index already.
            bool usingIndexToSort = false;
            for (size_t i = 0; i < out->size(); ++i) {
                QuerySolution* soln = (*out)[i];
                if (!soln->hasSortStage) {
                    usingIndexToSort = true;
                    break;
                }
            }

            if (!usingIndexToSort) {
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const IndexEntry& index = params.indices[i];
                    if (index.sparse) {
                        continue;
                    }
                    const BSONObj kp = LiteParsedQuery::normalizeSortOrder(index.keyPattern);
                    if (providesSort(query, kp)) {
                        QLOG() << "Planner: outputting soln that uses index to provide sort."
                               << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                    if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                        QLOG() << "Planner: outputting soln that uses (reverse) index "
                               << "to provide sort." << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                }
            }
        }

        // TODO: Do we always want to offer a collscan solution?
        // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs.
        if (    !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
             && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)
             && hintIndex.isEmpty()
             && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan)))
        {
            QuerySolution* collscan = buildCollscanSoln(query, false, params);
            if (NULL != collscan) {
                out->push_back(collscan);
                QLOG() << "Planner: outputting a collscan:\n";
                QLOG() << collscan->toString() << endl;
            }
        }

        return Status::OK();
    }
Example #19
0
    /**
     * Walks the subtree rooted at 'node' and drops RelevantTag assignments to the text index
     * numbered 'idx' wherever that index cannot be used.  'prefixPaths' is the set of prefix
     * paths of that text index.
     *
     * Assignments are left in place only on the direct children of an AND that has both a TEXT
     * child assigned to 'idx' and, for every path in 'prefixPaths', a non-TEXT child assigned
     * to 'idx' whose path() is that prefix.  The walk does not descend below NOT or NOR nodes.
     */
    static void stripInvalidAssignmentsToTextIndex(MatchExpression* node,
                                                   size_t idx,
            const unordered_set<StringData, StringData::Hasher>& prefixPaths) {

        // Being called on a node that can use an index over its own field means no enclosing
        // AND vouched for it: it is either a text pred with nothing over its prefix, or a
        // non-text pred with no AND-related text pred.  Either way the assignment is removed.
        if (Indexability::nodeCanUseIndexOnOwnField(node)) {
            removeIndexRelevantTag(node, idx);
            return;
        }

        // Negation nodes end the traversal; nothing beneath them is visited.
        const bool isNegation = MatchExpression::NOT == node->matchType()
                                || MatchExpression::NOR == node->matchType();
        if (isNegation) {
            return;
        }

        // A text index with prefixes is only usable when the text pred and the preds over
        // every prefix path live in the same AND.  Anything that is not an AND (an OR, or some
        // kind of array operator) therefore just forwards the walk to each of its children.
        if (MatchExpression::AND != node->matchType()) {
            for (size_t childIdx = 0; childIdx < node->numChildren(); ++childIdx) {
                stripInvalidAssignmentsToTextIndex(node->getChild(childIdx), idx, prefixPaths);
            }
            return;
        }

        // From here on 'node' is an AND; check whether its children satisfy the prefix
        // requirements of the text index.
        invariant(node->matchType() == MatchExpression::AND);

        bool sawTextPred = false;

        // Starts out as every prefix path.  Each non-text child assigned to 'idx' erases its
        // own path from the set, so an empty set at the end means every prefix is covered.
        unordered_set<StringData, StringData::Hasher> unmatchedPrefixes = prefixPaths;

        for (size_t childIdx = 0; childIdx < node->numChildren(); ++childIdx) {
            MatchExpression* child = node->getChild(childIdx);
            RelevantTag* tag = static_cast<RelevantTag*>(child->getTag());

            // A child counts as assigned when 'idx' appears in either list of its tag.
            const bool assignedToIdx =
                NULL != tag
                && (tag->first.end() != std::find(tag->first.begin(), tag->first.end(), idx)
                    || tag->notFirst.end() != std::find(tag->notFirst.begin(),
                                                        tag->notFirst.end(),
                                                        idx));

            if (!assignedToIdx) {
                // Untagged children may be logical operators, and tagged-but-unassigned
                // children may still be hiding assignments to 'idx' further down.  Recurse.
                stripInvalidAssignmentsToTextIndex(child, idx, prefixPaths);
                continue;
            }

            if (MatchExpression::TEXT == child->matchType()) {
                sawTextPred = true;
            }
            else {
                // Possibly one fewer prefix to look for.  A suffix assignment on the index
                // also lands here; its path is not a prefix, so the erase is then a no-op.
                unmatchedPrefixes.erase(child->path());
            }
        }

        // Text pred present and every prefix covered: the assignments on this AND stand.
        if (sawTextPred && unmatchedPrefixes.empty()) {
            return;
        }

        // Otherwise the prerequisites for using the text index were not met, so the
        // assignments are removed from all children of the AND.
        for (size_t childIdx = 0; childIdx < node->numChildren(); ++childIdx) {
            stripInvalidAssignmentsToTextIndex(node->getChild(childIdx), idx, prefixPaths);
        }
    }
Example #20
0
    /**
     * Walks the subtree rooted at 'node' and drops RelevantTag assignments to the index
     * numbered 'idx' wherever they are not backed by a geo predicate.
     *
     * Assignments are left in place only on the direct children of an AND that has a GEO or
     * GEO_NEAR child assigned to 'idx'.  The walk does not descend below NOT or NOR nodes.
     */
    static void stripInvalidAssignmentsTo2dsphereIndex(MatchExpression* node, size_t idx) {

        // Being called on a node that can use an index over its own field means no enclosing
        // AND vouched for it, so its assignment to 'idx' is removed.
        if (Indexability::nodeCanUseIndexOnOwnField(node)) {
            removeIndexRelevantTag(node, idx);
            return;
        }

        switch (node->matchType()) {
        case MatchExpression::NOT:
        case MatchExpression::NOR:
            // Don't bother peeking inside of negations.
            return;

        case MatchExpression::AND:
            // Handled below.
            break;

        default:
            // An OR or some kind of array operator: forward the walk to every child.
            for (size_t childIdx = 0; childIdx < node->numChildren(); ++childIdx) {
                stripInvalidAssignmentsTo2dsphereIndex(node->getChild(childIdx), idx);
            }
            return;
        }

        // 'node' is an AND.  Look for a geo predicate among the children assigned to 'idx'.
        bool sawGeoPred = false;

        for (size_t childIdx = 0; childIdx < node->numChildren(); ++childIdx) {
            MatchExpression* child = node->getChild(childIdx);
            RelevantTag* tag = static_cast<RelevantTag*>(child->getTag());

            // A child counts as assigned when 'idx' appears in either list of its tag.
            const bool assignedToIdx =
                NULL != tag
                && (tag->first.end() != std::find(tag->first.begin(), tag->first.end(), idx)
                    || tag->notFirst.end() != std::find(tag->notFirst.begin(),
                                                        tag->notFirst.end(),
                                                        idx));

            if (!assignedToIdx) {
                // Untagged children may be logical operators, and tagged-but-unassigned
                // children may still be hiding assignments to 'idx' further down.  Recurse.
                stripInvalidAssignmentsTo2dsphereIndex(child, idx);
                continue;
            }

            const MatchExpression::MatchType childType = child->matchType();
            if (MatchExpression::GEO == childType || MatchExpression::GEO_NEAR == childType) {
                sawGeoPred = true;
            }
        }

        // A geo predicate is assigned to the index: the assignments on this AND stand.
        if (sawGeoPred) {
            return;
        }

        // Without a geo predicate our results aren't a subset of what's in the geo index, so
        // using the index would miss results.  Remove the assignments from every child.
        for (size_t childIdx = 0; childIdx < node->numChildren(); ++childIdx) {
            stripInvalidAssignmentsTo2dsphereIndex(node->getChild(childIdx), idx);
        }
    }
Example #21
0
// Plans every branch of the query's OR independently.
//
// The query's root expression is cloned into '_orExpression' (and rewritten to a rooted OR when
// isContainedOr() reports a contained OR).  Each entry of '_plannerParams.indices' is recorded in
// '_indexMap' by key pattern.  Then, for each child of '_orExpression', a BranchPlanningResult is
// appended to '_branchResults', the child is canonicalized as its own query, and the branch
// either picks up a cached solution from the collection's plan cache or is planned from scratch
// with QueryPlanner::plan().
//
// Returns BadValue when a child cannot be canonicalized, when planning a child fails, or when
// planning a child yields zero solutions; returns Status::OK() otherwise.
Status SubplanStage::planSubqueries() {
    _orExpression = _query->root()->shallowClone();
    if (isContainedOr(_orExpression.get())) {
        _orExpression = rewriteToRootedOr(std::move(_orExpression));
        invariant(CanonicalQuery::isValid(_orExpression.get(), _query->getParsed()).isOK());
    }

    // Remember where each index sits in the planner params, keyed by its key pattern.
    size_t indexPos = 0;
    for (const IndexEntry& entry : _plannerParams.indices) {
        _indexMap[entry.keyPattern] = indexPos;
        LOG(5) << "Subplanner: index " << indexPos << " is " << entry.toString();
        ++indexPos;
    }

    const ExtensionsCallbackReal extensionsCallback(getOpCtx(), &_collection->ns());

    for (size_t branchIdx = 0; branchIdx < _orExpression->numChildren(); ++branchIdx) {
        // Every branch gets its own slot to collect planning output.
        _branchResults.push_back(new BranchPlanningResult());
        BranchPlanningResult* branch = _branchResults.back();

        MatchExpression* branchExpr = _orExpression->getChild(branchIdx);

        // Promote this branch to a stand-alone canonical query.
        auto canonicalized = CanonicalQuery::canonicalize(*_query, branchExpr, extensionsCallback);
        if (!canonicalized.isOK()) {
            mongoutils::str::stream msg;
            msg << "Can't canonicalize subchild " << branchExpr->toString() << " "
                << canonicalized.getStatus().reason();
            return Status(ErrorCodes::BadValue, msg);
        }

        branch->canonicalQuery = std::move(canonicalized.getValue());

        // Try the plan cache first.  The lookup is only attempted for queries the cache would
        // accept, and 'cachedRaw' is only meaningful when the lookup reports success.
        CachedSolution* cachedRaw;
        const bool haveCachedPlan = PlanCache::shouldCacheQuery(*branch->canonicalQuery) &&
            _collection->infoCache()
                ->getPlanCache()
                ->get(*branch->canonicalQuery, &cachedRaw)
                .isOK();

        if (haveCachedPlan) {
            // Keep the CachedSolution around for later.
            LOG(5) << "Subplanner: cached plan found for child " << branchIdx << " of "
                   << _orExpression->numChildren();

            branch->cachedSolution.reset(cachedRaw);
            continue;
        }

        // Nothing cached, so this branch has to be planned from scratch.
        LOG(5) << "Subplanner: planning child " << branchIdx << " of "
               << _orExpression->numChildren();

        // NO_TABLE_SCAN is deliberately not set: peeking at the cache data will keep us from
        // considering any plan that's a collscan.
        Status planStatus = QueryPlanner::plan(
            *branch->canonicalQuery, _plannerParams, &branch->solutions.mutableVector());

        if (!planStatus.isOK()) {
            mongoutils::str::stream msg;
            msg << "Can't plan for subchild " << branch->canonicalQuery->toString() << " "
                << planStatus.reason();
            return Status(ErrorCodes::BadValue, msg);
        }
        LOG(5) << "Subplanner: got " << branch->solutions.size() << " solutions";

        // A single branch without an indexed solution sinks the whole subplan attempt.
        if (branch->solutions.size() == 0) {
            mongoutils::str::stream msg;
            msg << "No solutions for subchild " << branch->canonicalQuery->toString();
            return Status(ErrorCodes::BadValue, msg);
        }
    }

    return Status::OK();
}
Example #22
0
    // static
    void QueryPlanner::plan(const CanonicalQuery& query,
                            const QueryPlannerParams& params,
                            vector<QuerySolution*>* out) {
        QLOG() << "=============================\n"
               << "Beginning planning, options = " << optionString(params.options) << endl
               << "Canonical query:\n" << query.toString() << endl
               << "============================="
               << endl;

        // The shortcut formerly known as IDHACK.  See if it's a simple _id query.  If so we might
        // just make an ixscan over the _id index and bypass the rest of planning entirely.
        if (!query.getParsed().isExplain() && !query.getParsed().showDiskLoc()
            && isSimpleIdQuery(query.getParsed().getFilter())
            && !query.getParsed().hasOption(QueryOption_CursorTailable)) {

            // See if we can find an _id index.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    const IndexEntry& index = params.indices[i];
                    QLOG() << "IDHACK using index " << index.toString() << endl;

                    // If so, we make a simple scan to find the doc.
                    IndexScanNode* isn = new IndexScanNode();
                    isn->indexKeyPattern = index.keyPattern;
                    isn->indexIsMultiKey = index.multikey;
                    isn->direction = 1;
                    isn->bounds.isSimpleRange = true;
                    BSONObj key = getKeyFromQuery(index.keyPattern, query.getParsed().getFilter());
                    isn->bounds.startKey = isn->bounds.endKey = key;
                    isn->bounds.endKeyInclusive = true;
                    isn->computeProperties();

                    QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, isn);

                    if (NULL != soln) {
                        out->push_back(soln);
                        QLOG() << "IDHACK solution is:\n" << (*out)[0]->toString() << endl;
                        // And that's it.
                        return;
                    }
                }
            }
        }

        for (size_t i = 0; i < params.indices.size(); ++i) {
            QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl;
        }

        bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

        // If the query requests a tailable cursor, the only solution is a collscan + filter with
        // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
        // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
        // can't provide one.  Is this what we want?
        if (query.getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
                && canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, true, params);
                if (NULL != soln) {
                    out->push_back(soln);
                }
            }
            return;
        }

        // The hint can be $natural: 1.  If this happens, output a collscan.  It's a weird way of
        // saying "table scan for two, please."
        if (!query.getParsed().getHint().isEmpty()) {
            BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural");
            if (!natural.eoo()) {
                QLOG() << "forcing a table scan due to hinted $natural\n";
                if (canTableScan) {
                    QuerySolution* soln = buildCollscanSoln(query, false, params);
                    if (NULL != soln) {
                        out->push_back(soln);
                    }
                }
                return;
            }
        }

        // NOR and NOT we can't handle well with indices.  If we see them here, they weren't
        // rewritten to remove the negation.  Just output a collscan for those.
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOT)
            || QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOR)) {

            // If there's a near predicate, we can't handle this.
            // TODO: Should canonicalized query detect this?
            if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)) {
                warning() << "Can't handle NOT/NOR with GEO_NEAR";
                return;
            }
            QLOG() << "NOT/NOR in plan, just outtping a collscan\n";
            if (canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, false, params);
                if (NULL != soln) {
                    out->push_back(soln);
                }
            }
            return;
        }

        // Figure out what fields we care about.
        unordered_set<string> fields;
        QueryPlannerIXSelect::getFields(query.root(), "", &fields);

        for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
            QLOG() << "predicate over field " << *it << endl;
        }

        // Filter our indices so we only look at indices that are over our predicates.
        vector<IndexEntry> relevantIndices;

        // Hints require us to only consider the hinted index.
        BSONObj hintIndex = query.getParsed().getHint();

        // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
        // plan.  If that fails, just scan the _id index.
        if (query.getParsed().isSnapshot()) {
            // Find the ID index in indexKeyPatterns.  It's our hint.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    hintIndex = params.indices[i].keyPattern;
                    break;
                }
            }
        }

        size_t hintIndexNumber = numeric_limits<size_t>::max();

        if (!hintIndex.isEmpty()) {
            // Sigh.  If the hint is specified it might be using the index name.
            BSONElement firstHintElt = hintIndex.firstElement();
            if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
                string hintName = firstHintElt.String();
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (params.indices[i].name == hintName) {
                        QLOG() << "hint by name specified, restricting indices to "
                             << params.indices[i].keyPattern.toString() << endl;
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        hintIndexNumber = i;
                        hintIndex = params.indices[i].keyPattern;
                        break;
                    }
                }
            }
            else {
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        QLOG() << "hint specified, restricting indices to " << hintIndex.toString()
                             << endl;
                        hintIndexNumber = i;
                        break;
                    }
                }
            }

            if (hintIndexNumber == numeric_limits<size_t>::max()) {
                // This is supposed to be an error.
                warning() << "Can't find hint for " << hintIndex.toString();
                return;
            }
        }
        else {
            QLOG() << "Finding relevant indices\n";
            QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);
        }

        for (size_t i = 0; i < relevantIndices.size(); ++i) {
            QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl;
        }

        // Figure out how useful each index is to each predicate.
        // query.root() is now annotated with RelevantTag(s).
        QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);

        QLOG() << "rated tree" << endl;
        QLOG() << query.root()->toString() << endl;

        // If there is a GEO_NEAR it must have an index it can use directly.
        // XXX: move into data access?
        MatchExpression* gnNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
            // No index for GEO_NEAR?  No query.
            RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return;
            }

            GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode);

            vector<size_t> newFirst;

            // 2d + GEO_NEAR is annoying.  Because 2d's GEO_NEAR isn't streaming we have to embed
            // the full query tree inside it as a matcher.
            for (size_t i = 0; i < tag->first.size(); ++i) {
                // GEO_NEAR has a non-2d index it can use.  We can deal w/that in normal planning.
                if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) {
                    newFirst.push_back(i);
                    continue;
                }

                // If we're here, GEO_NEAR has a 2d index.  We create a 2dgeonear plan with the
                // entire tree as a filter, if possible.

                GeoNear2DNode* solnRoot = new GeoNear2DNode();
                solnRoot->nq = gnme->getData();

                if (MatchExpression::GEO_NEAR != query.root()->matchType()) {
                    // root is an AND, clone and delete the GEO_NEAR child.
                    MatchExpression* filterTree = query.root()->shallowClone();
                    verify(MatchExpression::AND == filterTree->matchType());

                    bool foundChild = false;
                    for (size_t i = 0; i < filterTree->numChildren(); ++i) {
                        if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) {
                            foundChild = true;
                            filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i);
                            break;
                        }
                    }
                    verify(foundChild);
                    solnRoot->filter.reset(filterTree);
                }

                solnRoot->numWanted = query.getParsed().getNumToReturn();
                if (0 == solnRoot->numWanted) {
                    solnRoot->numWanted = 100;
                }
                solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern;

                // Remove the 2d index.  2d can only be the first field, and we know there is
                // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll
                // only be first for gnNode.
                tag->first.erase(tag->first.begin() + i);

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);

                if (NULL != soln) {
                    out->push_back(soln);
                }
            }

            // Continue planning w/non-2d indices tagged for this pred.
            tag->first.swap(newFirst);

            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return;
            }
        }

        // Likewise, if there is a TEXT it must have an index it can use directly.
        MatchExpression* textNode;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
            RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return;
            }
        }

        // If we have any relevant indices, we try to create indexed plans.
        if (0 < relevantIndices.size()) {
            // The enumerator spits out trees tagged with IndexTag(s).
            PlanEnumerator isp(query.root(), &relevantIndices);
            isp.init();

            MatchExpression* rawTree;
            while (isp.getNext(&rawTree)) {
                QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString()
                       << endl;

                // This can fail if enumeration makes a mistake.
                QuerySolutionNode* solnRoot =
                    QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices);

                if (NULL == solnRoot) { continue; }

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
                if (NULL != soln) {
                    QLOG() << "Planner: adding solution:\n" << soln->toString() << endl;
                    out->push_back(soln);
                }
            }
        }

        QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n";

        // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
        // scan the entire index to provide results and output that as our plan.  This is the
        // desired behavior when an index is hinted that is not relevant to the query.
        if (!hintIndex.isEmpty() && (0 == out->size())) {
            QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
            if (NULL != soln) {
                QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl;
                out->push_back(soln);
            }
            return;
        }

        // If a sort order is requested, there may be an index that provides it, even if that
        // index is not over any predicates in the query.
        //
        // XXX XXX: Can we do this even if the index is sparse?  Might we miss things?
        if (!query.getParsed().getSort().isEmpty()
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {

            // See if we have a sort provided from an index already.
            bool usingIndexToSort = false;
            for (size_t i = 0; i < out->size(); ++i) {
                QuerySolution* soln = (*out)[i];
                if (!soln->hasSortStage) {
                    usingIndexToSort = true;
                    break;
                }
            }

            if (!usingIndexToSort) {
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const BSONObj& kp = params.indices[i].keyPattern;
                    if (providesSort(query, kp)) {
                        QLOG() << "Planner: outputting soln that uses index to provide sort."
                               << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                    if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                        QLOG() << "Planner: outputting soln that uses (reverse) index "
                               << "to provide sort." << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                }
            }
        }

        // TODO: Do we always want to offer a collscan solution?
        // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs.
        if (    !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
             && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)
             && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan)))
        {
            QuerySolution* collscan = buildCollscanSoln(query, false, params);
            if (NULL != collscan) {
                out->push_back(collscan);
                QLOG() << "Planner: outputting a collscan:\n";
                QLOG() << collscan->toString() << endl;
            }
        }
    }
Example #23
0
// static
/**
 * Rewrites the match tree rooted at 'root' into a simplified equivalent and returns the root of
 * the result, which may be a different node than the one passed in.
 *
 * This function takes ownership of 'root': any node it replaces is deleted here, and the caller
 * owns whatever pointer comes back.
 *
 * Rewrites performed, by node type:
 *   - AND / OR: children are normalized, children with the same logical operator are flattened
 *     into this node, and a node left with a single child is replaced by that child.
 *   - NOR: children are normalized; a NOR with exactly one child becomes a NOT of that child.
 *   - NOT / ELEM_MATCH_OBJECT: the single child is detached, normalized and re-attached.
 *   - ELEM_MATCH_VALUE: every child is normalized in place.
 *   - MATCH_IN: an IN holding only one regex becomes that regex; an IN holding only one equality
 *     becomes an equality predicate.
 * Any other node type is returned unchanged.
 */
MatchExpression* CanonicalQuery::normalizeTree(MatchExpression* root) {
    // Replaces every child slot of 'node' with the normalized form of the child stored there.
    const auto normalizeEachChild = [](MatchExpression* node) {
        for (size_t pos = 0; pos < node->getChildVector()->size(); ++pos) {
            (*node->getChildVector())[pos] = CanonicalQuery::normalizeTree(node->getChild(pos));
        }
    };

    const auto rootType = root->matchType();

    switch (rootType) {
        case MatchExpression::AND:
        case MatchExpression::OR: {
            // Tidy the subtrees first, so that chains such as AND of AND of AND are already
            // collapsed by the time they get merged into this node.
            normalizeEachChild(root);

            // Grandchildren taken from children that share this node's operator. They are only
            // appended once every child has been looked at.
            std::vector<MatchExpression*> promoted;

            size_t pos = 0;
            while (pos < root->numChildren()) {
                MatchExpression* candidate = root->getChild(pos);
                if (candidate->matchType() != rootType) {
                    ++pos;
                    continue;
                }

                // AND under AND, or OR under OR: adopt the grandchildren.
                for (size_t g = 0; g < candidate->numChildren(); ++g) {
                    promoted.push_back(candidate->getChild(g));
                }

                // TODO(opt): this is possibly n^2-ish
                // 'pos' stays put on purpose: the following sibling slides into this slot.
                root->getChildVector()->erase(root->getChildVector()->begin() + pos);

                // Deleting 'candidate' is only safe because it has been emptied of its children.
                candidate->getChildVector()->clear();
                delete candidate;
            }

            root->getChildVector()->insert(
                root->getChildVector()->end(), promoted.begin(), promoted.end());

            // A logical AND/OR over a single operand is simply that operand.
            if (1 == root->numChildren()) {
                MatchExpression* survivor = root->getChild(0);
                root->getChildVector()->clear();
                delete root;
                return survivor;
            }
            break;
        }

        case MatchExpression::NOR: {
            // Children get cleaned up before the node itself is inspected.
            normalizeEachChild(root);

            // A NOR over a single operand is the negation of that operand.
            if (1 == root->numChildren()) {
                // Take the operand away from the NOR and own it for the moment.
                std::unique_ptr<MatchExpression> onlyChild(root->getChild(0));
                root->getChildVector()->clear();

                // The emptied NOR is destroyed when this owner leaves scope.
                std::unique_ptr<NorMatchExpression> retiredNor(
                    static_cast<NorMatchExpression*>(root));

                // The replacement root is a NOT that becomes the operand's new owner.
                auto negation = stdx::make_unique<NotMatchExpression>();
                negation->init(onlyChild.release()).transitional_ignore();

                return negation.release();
            }
            break;
        }

        case MatchExpression::NOT: {
            // Detach the operand, normalize it (normalizeTree owns it while doing so), and hand
            // the result back to the NOT node.
            NotMatchExpression* notNode = static_cast<NotMatchExpression*>(root);
            notNode->resetChild(CanonicalQuery::normalizeTree(notNode->releaseChild()));
            break;
        }

        case MatchExpression::ELEM_MATCH_OBJECT: {
            // Same detach / normalize / re-attach dance as for NOT, but with owning pointers.
            ElemMatchObjectMatchExpression* elemMatch =
                static_cast<ElemMatchObjectMatchExpression*>(root);
            auto detached = elemMatch->releaseChild();
            std::unique_ptr<MatchExpression> normalized(
                CanonicalQuery::normalizeTree(detached.release()));
            elemMatch->resetChild(std::move(normalized));
            break;
        }

        case MatchExpression::ELEM_MATCH_VALUE: {
            // Nothing to restructure at this level; only the children are normalized.
            normalizeEachChild(root);
            break;
        }

        case MatchExpression::MATCH_IN: {
            std::unique_ptr<InMatchExpression> inExpr(static_cast<InMatchExpression*>(root));

            const auto regexCount = inExpr->getRegexes().size();
            const auto equalityCount = inExpr->getEqualities().size();

            // An IN made of exactly one regex is that regex.
            if (1 == regexCount && 0 == equalityCount) {
                RegexMatchExpression* soleRegex = inExpr->getRegexes().begin()->get();
                invariant(!soleRegex->getTag());

                // 'soleRegex' carries no path of its own, so a fresh expression is built using
                // the path of the IN.
                auto standalone = stdx::make_unique<RegexMatchExpression>();
                standalone->init(inExpr->path(), soleRegex->getString(), soleRegex->getFlags())
                    .transitional_ignore();
                if (inExpr->getTag()) {
                    standalone->setTag(inExpr->getTag()->clone());
                }
                return CanonicalQuery::normalizeTree(standalone.release());
            }

            // An IN made of exactly one equality is that equality.
            if (1 == equalityCount && 0 == regexCount) {
                auto equality = stdx::make_unique<EqualityMatchExpression>();
                equality->init(inExpr->path(), *(inExpr->getEqualities().begin()))
                    .transitional_ignore();
                equality->setCollator(inExpr->getCollator());
                if (inExpr->getTag()) {
                    equality->setTag(inExpr->getTag()->clone());
                }
                return equality.release();
            }

            // Nothing to simplify: give the IN back to the caller as is.
            return inExpr.release();
        }

        default:
            break;
    }

    return root;
}
Example #24
0
/**
 * Chooses index assignments for every branch of '_orExpression' and then builds one composite
 * solution for the whole query.
 *
 * For each branch, the index tags come from one of three places:
 *   - the cached solution stored in the branch's entry of '_branchResults', if there is one;
 *   - the branch's only solution, if exactly one was produced; or
 *   - the winner picked by a temporary MultiPlanStage, run with 'yieldPolicy', when several
 *     solutions exist.
 *
 * On success, '_compositeSolution' is set and the plan stage built from it is the only entry in
 * '_children'. '_orExpression' is moved from along the way. A non-OK Status is returned if a
 * branch cannot be tagged, if plan selection fails, or if the composite solution cannot be
 * built or analyzed.
 */
Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
    // This is the skeleton of index selections that is inserted into the cache.
    std::unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());

    // Branch 'i' of '_orExpression' is paired with entry 'i' of '_branchResults'.
    for (size_t i = 0; i < _orExpression->numChildren(); ++i) {
        MatchExpression* orChild = _orExpression->getChild(i);
        BranchPlanningResult* branchResult = _branchResults[i].get();

        if (branchResult->cachedSolution.get()) {
            // We can get the index tags we need out of the cache.
            Status tagStatus = tagOrChildAccordingToCache(
                cacheData.get(), branchResult->cachedSolution->plannerData[0], orChild, _indexMap);
            if (!tagStatus.isOK()) {
                return tagStatus;
            }
        } else if (1 == branchResult->solutions.size()) {
            // Exactly one solution: there is nothing to rank, so tag the branch straight from
            // that solution's cache data.
            QuerySolution* soln = branchResult->solutions.front().get();
            Status tagStatus = tagOrChildAccordingToCache(
                cacheData.get(), soln->cacheData.get(), orChild, _indexMap);
            if (!tagStatus.isOK()) {
                return tagStatus;
            }
        } else {
            // N solutions, rank them.

            // We already checked for zero solutions in planSubqueries(...).
            invariant(!branchResult->solutions.empty());

            _ws->clear();

            // We pass the SometimesCache option to the MPS because the SubplanStage currently does
            // not use the CachedPlanStage's eviction mechanism. We therefore are more conservative
            // about putting a potentially bad plan into the cache in the subplan path.
            // We temporarily add the MPS to _children to ensure that we pass down all
            // save/restore/invalidate messages that can be generated if pickBestPlan yields.
            invariant(_children.empty());
            _children.emplace_back(
                stdx::make_unique<MultiPlanStage>(getOpCtx(),
                                                  _collection,
                                                  branchResult->canonicalQuery.get(),
                                                  MultiPlanStage::CachingMode::SometimesCache));
            // The temporary MPS is popped again when this block is left, including on the early
            // returns below.
            ON_BLOCK_EXIT([&] {
                invariant(_children.size() == 1);  // Make sure nothing else was added to _children.
                _children.pop_back();
            });
            MultiPlanStage* multiPlanStage = static_cast<MultiPlanStage*>(child().get());

            // Dump all the solutions into the MPS.
            for (size_t ix = 0; ix < branchResult->solutions.size(); ++ix) {
                PlanStage* nextPlanRoot;
                invariant(StageBuilder::build(getOpCtx(),
                                              _collection,
                                              *branchResult->canonicalQuery,
                                              *branchResult->solutions[ix],
                                              _ws,
                                              &nextPlanRoot));

                // Takes ownership of 'nextPlanRoot'. The solution itself is moved out of
                // 'branchResult->solutions' as well.
                multiPlanStage->addPlan(std::move(branchResult->solutions[ix]), nextPlanRoot, _ws);
            }

            Status planSelectStat = multiPlanStage->pickBestPlan(yieldPolicy);
            if (!planSelectStat.isOK()) {
                return planSelectStat;
            }

            if (!multiPlanStage->bestPlanChosen()) {
                mongoutils::str::stream ss;
                ss << "Failed to pick best plan for subchild "
                   << branchResult->canonicalQuery->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            QuerySolution* bestSoln = multiPlanStage->bestSolution();

            // Check that we have good cache data. For example, we don't cache things
            // for 2d indices.
            if (NULL == bestSoln->cacheData.get()) {
                mongoutils::str::stream ss;
                ss << "No cache data for subchild " << orChild->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            // Only cache data of the index-tag kind can be used to tag the branch.
            if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) {
                mongoutils::str::stream ss;
                ss << "No indexed cache data for subchild " << orChild->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            // Add the index assignments to our original query.
            Status tagStatus = QueryPlanner::tagAccordingToCache(
                orChild, bestSoln->cacheData->tree.get(), _indexMap);

            if (!tagStatus.isOK()) {
                mongoutils::str::stream ss;
                ss << "Failed to extract indices from subchild " << orChild->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            // Record the winning branch's index tree in the cache skeleton.
            cacheData->children.push_back(bestSoln->cacheData->tree->clone());
        }
    }

    // Must do this before using the planner functionality.
    prepareForAccessPlanning(_orExpression.get());

    // Use the cached index assignments to build solnRoot. Takes ownership of '_orExpression'.
    std::unique_ptr<QuerySolutionNode> solnRoot(QueryPlannerAccess::buildIndexedDataAccess(
        *_query, std::move(_orExpression), _plannerParams.indices, _plannerParams));

    if (!solnRoot) {
        mongoutils::str::stream ss;
        ss << "Failed to build indexed data path for subplanned query\n";
        return Status(ErrorCodes::BadValue, ss);
    }

    LOG(5) << "Subplanner: fully tagged tree is " << redact(solnRoot->toString());

    // Takes ownership of 'solnRoot'
    _compositeSolution =
        QueryPlannerAnalysis::analyzeDataAccess(*_query, _plannerParams, std::move(solnRoot));

    if (NULL == _compositeSolution.get()) {
        mongoutils::str::stream ss;
        ss << "Failed to analyze subplanned query";
        return Status(ErrorCodes::BadValue, ss);
    }

    LOG(5) << "Subplanner: Composite solution is " << redact(_compositeSolution->toString());

    // Use the index tags from planning each branch to construct the composite solution,
    // and set that solution as our child stage.
    _ws->clear();
    PlanStage* root;
    invariant(StageBuilder::build(
        getOpCtx(), _collection, *_query, *_compositeSolution.get(), _ws, &root));
    // Any temporary MPS added above has been popped by now, so 'root' becomes the only child.
    invariant(_children.empty());
    _children.emplace_back(root);

    return Status::OK();
}
Example #25
0
/**
 * Plans each child of the query's root expression as a query of its own.
 *
 * Side effects: '_orExpression' is set to a shallow clone of the query root, '_indexMap' maps
 * each index name to its position in '_plannerParams.indices', and one entry is appended to
 * '_branchResults' per child. That entry holds the child's canonical query plus either a cached
 * solution from the plan cache or the freshly planned solutions.
 *
 * Returns BadValue if a child cannot be canonicalized, cannot be planned, or ends up with no
 * solutions at all; Status::OK() otherwise.
 */
Status SubplanStage::planSubqueries() {
    _orExpression = _query->root()->shallowClone();

    // Remember where every index sits in '_plannerParams.indices', keyed by the index name.
    for (size_t indexPos = 0; indexPos < _plannerParams.indices.size(); ++indexPos) {
        const IndexEntry& entry = _plannerParams.indices[indexPos];
        _indexMap[entry.name] = indexPos;
        LOG(5) << "Subplanner: index " << indexPos << " is " << entry;
    }

    for (size_t branchNo = 0; branchNo < _orExpression->numChildren(); ++branchNo) {
        // Every branch gets its own slot for whatever planning produces.
        _branchResults.push_back(stdx::make_unique<BranchPlanningResult>());
        BranchPlanningResult* const slot = _branchResults.back().get();

        MatchExpression* const branch = _orExpression->getChild(branchNo);

        // Promote this branch to a standalone canonical query.
        auto canonicalized = CanonicalQuery::canonicalize(getOpCtx(), *_query, branch);
        if (!canonicalized.isOK()) {
            mongoutils::str::stream msg;
            msg << "Can't canonicalize subchild " << branch->toString() << " "
                << canonicalized.getStatus().reason();
            return Status(ErrorCodes::BadValue, msg);
        }
        slot->canonicalQuery = std::move(canonicalized.getValue());

        // The plan cache is consulted first; only on a miss are plans generated here (ranking of
        // those plans with the MPS happens later).
        const auto* cache = _collection->infoCache()->getPlanCache();
        auto cachedPlan = cache->getCacheEntryIfCacheable(*slot->canonicalQuery);
        if (cachedPlan) {
            // Cache hit: keep the CachedSolution around for later use.
            LOG(5) << "Subplanner: cached plan found for child " << branchNo << " of "
                   << _orExpression->numChildren();

            slot->cachedSolution = std::move(cachedPlan);
            continue;
        }

        // Cache miss: this branch has to be planned from scratch.
        LOG(5) << "Subplanner: planning child " << branchNo << " of "
               << _orExpression->numChildren();

        // NO_TABLE_SCAN is deliberately not set: peeking at the cache data later keeps any
        // collscan plan from being considered.
        invariant(slot->solutions.empty());
        auto planned = QueryPlanner::plan(*slot->canonicalQuery, _plannerParams);
        if (!planned.isOK()) {
            mongoutils::str::stream msg;
            msg << "Can't plan for subchild " << slot->canonicalQuery->toString() << " "
                << planned.getStatus().reason();
            return Status(ErrorCodes::BadValue, msg);
        }
        slot->solutions = std::move(planned.getValue());

        LOG(5) << "Subplanner: got " << slot->solutions.size() << " solutions";

        // A single branch without an indexed solution sinks the whole subplanning attempt.
        if (slot->solutions.empty()) {
            mongoutils::str::stream msg;
            msg << "No solutions for subchild " << slot->canonicalQuery->toString();
            return Status(ErrorCodes::BadValue, msg);
        }
    }

    return Status::OK();
}
Example #26
0
// static
Status QueryPlanner::plan(const CanonicalQuery& query,
                          const QueryPlannerParams& params,
                          std::vector<QuerySolution*>* out) {
    LOG(5) << "Beginning planning..." << endl
           << "=============================" << endl
           << "Options = " << optionString(params.options) << endl
           << "Canonical query:" << endl
           << query.toString() << "=============================" << endl;

    for (size_t i = 0; i < params.indices.size(); ++i) {
        LOG(5) << "Index " << i << " is " << params.indices[i].toString() << endl;
    }

    bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

    // If the query requests a tailable cursor, the only solution is a collscan + filter with
    // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
    // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
    // can't provide one.  Is this what we want?
    if (query.getParsed().isTailable()) {
        if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) {
            QuerySolution* soln = buildCollscanSoln(query, true, params);
            if (NULL != soln) {
                out->push_back(soln);
            }
        }
        return Status::OK();
    }

    // The hint or sort can be $natural: 1.  If this happens, output a collscan. If both
    // a $natural hint and a $natural sort are specified, then the direction of the collscan
    // is determined by the sign of the sort (not the sign of the hint).
    if (!query.getParsed().getHint().isEmpty() || !query.getParsed().getSort().isEmpty()) {
        BSONObj hintObj = query.getParsed().getHint();
        BSONObj sortObj = query.getParsed().getSort();
        BSONElement naturalHint = hintObj.getFieldDotted("$natural");
        BSONElement naturalSort = sortObj.getFieldDotted("$natural");

        // A hint overrides a $natural sort. This means that we don't force a table
        // scan if there is a $natural sort with a non-$natural hint.
        if (!naturalHint.eoo() || (!naturalSort.eoo() && hintObj.isEmpty())) {
            LOG(5) << "Forcing a table scan due to hinted $natural\n";
            // min/max are incompatible with $natural.
            if (canTableScan && query.getParsed().getMin().isEmpty() &&
                query.getParsed().getMax().isEmpty()) {
                QuerySolution* soln = buildCollscanSoln(query, false, params);
                if (NULL != soln) {
                    out->push_back(soln);
                }
            }
            return Status::OK();
        }
    }

    // Figure out what fields we care about.
    unordered_set<string> fields;
    QueryPlannerIXSelect::getFields(query.root(), "", &fields);

    for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
        LOG(5) << "Predicate over field '" << *it << "'" << endl;
    }

    // Filter our indices so we only look at indices that are over our predicates.
    vector<IndexEntry> relevantIndices;

    // Hints require us to only consider the hinted index.
    // If index filters in the query settings were used to override
    // the allowed indices for planning, we should not use the hinted index
    // requested in the query.
    BSONObj hintIndex;
    if (!params.indexFiltersApplied) {
        hintIndex = query.getParsed().getHint();
    }

    // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
    // plan.  If that fails, just scan the _id index.
    if (query.getParsed().isSnapshot()) {
        // Find the ID index in indexKeyPatterns.  It's our hint.
        for (size_t i = 0; i < params.indices.size(); ++i) {
            if (isIdIndex(params.indices[i].keyPattern)) {
                hintIndex = params.indices[i].keyPattern;
                break;
            }
        }
    }

    size_t hintIndexNumber = numeric_limits<size_t>::max();

    if (hintIndex.isEmpty()) {
        QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);
    } else {
        // Sigh.  If the hint is specified it might be using the index name.
        BSONElement firstHintElt = hintIndex.firstElement();
        if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
            string hintName = firstHintElt.String();
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (params.indices[i].name == hintName) {
                    LOG(5) << "Hint by name specified, restricting indices to "
                           << params.indices[i].keyPattern.toString() << endl;
                    relevantIndices.clear();
                    relevantIndices.push_back(params.indices[i]);
                    hintIndexNumber = i;
                    hintIndex = params.indices[i].keyPattern;
                    break;
                }
            }
        } else {
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                    relevantIndices.clear();
                    relevantIndices.push_back(params.indices[i]);
                    LOG(5) << "Hint specified, restricting indices to " << hintIndex.toString()
                           << endl;
                    hintIndexNumber = i;
                    break;
                }
            }
        }

        if (hintIndexNumber == numeric_limits<size_t>::max()) {
            return Status(ErrorCodes::BadValue, "bad hint");
        }
    }

    // Deal with the .min() and .max() query options.  If either exist we can only use an index
    // that matches the object inside.
    if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) {
        BSONObj minObj = query.getParsed().getMin();
        BSONObj maxObj = query.getParsed().getMax();

        // The unfinished siblings of these objects may not be proper index keys because they
        // may be empty objects or have field names. When an index is picked to use for the
        // min/max query, these "finished" objects will always be valid index keys for the
        // index's key pattern.
        BSONObj finishedMinObj;
        BSONObj finishedMaxObj;

        // This is the index into params.indices[...] that we use.
        size_t idxNo = numeric_limits<size_t>::max();

        // If there's an index hinted we need to be able to use it.
        if (!hintIndex.isEmpty()) {
            if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) {
                LOG(5) << "Minobj doesn't work with hint";
                return Status(ErrorCodes::BadValue, "hint provided does not work with min query");
            }

            if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) {
                LOG(5) << "Maxobj doesn't work with hint";
                return Status(ErrorCodes::BadValue, "hint provided does not work with max query");
            }

            const BSONObj& kp = params.indices[hintIndexNumber].keyPattern;
            finishedMinObj = finishMinObj(kp, minObj, maxObj);
            finishedMaxObj = finishMaxObj(kp, minObj, maxObj);

            // The min must be less than the max for the hinted index ordering.
            if (0 <= finishedMinObj.woCompare(finishedMaxObj, kp, false)) {
                LOG(5) << "Minobj/Maxobj don't work with hint";
                return Status(ErrorCodes::BadValue,
                              "hint provided does not work with min/max query");
            }

            idxNo = hintIndexNumber;
        } else {
            // No hinted index, look for one that is compatible (has same field names and
            // ordering thereof).
            for (size_t i = 0; i < params.indices.size(); ++i) {
                const BSONObj& kp = params.indices[i].keyPattern;

                BSONObj toUse = minObj.isEmpty() ? maxObj : minObj;
                if (indexCompatibleMaxMin(toUse, kp)) {
                    // In order to be fully compatible, the min has to be less than the max
                    // according to the index key pattern ordering. The first step in verifying
                    // this is "finish" the min and max by replacing empty objects and stripping
                    // field names.
                    finishedMinObj = finishMinObj(kp, minObj, maxObj);
                    finishedMaxObj = finishMaxObj(kp, minObj, maxObj);

                    // Now we have the final min and max. This index is only relevant for
                    // the min/max query if min < max.
                    if (0 >= finishedMinObj.woCompare(finishedMaxObj, kp, false)) {
                        // Found a relevant index.
                        idxNo = i;
                        break;
                    }

                    // This index is not relevant; move on to the next.
                }
            }
        }

        if (idxNo == numeric_limits<size_t>::max()) {
            LOG(5) << "Can't find relevant index to use for max/min query";
            // Can't find an index to use, bail out.
            return Status(ErrorCodes::BadValue, "unable to find relevant index for max/min query");
        }

        LOG(5) << "Max/min query using index " << params.indices[idxNo].toString() << endl;

        // Make our scan and output.
        QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan(
            params.indices[idxNo], query, params, finishedMinObj, finishedMaxObj);

        QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
        if (NULL != soln) {
            out->push_back(soln);
        }

        return Status::OK();
    }

    for (size_t i = 0; i < relevantIndices.size(); ++i) {
        LOG(2) << "Relevant index " << i << " is " << relevantIndices[i].toString() << endl;
    }

    // Figure out how useful each index is to each predicate.
    QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);
    QueryPlannerIXSelect::stripInvalidAssignments(query.root(), relevantIndices);

    // Unless we have GEO_NEAR, TEXT, or a projection, we may be able to apply an optimization
    // in which we strip unnecessary index assignments.
    //
    // Disallowed with projection because assignment to a non-unique index can allow the plan
    // to be covered.
    //
    // TEXT and GEO_NEAR are special because they require the use of a text/geo index in order
    // to be evaluated correctly. Stripping these "mandatory assignments" is therefore invalid.
    if (query.getParsed().getProj().isEmpty() &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {
        QueryPlannerIXSelect::stripUnneededAssignments(query.root(), relevantIndices);
    }

    // query.root() is now annotated with RelevantTag(s).
    LOG(5) << "Rated tree:" << endl
           << query.root()->toString();

    // If there is a GEO_NEAR it must have an index it can use directly.
    MatchExpression* gnNode = NULL;
    if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
        // No index for GEO_NEAR?  No query.
        RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
        if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
            LOG(5) << "Unable to find index for $geoNear query." << endl;
            // Don't leave tags on query tree.
            query.root()->resetTag();
            return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query");
        }

        LOG(5) << "Rated tree after geonear processing:" << query.root()->toString();
    }

    // Likewise, if there is a TEXT it must have an index it can use directly.
    MatchExpression* textNode = NULL;
    if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
        RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());

        // Exactly one text index required for TEXT.  We need to check this explicitly because
        // the text stage can't be built if no text index exists or there is an ambiguity as to
        // which one to use.
        size_t textIndexCount = 0;
        for (size_t i = 0; i < params.indices.size(); i++) {
            if (INDEX_TEXT == params.indices[i].type) {
                textIndexCount++;
            }
        }
        if (textIndexCount != 1) {
            // Don't leave tags on query tree.
            query.root()->resetTag();
            return Status(ErrorCodes::BadValue, "need exactly one text index for $text query");
        }

        // Error if the text node is tagged with zero indices.
        if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
            // Don't leave tags on query tree.
            query.root()->resetTag();
            return Status(ErrorCodes::BadValue,
                          "failed to use text index to satisfy $text query (if text index is "
                          "compound, are equality predicates given for all prefix fields?)");
        }

        // At this point, we know that there is only one text index and that the TEXT node is
        // assigned to it.
        invariant(1 == tag->first.size() + tag->notFirst.size());

        LOG(5) << "Rated tree after text processing:" << query.root()->toString();
    }

    // If we have any relevant indices, we try to create indexed plans.
    if (0 < relevantIndices.size()) {
        // The enumerator spits out trees tagged with IndexTag(s).
        PlanEnumeratorParams enumParams;
        enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION;
        enumParams.root = query.root();
        enumParams.indices = &relevantIndices;

        PlanEnumerator isp(enumParams);
        isp.init();

        MatchExpression* rawTree;
        while (isp.getNext(&rawTree) && (out->size() < params.maxIndexedSolutions)) {
            LOG(5) << "About to build solntree from tagged tree:" << endl
                   << rawTree->toString();

            // The tagged tree produced by the plan enumerator is not guaranteed
            // to be canonically sorted. In order to be compatible with the cached
            // data, sort the tagged tree according to CanonicalQuery ordering.
            std::unique_ptr<MatchExpression> clone(rawTree->shallowClone());
            CanonicalQuery::sortTree(clone.get());

            PlanCacheIndexTree* cacheData;
            Status indexTreeStatus =
                cacheDataFromTaggedTree(clone.get(), relevantIndices, &cacheData);
            if (!indexTreeStatus.isOK()) {
                LOG(5) << "Query is not cachable: " << indexTreeStatus.reason() << endl;
            }
            unique_ptr<PlanCacheIndexTree> autoData(cacheData);

            // This can fail if enumeration makes a mistake.
            QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
                query, rawTree, false, relevantIndices, params);

            if (NULL == solnRoot) {
                continue;
            }

            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
            if (NULL != soln) {
                LOG(5) << "Planner: adding solution:" << endl
                       << soln->toString();
                if (indexTreeStatus.isOK()) {
                    SolutionCacheData* scd = new SolutionCacheData();
                    scd->tree.reset(autoData.release());
                    soln->cacheData.reset(scd);
                }
                out->push_back(soln);
            }
        }
    }

    // Don't leave tags on query tree.
    query.root()->resetTag();

    LOG(5) << "Planner: outputted " << out->size() << " indexed solutions.\n";

    // Produce legible error message for failed OR planning with a TEXT child.
    // TODO: support collection scan for non-TEXT children of OR.
    if (out->size() == 0 && textNode != NULL && MatchExpression::OR == query.root()->matchType()) {
        MatchExpression* root = query.root();
        for (size_t i = 0; i < root->numChildren(); ++i) {
            if (textNode == root->getChild(i)) {
                return Status(ErrorCodes::BadValue,
                              "Failed to produce a solution for TEXT under OR - "
                              "other non-TEXT clauses under OR have to be indexed as well.");
            }
        }
    }

    // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
    // scan the entire index to provide results and output that as our plan.  This is the
    // desired behavior when an index is hinted that is not relevant to the query.
    if (!hintIndex.isEmpty()) {
        if (0 == out->size()) {
            QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
            verify(NULL != soln);
            LOG(5) << "Planner: outputting soln that uses hinted index as scan." << endl;
            out->push_back(soln);
        }
        return Status::OK();
    }

    // If a sort order is requested, there may be an index that provides it, even if that
    // index is not over any predicates in the query.
    //
    if (!query.getParsed().getSort().isEmpty() &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {
        // See if we have a sort provided from an index already.
        // This is implied by the presence of a non-blocking solution.
        bool usingIndexToSort = false;
        for (size_t i = 0; i < out->size(); ++i) {
            QuerySolution* soln = (*out)[i];
            if (!soln->hasBlockingStage) {
                usingIndexToSort = true;
                break;
            }
        }

        if (!usingIndexToSort) {
            for (size_t i = 0; i < params.indices.size(); ++i) {
                const IndexEntry& index = params.indices[i];
                // Only regular (non-plugin) indexes can be used to provide a sort, and only
                // non-sparse indexes can be used to provide a sort.
                //
                // TODO: Sparse indexes can't normally provide a sort, because non-indexed
                // documents could potentially be missing from the result set.  However, if the
                // query predicate can be used to guarantee that all documents to be returned
                // are indexed, then the index should be able to provide the sort.
                //
                // For example:
                // - Sparse index {a: 1, b: 1} should be able to provide a sort for
                //   find({b: 1}).sort({a: 1}).  SERVER-13908.
                // - Index {a: 1, b: "2dsphere"} (which is "geo-sparse", if
                //   2dsphereIndexVersion=2) should be able to provide a sort for
                //   find({b: GEO}).sort({a:1}).  SERVER-10801.
                if (index.type != INDEX_BTREE) {
                    continue;
                }
                if (index.sparse) {
                    continue;
                }

                // Partial indexes can only be used to provide a sort only if the query predicate is
                // compatible.
                if (index.filterExpr && !expression::isSubsetOf(query.root(), index.filterExpr)) {
                    continue;
                }

                const BSONObj kp = QueryPlannerAnalysis::getSortPattern(index.keyPattern);
                if (providesSort(query, kp)) {
                    LOG(5) << "Planner: outputting soln that uses index to provide sort." << endl;
                    QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                    if (NULL != soln) {
                        PlanCacheIndexTree* indexTree = new PlanCacheIndexTree();
                        indexTree->setIndexEntry(params.indices[i]);
                        SolutionCacheData* scd = new SolutionCacheData();
                        scd->tree.reset(indexTree);
                        scd->solnType = SolutionCacheData::WHOLE_IXSCAN_SOLN;
                        scd->wholeIXSolnDir = 1;

                        soln->cacheData.reset(scd);
                        out->push_back(soln);
                        break;
                    }
                }
                if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                    LOG(5) << "Planner: outputting soln that uses (reverse) index "
                           << "to provide sort." << endl;
                    QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                    if (NULL != soln) {
                        PlanCacheIndexTree* indexTree = new PlanCacheIndexTree();
                        indexTree->setIndexEntry(params.indices[i]);
                        SolutionCacheData* scd = new SolutionCacheData();
                        scd->tree.reset(indexTree);
                        scd->solnType = SolutionCacheData::WHOLE_IXSCAN_SOLN;
                        scd->wholeIXSolnDir = -1;

                        soln->cacheData.reset(scd);
                        out->push_back(soln);
                        break;
                    }
                }
            }
        }
    }

    // geoNear and text queries *require* an index.
    // Also, if a hint is specified it indicates that we MUST use it.
    bool possibleToCollscan =
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) &&
        !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && hintIndex.isEmpty();

    // The caller can explicitly ask for a collscan.
    bool collscanRequested = (params.options & QueryPlannerParams::INCLUDE_COLLSCAN);

    // No indexed plans?  We must provide a collscan if possible or else we can't run the query.
    bool collscanNeeded = (0 == out->size() && canTableScan);

    if (possibleToCollscan && (collscanRequested || collscanNeeded)) {
        QuerySolution* collscan = buildCollscanSoln(query, false, params);
        if (NULL != collscan) {
            SolutionCacheData* scd = new SolutionCacheData();
            scd->solnType = SolutionCacheData::COLLSCAN_SOLN;
            collscan->cacheData.reset(scd);
            out->push_back(collscan);
            LOG(5) << "Planner: outputting a collscan:" << endl
                   << collscan->toString();
        }
    }

    return Status::OK();
}
Example #27
0
    // Plans each child of the query's root expression as a query of its own.
    //
    // Every child gets a BranchPlanningResult appended to '_branchResults' (same order as the
    // children).  For each child we canonicalize it, then either pick up a cached plan for it or
    // ask the query planner for candidate solutions.
    //
    // Returns Status::OK() when every child has either a cached solution or at least one planned
    // solution.  Returns BadValue if a child cannot be canonicalized, cannot be planned, or
    // produces zero solutions.
    Status SubplanStage::planSubqueries() {
        // Planning does a lot of work, so charge the time spent here to executionTimeMillis to
        // keep the time accounting sensible.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        MatchExpression* rootExpr = _query->root();

        // Record the position of every index in the planner params, keyed by key pattern.
        for (size_t idx = 0; idx < _plannerParams.indices.size(); ++idx) {
            const IndexEntry& entry = _plannerParams.indices[idx];
            _indexMap[entry.keyPattern] = idx;
            QLOG() << "Subplanner: index " << idx << " is " << entry.toString() << endl;
        }

        const WhereCallbackReal whereCallback(_txn, _collection->ns().db());

        for (size_t childNum = 0; childNum < rootExpr->numChildren(); ++childNum) {
            // The planning results for this branch live in '_branchResults'.
            _branchResults.push_back(new BranchPlanningResult());
            BranchPlanningResult* branch = _branchResults.back();

            MatchExpression* branchExpr = rootExpr->getChild(childNum);

            // Step 1: turn this child into a standalone canonical query.
            CanonicalQuery* rawBranchCQ;
            Status canonStatus = CanonicalQuery::canonicalize(*_query,
                                                              branchExpr,
                                                              &rawBranchCQ,
                                                              whereCallback);
            if (!canonStatus.isOK()) {
                mongoutils::str::stream ss;
                ss << "Can't canonicalize subchild " << branchExpr->toString()
                   << " " << canonStatus.reason();
                return Status(ErrorCodes::BadValue, ss);
            }
            branch->canonicalQuery.reset(rawBranchCQ);

            // Step 2: consult the plan cache, but only for queries that are cacheable at all.
            CachedSolution* rawCS;
            bool haveCachedPlan = false;
            if (PlanCache::shouldCacheQuery(*branch->canonicalQuery.get())) {
                haveCachedPlan = _collection->infoCache()->getPlanCache()->get(
                    *branch->canonicalQuery.get(), &rawCS).isOK();
            }

            if (haveCachedPlan) {
                // Keep the CachedSolution around; it is consumed later.
                QLOG() << "Subplanner: cached plan found for child " << childNum << " of "
                       << rootExpr->numChildren();

                branch->cachedSolution.reset(rawCS);
                continue;
            }

            // Step 3: nothing cached, so generate candidate plans from scratch.
            QLOG() << "Subplanner: planning child " << childNum << " of "
                   << rootExpr->numChildren();

            // NO_TABLE_SCAN is deliberately not set: inspecting the cache data later keeps us
            // from considering any plan that is a collscan.
            Status planStatus = QueryPlanner::plan(*branch->canonicalQuery.get(),
                                                   _plannerParams,
                                                   &branch->solutions.mutableVector());
            if (!planStatus.isOK()) {
                mongoutils::str::stream ss;
                ss << "Can't plan for subchild "
                   << branch->canonicalQuery->toString()
                   << " " << planStatus.reason();
                return Status(ErrorCodes::BadValue, ss);
            }
            QLOG() << "Subplanner: got " << branch->solutions.size() << " solutions";

            if (0 == branch->solutions.size()) {
                // A single child without an indexed solution is enough to give up.
                mongoutils::str::stream ss;
                ss << "No solutions for subchild " << branch->canonicalQuery->toString();
                return Status(ErrorCodes::BadValue, ss);
            }
        }

        return Status::OK();
    }
Example #28
0
    bool SubplanRunner::runSubplans() {
        // This is what we annotate with the index selections and then turn into a solution.
        auto_ptr<OrMatchExpression> theOr(
            static_cast<OrMatchExpression*>(_query->root()->shallowClone()));

        // This is the skeleton of index selections that is inserted into the cache.
        auto_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree());

        for (size_t i = 0; i < theOr->numChildren(); ++i) {
            MatchExpression* orChild = theOr->getChild(i);

            auto_ptr<CanonicalQuery> orChildCQ(_cqs.front());
            _cqs.pop();

            // 'solutions' is owned by the SubplanRunner instance until
            // it is popped from the queue.
            vector<QuerySolution*> solutions = _solutions.front();
            _solutions.pop();

            // We already checked for zero solutions in planSubqueries(...).
            invariant(!solutions.empty());

            if (1 == solutions.size()) {
                // There is only one solution. Transfer ownership to an auto_ptr.
                auto_ptr<QuerySolution> autoSoln(solutions[0]);

                // We want a well-formed *indexed* solution.
                if (NULL == autoSoln->cacheData.get()) {
                    // For example, we don't cache things for 2d indices.
                    QLOG() << "Subplanner: No cache data for subchild " << orChild->toString();
                    return false;
                }

                if (SolutionCacheData::USE_INDEX_TAGS_SOLN != autoSoln->cacheData->solnType) {
                    QLOG() << "Subplanner: No indexed cache data for subchild "
                           << orChild->toString();
                    return false;
                }

                // Add the index assignments to our original query.
                Status tagStatus = QueryPlanner::tagAccordingToCache(
                    orChild, autoSoln->cacheData->tree.get(), _indexMap);

                if (!tagStatus.isOK()) {
                    QLOG() << "Subplanner: Failed to extract indices from subchild "
                           << orChild->toString();
                    return false;
                }

                // Add the child's cache data to the cache data we're creating for the main query.
                cacheData->children.push_back(autoSoln->cacheData->tree->clone());
            }
            else {
                // N solutions, rank them.  Takes ownership of orChildCQ.

                // the working set will be shared by the candidate plans and owned by the runner
                WorkingSet* sharedWorkingSet = new WorkingSet();

                MultiPlanStage* multiPlanStage = new MultiPlanStage(_collection,
                                                                    orChildCQ.get());

                // Dump all the solutions into the MPR.
                for (size_t ix = 0; ix < solutions.size(); ++ix) {
                    PlanStage* nextPlanRoot;
                    verify(StageBuilder::build(_txn,
                                               _collection,
                                               *solutions[ix],
                                               sharedWorkingSet,
                                               &nextPlanRoot));

                    // Owns first two arguments
                    multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet);
                }

                multiPlanStage->pickBestPlan();
                if (! multiPlanStage->bestPlanChosen()) {
                    QLOG() << "Subplanner: Failed to pick best plan for subchild "
                           << orChildCQ->toString();
                    return false;
                }

                Runner* mpr = new SingleSolutionRunner(_collection,
                                                       orChildCQ.release(),
                                                       multiPlanStage->bestSolution(),
                                                       multiPlanStage,
                                                       sharedWorkingSet);

                _underlyingRunner.reset(mpr);

                if (_killed) {
                    QLOG() << "Subplanner: Killed while picking best plan for subchild "
                           << orChild->toString();
                    return false;
                }

                QuerySolution* bestSoln = multiPlanStage->bestSolution();

                if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) {
                    QLOG() << "Subplanner: No indexed cache data for subchild "
                           << orChild->toString();
                    return false;
                }

                // Add the index assignments to our original query.
                Status tagStatus = QueryPlanner::tagAccordingToCache(
                    orChild, bestSoln->cacheData->tree.get(), _indexMap);

                if (!tagStatus.isOK()) {
                    QLOG() << "Subplanner: Failed to extract indices from subchild "
                           << orChild->toString();
                    return false;
                }

                cacheData->children.push_back(bestSoln->cacheData->tree->clone());
            }
        }

        // Must do this before using the planner functionality.
        sortUsingTags(theOr.get());

        // Use the cached index assignments to build solnRoot.  Takes ownership of 'theOr'
        QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(
            *_query, theOr.release(), false, _plannerParams.indices);

        if (NULL == solnRoot) {
            QLOG() << "Subplanner: Failed to build indexed data path for subplanned query\n";
            return false;
        }

        QLOG() << "Subplanner: fully tagged tree is " << solnRoot->toString();

        // Takes ownership of 'solnRoot'
        QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*_query,
                                                                      _plannerParams,
                                                                      solnRoot);

        if (NULL == soln) {
            QLOG() << "Subplanner: Failed to analyze subplanned query";
            return false;
        }

        // We want our franken-solution to be cached.
        SolutionCacheData* scd = new SolutionCacheData();
        scd->tree.reset(cacheData.release());
        soln->cacheData.reset(scd);

        QLOG() << "Subplanner: Composite solution is " << soln->toString() << endl;

        // We use one of these even if there is one plan.  We do this so that the entry is cached
        // with stats obtained in the same fashion as a competitive ranking would have obtained
        // them.
        MultiPlanStage* multiPlanStage = new MultiPlanStage(_collection, _query.get());
        WorkingSet* ws = new WorkingSet();
        PlanStage* root;
        verify(StageBuilder::build(_txn, _collection, *soln, ws, &root));
        multiPlanStage->addPlan(soln, root, ws); // Takes ownership first two arguments.

        multiPlanStage->pickBestPlan();
        if (! multiPlanStage->bestPlanChosen()) {
            QLOG() << "Subplanner: Failed to pick best plan for subchild "
                   << _query->toString();
            return false;
        }

        Runner* mpr = new SingleSolutionRunner(_collection,
                                               _query.release(),
                                               multiPlanStage->bestSolution(),
                                               multiPlanStage,
                                               ws);
        _underlyingRunner.reset(mpr);

        return true;
    }
Example #29
0
    // Picks a plan for every branch planned by planSubqueries() and combines the per-branch
    // index assignments into one composite solution, which becomes this stage's child.
    //
    // Each branch is handled in one of three ways: from its cached solution, from its single
    // planned solution, or by ranking its N planned solutions with a MultiPlanStage (to which
    // 'yieldPolicy' is passed).
    //
    // Returns Status::OK() on success; otherwise the failing status from tagging / plan
    // selection, or BadValue describing which step could not be completed.
    Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) {
        // Clone of the query tree that receives the index selections and is then turned into
        // a solution.
        auto_ptr<OrMatchExpression> taggedOr(
            static_cast<OrMatchExpression*>(_query->root()->shallowClone()));

        // Skeleton of index selections that is inserted into the cache.
        auto_ptr<PlanCacheIndexTree> compositeTree(new PlanCacheIndexTree());

        for (size_t branchNum = 0; branchNum < taggedOr->numChildren(); ++branchNum) {
            MatchExpression* branchExpr = taggedOr->getChild(branchNum);
            BranchPlanningResult* branch = _branchResults[branchNum];

            // Case 1: the index tags we need come straight out of the plan cache.
            if (branch->cachedSolution.get()) {
                Status cachedTagStatus = tagOrChildAccordingToCache(
                    compositeTree.get(),
                    branch->cachedSolution->plannerData[0],
                    branchExpr,
                    _indexMap);
                if (!cachedTagStatus.isOK()) {
                    return cachedTagStatus;
                }
                continue;
            }

            // Case 2: exactly one planned solution, so there is nothing to rank.
            if (1 == branch->solutions.size()) {
                QuerySolution* onlySoln = branch->solutions.front();
                Status singleTagStatus = tagOrChildAccordingToCache(compositeTree.get(),
                                                                    onlySoln->cacheData.get(),
                                                                    branchExpr,
                                                                    _indexMap);
                if (!singleTagStatus.isOK()) {
                    return singleTagStatus;
                }
                continue;
            }

            // Case 3: N solutions, rank them.

            // Zero solutions was already ruled out in planSubqueries(...).
            invariant(!branch->solutions.empty());

            _ws->clear();

            _child.reset(new MultiPlanStage(_txn, _collection,
                                            branch->canonicalQuery.get()));
            MultiPlanStage* ranker = static_cast<MultiPlanStage*>(_child.get());

            // Feed every candidate solution to the MPS.
            for (size_t ix = 0; ix < branch->solutions.size(); ++ix) {
                PlanStage* candidateRoot;
                invariant(StageBuilder::build(_txn,
                                              _collection,
                                              *branch->solutions[ix],
                                              _ws,
                                              &candidateRoot));

                // The MPS takes ownership of solution 'ix' and of 'candidateRoot'.
                ranker->addPlan(branch->solutions.releaseAt(ix),
                                candidateRoot,
                                _ws);
            }

            Status pickStatus = ranker->pickBestPlan(yieldPolicy);
            if (!pickStatus.isOK()) {
                return pickStatus;
            }

            if (!ranker->bestPlanChosen()) {
                mongoutils::str::stream ss;
                ss << "Failed to pick best plan for subchild "
                   << branch->canonicalQuery->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            QuerySolution* winner = ranker->bestSolution();

            // The winner must carry cache data; e.g. nothing is cached for 2d indices.
            if (NULL == winner->cacheData.get()) {
                mongoutils::str::stream ss;
                ss << "No cache data for subchild " << branchExpr->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            // ... and that cache data must describe an index-tagged solution.
            if (SolutionCacheData::USE_INDEX_TAGS_SOLN != winner->cacheData->solnType) {
                mongoutils::str::stream ss;
                ss << "No indexed cache data for subchild "
                   << branchExpr->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            // Copy the winner's index assignments onto this branch of the cloned tree.
            Status rankedTagStatus = QueryPlanner::tagAccordingToCache(
                branchExpr, winner->cacheData->tree.get(), _indexMap);

            if (!rankedTagStatus.isOK()) {
                mongoutils::str::stream ss;
                ss << "Failed to extract indices from subchild "
                   << branchExpr->toString();
                return Status(ErrorCodes::BadValue, ss);
            }

            compositeTree->children.push_back(winner->cacheData->tree->clone());
        }

        // Must do this before using the planner functionality.
        sortUsingTags(taggedOr.get());

        // Build the data access plan from the tagged tree.  Takes ownership of 'taggedOr'.
        QuerySolutionNode* accessRoot = QueryPlannerAccess::buildIndexedDataAccess(
            *_query, taggedOr.release(), false, _plannerParams.indices, _plannerParams);

        if (NULL == accessRoot) {
            return Status(ErrorCodes::BadValue,
                          "Failed to build indexed data path for subplanned query\n");
        }

        QLOG() << "Subplanner: fully tagged tree is " << accessRoot->toString();

        // Takes ownership of 'accessRoot'.
        _compositeSolution.reset(QueryPlannerAnalysis::analyzeDataAccess(*_query,
                                                                         _plannerParams,
                                                                         accessRoot));

        if (NULL == _compositeSolution.get()) {
            return Status(ErrorCodes::BadValue, "Failed to analyze subplanned query");
        }

        QLOG() << "Subplanner: Composite solution is " << _compositeSolution->toString() << endl;

        // Build the execution tree for the composite solution and make it our child stage.
        _ws->clear();
        PlanStage* compositeRoot;
        invariant(StageBuilder::build(_txn,
                                      _collection,
                                      *_compositeSolution.get(),
                                      _ws,
                                      &compositeRoot));
        _child.reset(compositeRoot);

        return Status::OK();
    }
Example #30
0
    // Seeds 'doc' with fields taken from the equality predicates of 'query'.
    //
    // Only a root that is a single equality, or an AND whose direct children include
    // equalities, contributes anything; every other query shape leaves 'doc' untouched and
    // returns OK.  For replacement-style updates only the query's "_id" field (if present) is
    // copied.  Otherwise each equality is applied to 'doc' as a $set modifier through a
    // temporary UpdateDriver running in insert context.
    //
    // Returns the failing status if a modifier cannot be parsed or '_id' cannot be added, and
    // UnsupportedFormat if applying the collected modifiers to 'doc' fails.
    Status UpdateDriver::populateDocumentWithQueryFields(const CanonicalQuery* query,
                                                         mutablebson::Document& doc) const {

        MatchExpression* root = query->root();

        const MatchExpression::MatchType rootType = root->matchType();
        const bool rootIsEquality = (MatchExpression::EQ == rootType);

        // Anything other than an equality or an "and" of predicates is ignored.
        if (!rootIsEquality && MatchExpression::AND != rootType) {
            return Status::OK();
        }

        if (isDocReplacement()) {
            // Replacement mods need the _id field copied explicitly, and nothing else.
            BSONElement idElem = query->getQueryObj().getField("_id");
            if (!idElem.ok()) {
                return Status::OK();
            }

            mb::Element elem = doc.makeElement(idElem);
            return doc.root().pushFront(elem);
        }

        // A throwaway UpdateDriver builds the base doc from the query.
        Options opts;
        opts.logOp = false;
        opts.multi = false;
        opts.upsert = true;
        opts.modOptions = modOptions();

        UpdateDriver insertDriver(opts);
        insertDriver.setContext(ModifierInterface::ExecInfo::INSERT_CONTEXT);

        // Re-pathed copies must stay alive until the modifiers are applied at the end.
        std::vector<BSONObj> copies;

        // A lone equality is treated as a one-element list consisting of the root itself;
        // an AND contributes each of its direct children.
        const size_t numCandidates = rootIsEquality ? 1 : root->numChildren();
        for (size_t i = 0; i < numCandidates; ++i) {
            MatchExpression* candidate = rootIsEquality ? root : root->getChild(i);

            // Only equality predicates become $set modifiers.
            if (candidate->matchType() != MatchExpression::EQ) {
                continue;
            }

            EqualityMatchExpression* eqMatch =
                    static_cast<EqualityMatchExpression*>(candidate);

            const BSONElement matchData = eqMatch->getData();
            BSONElement setElem = matchData;

            // When the element's field name differs from the predicate path (cases like $all),
            // re-key a copy of the data under the path and use that instead.
            if (!candidate->path().empty() &&
                    matchData.fieldNameStringData() != candidate->path()) {
                BSONObjBuilder copyBuilder;
                copyBuilder.appendAs(eqMatch->getData(), candidate->path());
                const BSONObj copy = copyBuilder.obj();
                copies.push_back(copy);
                setElem = copy[candidate->path()];
            }

            // Register the element as a $set modifier.
            Status parseStatus = insertDriver.addAndParse(modifiertable::MOD_SET,
                                                          setElem);
            if (!parseStatus.isOK()) {
                return parseStatus;
            }
        }

        // Apply the collected modifiers to produce the base document.
        Status updateStatus = insertDriver.update(StringData(), &doc);
        copies.clear();
        if (updateStatus.isOK()) {
            return Status::OK();
        }

        return Status(ErrorCodes::UnsupportedFormat,
                      str::stream() << "Cannot create base during"
                                       " insert of update. Caused by :"
                                    << updateStatus.toString());
    }