Пример #1
0
    // Initializes a GeoNearMatchExpression from a hand-parsed GeoJSON $near query and inspects
    // the NearQuery it ends up holding.
    TEST(ExpressionGeoTest, GeoNear1) {
        BSONObj nearSpec = fromjson("{loc:{$near:{$maxDistance:100, "
                                    "$geometry:{type:\"Point\", coordinates:[0,0]}}}}");

        // Parse only the object stored under "loc"; that is the part holding the $near clause.
        NearQuery parsedNear;
        ASSERT_OK(parsedNear.parseFrom(nearSpec["loc"].Obj()));

        GeoNearMatchExpression nearExpr;
        ASSERT(nearExpr.init("a", parsedNear, nearSpec).isOK());

        // Matching documents is not possible here, so only the parsed values are verified.
        const NearQuery& held = nearExpr.getData();
        ASSERT_EQUALS(held.centroid.crs, SPHERE);
        ASSERT_EQUALS(held.minDistance, 0);
        ASSERT_EQUALS(held.maxDistance, 100);
    }
    // Runs a GeoJSON-style $near predicate through MatchExpressionParser and checks that the
    // result is a GEO_NEAR node carrying the requested $maxDistance.
    TEST( MatchExpressionParserGeoNear, ParseNear ) {
        BSONObj query = fromjson("{loc:{$near:{$maxDistance:100, "
                                 "$geometry:{type:\"Point\", coordinates:[0,0]}}}}");

        StatusWithMatchExpression result = MatchExpressionParser::parse( query );
        ASSERT_TRUE( result.isOK() );

        MatchExpression* exp = result.getValue();
        ASSERT_EQUALS(MatchExpression::GEO_NEAR, exp->matchType());

        // The downcast is only valid because the match type was checked just above.
        GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp);
        ASSERT_EQUALS(gnexp->getData().maxDistance, 100);

        // getValue() yields a raw pointer in this API generation, so nothing else releases the
        // parsed tree; free it here to avoid leaking it.
        // NOTE(review): assumes the caller owns the parse result -- confirm against the
        // StatusWithMatchExpression definition in use.
        delete exp;
    }
// Covers the coordinate-pair syntax accepted by $near, $nearSphere, and $geoNear:
// {
//   $near/$nearSphere/$geoNear: [ <x>, <y> ],
//   $minDistance: <distance in radians>,
//   $maxDistance: <distance in radians>
// }
TEST(MatchExpressionParserGeoNear, ParseValidNear) {
    BSONObj spec = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}");

    StatusWithMatchExpression parsed = MatchExpressionParser::parse(spec);
    ASSERT_TRUE(parsed.isOK());

    // The root must be a GEO_NEAR node; the downcast below relies on that.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, root->matchType());

    // Both distance bounds from the query should be present on the parsed expression.
    GeoNearMatchExpression* nearNode = static_cast<GeoNearMatchExpression*>(root);
    const auto& nearData = nearNode->getData();
    ASSERT_EQ(nearData.maxDistance, 100);
    ASSERT_EQ(nearData.minDistance, 50);
}
// Covers the coordinate-pair syntax accepted by $near, $nearSphere, and $geoNear:
// {
//   $near/$nearSphere/$geoNear: [ <x>, <y> ],
//   $minDistance: <distance in radians>,
//   $maxDistance: <distance in radians>
// }
TEST(MatchExpressionParserGeoNear, ParseValidNear) {
    BSONObj spec = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}");

    // No collation is supplied; the parser is handed a null collator.
    const CollatorInterface* noCollator = nullptr;
    StatusWithMatchExpression parsed =
        MatchExpressionParser::parse(spec, ExtensionsCallbackDisallowExtensions(), noCollator);
    ASSERT_TRUE(parsed.isOK());

    // The root must be a GEO_NEAR node; the downcast below relies on that.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, root->matchType());

    // Both distance bounds from the query should be present on the parsed expression.
    GeoNearMatchExpression* nearNode = static_cast<GeoNearMatchExpression*>(root);
    const auto& nearData = nearNode->getData();
    ASSERT_EQ(nearData.maxDistance, 100);
    ASSERT_EQ(nearData.minDistance, 50);
}
// Parses a GeoJSON-style $near predicate and checks that it produces a GEO_NEAR node with the
// requested $maxDistance.
TEST(MatchExpressionParserGeoNear, ParseNear) {
    BSONObj spec = fromjson(
        "{loc:{$near:{$maxDistance:100, "
        "$geometry:{type:\"Point\", coordinates:[0,0]}}}}");

    // No collation is supplied; the parser is handed a null collator.
    const CollatorInterface* noCollator = nullptr;
    StatusWithMatchExpression parsed =
        MatchExpressionParser::parse(spec, ExtensionsCallbackDisallowExtensions(), noCollator);
    ASSERT_TRUE(parsed.isOK());

    // The root must be a GEO_NEAR node; the downcast below relies on that.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQUALS(MatchExpression::GEO_NEAR, root->matchType());

    GeoNearMatchExpression* nearNode = static_cast<GeoNearMatchExpression*>(root);
    const auto& nearData = nearNode->getData();
    ASSERT_EQUALS(nearData.maxDistance, 100);
}
Пример #6
0
// Parses the coordinate-pair form of $nearSphere with both distance bounds and checks that the
// bounds are carried by the resulting GEO_NEAR node.
TEST(MatchExpressionParserGeoNear, ParseValidNearSphere) {
    BSONObj spec = fromjson("{loc: {$nearSphere: [0,0], $maxDistance: 100, $minDistance: 50}}");

    // Parser inputs: a null collator, a test expression context, a no-op extensions callback,
    // and every special feature allowed.
    const CollatorInterface* noCollator = nullptr;
    const boost::intrusive_ptr<ExpressionContextForTest> testCtx(new ExpressionContextForTest());
    StatusWithMatchExpression parsed =
        MatchExpressionParser::parse(spec,
                                     noCollator,
                                     testCtx,
                                     ExtensionsCallbackNoop(),
                                     MatchExpressionParser::kAllowAllSpecialFeatures);
    ASSERT_TRUE(parsed.isOK());

    // The root must be a GEO_NEAR node; the downcast below relies on that.
    MatchExpression* root = parsed.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, root->matchType());

    GeoNearMatchExpression* nearNode = static_cast<GeoNearMatchExpression*>(root);
    const auto& nearData = nearNode->getData();
    ASSERT_EQ(nearData.maxDistance, 100);
    ASSERT_EQ(nearData.minDistance, 50);
}
Пример #7
0
    // static
    bool QueryPlannerIXSelect::compatible(const BSONElement& elt,
                                          const IndexEntry& index,
                                          MatchExpression* node) {
        // Historically one could create indices with any particular value for the index spec,
        // including values that now indicate a special index.  As such we have to make sure the
        // index type wasn't overridden before we pay attention to the string in the index key
        // pattern element.
        //
        // e.g. long ago we could have created an index {a: "2dsphere"} and it would
        // be treated as a btree index by an ancient version of MongoDB.  To try to run
        // 2dsphere queries over it would be folly.
        string indexedFieldType;
        if (String != elt.type() || (INDEX_BTREE == index.type)) {
            indexedFieldType = "";
        }
        else {
            indexedFieldType = elt.String();
        }

        // We know elt.fieldname() == node->path().
        MatchExpression::MatchType exprtype = node->matchType();

        if (indexedFieldType.empty()) {
            // Can't check for null w/a sparse index.
            if (exprtype == MatchExpression::EQ && index.sparse) {
                const EqualityMatchExpression* expr
                    = static_cast<const EqualityMatchExpression*>(node);
                if (expr->getData().isNull()) {
                    return false;
                }
            }

            // We can't use a btree-indexed field for geo expressions.
            if (exprtype == MatchExpression::GEO || exprtype == MatchExpression::GEO_NEAR) {
                return false;
            }

            // There are restrictions on when we can use the index if
            // the expression is a NOT.
            if (exprtype == MatchExpression::NOT) {
                // Prevent negated preds from using sparse or
                // multikey indices. We do so for sparse indices because
                // we will fail to return the documents which do not contain
                // the indexed fields.
                //
                // We avoid multikey indices because of the semantics of
                // negations on multikey fields. For example, with multikey
                // index {a:1}, the document {a: [1,2,3]} does *not* match
                // the query {a: {$ne: 3}}. We'd mess this up if we used
                // an index scan over [MinKey, 3) and (3, MaxKey] without
                // a filter.
                if (index.sparse || index.multikey) {
                    return false;
                }
                // Can't index negations of MOD or REGEX
                MatchExpression::MatchType childtype = node->getChild(0)->matchType();
                if (MatchExpression::REGEX == childtype ||
                    MatchExpression::MOD == childtype) {
                    return false;
                }
            }

            // We can only index EQ using text indices.  This is an artificial limitation imposed by
            // FTSSpec::getIndexPrefix() which will fail if there is not an EQ predicate on each
            // index prefix field of the text index.
            //
            // Example for key pattern {a: 1, b: "text"}:
            // - Allowed: node = {a: 7}
            // - Not allowed: node = {a: {$gt: 7}}

            if (INDEX_TEXT != index.type) {
                return true;
            }

            // If we're here we know it's a text index.  Equalities are OK anywhere in a text index.
            if (MatchExpression::EQ == exprtype) {
                return true;
            }

            // Not-equalities can only go in a suffix field of an index kp.  We look through the key
            // pattern to see if the field we're looking at now appears as a prefix.  If so, we
            // can't use this index for it.
            BSONObjIterator specIt(index.keyPattern);
            while (specIt.more()) {
                BSONElement elt = specIt.next();
                // We hit the dividing mark between prefix and suffix, so whatever field we're
                // looking at is a suffix, since it appears *after* the dividing mark between the
                // two.  As such, we can use the index.
                if (String == elt.type()) {
                    return true;
                }

                // If we're here, we're still looking at prefix elements.  We know that exprtype
                // isn't EQ so we can't use this index.
                if (node->path() == elt.fieldNameStringData()) {
                    return false;
                }
            }

            // NOTE: This shouldn't be reached.  Text index implies there is a separator implies we
            // will always hit the 'return true' above.
            invariant(0);
            return true;
        }
        else if (IndexNames::HASHED == indexedFieldType) {
            return exprtype == MatchExpression::MATCH_IN || exprtype == MatchExpression::EQ;
        }
        else if (IndexNames::GEO_2DSPHERE == indexedFieldType) {
            if (exprtype == MatchExpression::GEO) {
                // within or intersect.
                GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node);
                const GeoQuery& gq = gme->getGeoQuery();
                const GeometryContainer& gc = gq.getGeometry();
                return gc.hasS2Region();
            }
            else if (exprtype == MatchExpression::GEO_NEAR) {
                GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node);
                // Make sure the near query is compatible with 2dsphere.
                if (gnme->getData().centroid.crs == SPHERE || gnme->getData().isNearSphere) {
                    return true;
                }
            }
            return false;
        }
        else if (IndexNames::GEO_2D == indexedFieldType) {
            if (exprtype == MatchExpression::GEO_NEAR) {
                GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node);
                return gnme->getData().centroid.crs == FLAT;
            }
            else if (exprtype == MatchExpression::GEO) {
                // 2d only supports within.
                GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node);
                const GeoQuery& gq = gme->getGeoQuery();
                if (GeoQuery::WITHIN != gq.getPred()) {
                    return false;
                }

                const GeometryContainer& gc = gq.getGeometry();

                // 2d indices answer flat queries.
                if (gc.hasFlatRegion()) {
                    return true;
                }

                // 2d indices can answer centerSphere queries.
                if (NULL == gc._cap.get()) {
                    return false;
                }

                verify(SPHERE == gc._cap->crs);
                const Circle& circle = gc._cap->circle;

                // No wrapping around the edge of the world is allowed in 2d centerSphere.
                return twoDWontWrap(circle, index);
            }
            return false;
        }
        else if (IndexNames::TEXT == indexedFieldType) {
            return (exprtype == MatchExpression::TEXT);
        }
        else if (IndexNames::GEO_HAYSTACK == indexedFieldType) {
            return false;
        }
        else {
            warning() << "Unknown indexing for node " << node->toString()
                      << " and field " << elt.toString() << endl;
            verify(0);
        }
    }
Пример #8
0
    // static
    //
    // Produces candidate query solutions for 'query' and appends them to 'out'.
    //
    // 'query'  - the canonical query to plan; its filter tree, hint, sort, snapshot flag and
    //            cursor options are consulted.
    // 'params' - planner options (NO_TABLE_SCAN, INCLUDE_COLLSCAN) and the candidate indices.
    // 'out'    - receives the generated QuerySolution pointers; entries already present are
    //            left in place.
    //
    // Nothing is returned.  When planning cannot proceed (for example a hint that matches no
    // index, or a GEO_NEAR/TEXT predicate without a usable index) the function returns early
    // and 'out' may be left without new solutions.
    void QueryPlanner::plan(const CanonicalQuery& query,
                            const QueryPlannerParams& params,
                            vector<QuerySolution*>* out) {
        QLOG() << "=============================\n"
               << "Beginning planning, options = " << optionString(params.options) << endl
               << "Canonical query:\n" << query.toString() << endl
               << "============================="
               << endl;

        // The shortcut formerly known as IDHACK.  See if it's a simple _id query.  If so we might
        // just make an ixscan over the _id index and bypass the rest of planning entirely.
        if (!query.getParsed().isExplain() && !query.getParsed().showDiskLoc()
            && isSimpleIdQuery(query.getParsed().getFilter())
            && !query.getParsed().hasOption(QueryOption_CursorTailable)) {

            // See if we can find an _id index.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    const IndexEntry& index = params.indices[i];
                    QLOG() << "IDHACK using index " << index.toString() << endl;

                    // If so, we make a simple scan to find the doc.
                    IndexScanNode* isn = new IndexScanNode();
                    isn->indexKeyPattern = index.keyPattern;
                    isn->indexIsMultiKey = index.multikey;
                    isn->direction = 1;
                    isn->bounds.isSimpleRange = true;
                    BSONObj key = getKeyFromQuery(index.keyPattern, query.getParsed().getFilter());
                    isn->bounds.startKey = isn->bounds.endKey = key;
                    isn->bounds.endKeyInclusive = true;
                    isn->computeProperties();

                    QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, isn);

                    if (NULL != soln) {
                        out->push_back(soln);
                        // Log the solution we just built; 'out' may already have held entries,
                        // in which case its first element is not this solution.
                        QLOG() << "IDHACK solution is:\n" << soln->toString() << endl;
                        // And that's it.
                        return;
                    }
                }
            }
        }

        for (size_t i = 0; i < params.indices.size(); ++i) {
            QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl;
        }

        bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

        // If the query requests a tailable cursor, the only solution is a collscan + filter with
        // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
        // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
        // can't provide one.  Is this what we want?
        if (query.getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
                && canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, true, params);
                if (NULL != soln) {
                    out->push_back(soln);
                }
            }
            return;
        }

        // The hint can be $natural: 1.  If this happens, output a collscan.  It's a weird way of
        // saying "table scan for two, please."
        if (!query.getParsed().getHint().isEmpty()) {
            BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural");
            if (!natural.eoo()) {
                QLOG() << "forcing a table scan due to hinted $natural\n";
                if (canTableScan) {
                    QuerySolution* soln = buildCollscanSoln(query, false, params);
                    if (NULL != soln) {
                        out->push_back(soln);
                    }
                }
                return;
            }
        }

        // NOR and NOT we can't handle well with indices.  If we see them here, they weren't
        // rewritten to remove the negation.  Just output a collscan for those.
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOT)
            || QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOR)) {

            // If there's a near predicate, we can't handle this.
            // TODO: Should canonicalized query detect this?
            if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)) {
                warning() << "Can't handle NOT/NOR with GEO_NEAR";
                return;
            }
            QLOG() << "NOT/NOR in plan, just outtping a collscan\n";
            if (canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, false, params);
                if (NULL != soln) {
                    out->push_back(soln);
                }
            }
            return;
        }

        // Figure out what fields we care about.
        unordered_set<string> fields;
        QueryPlannerIXSelect::getFields(query.root(), "", &fields);

        for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
            QLOG() << "predicate over field " << *it << endl;
        }

        // Filter our indices so we only look at indices that are over our predicates.
        vector<IndexEntry> relevantIndices;

        // Hints require us to only consider the hinted index.
        BSONObj hintIndex = query.getParsed().getHint();

        // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
        // plan.  If that fails, just scan the _id index.
        if (query.getParsed().isSnapshot()) {
            // Find the ID index in indexKeyPatterns.  It's our hint.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    hintIndex = params.indices[i].keyPattern;
                    break;
                }
            }
        }

        // Position of the hinted index within params.indices; max() means "not found".
        size_t hintIndexNumber = numeric_limits<size_t>::max();

        if (!hintIndex.isEmpty()) {
            // Sigh.  If the hint is specified it might be using the index name.
            BSONElement firstHintElt = hintIndex.firstElement();
            if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
                string hintName = firstHintElt.String();
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (params.indices[i].name == hintName) {
                        QLOG() << "hint by name specified, restricting indices to "
                             << params.indices[i].keyPattern.toString() << endl;
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        hintIndexNumber = i;
                        hintIndex = params.indices[i].keyPattern;
                        break;
                    }
                }
            }
            else {
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        QLOG() << "hint specified, restricting indices to " << hintIndex.toString()
                             << endl;
                        hintIndexNumber = i;
                        break;
                    }
                }
            }

            if (hintIndexNumber == numeric_limits<size_t>::max()) {
                // This is supposed to be an error.
                warning() << "Can't find hint for " << hintIndex.toString();
                return;
            }
        }
        else {
            QLOG() << "Finding relevant indices\n";
            QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);
        }

        for (size_t i = 0; i < relevantIndices.size(); ++i) {
            QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl;
        }

        // Figure out how useful each index is to each predicate.
        // query.root() is now annotated with RelevantTag(s).
        QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);

        QLOG() << "rated tree" << endl;
        QLOG() << query.root()->toString() << endl;

        // If there is a GEO_NEAR it must have an index it can use directly.
        // XXX: move into data access?
        MatchExpression* gnNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
            // No index for GEO_NEAR?  No query.
            RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return;
            }

            GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode);

            // Index numbers (positions in relevantIndices) of the non-2d indices tagged as
            // 'first' for the GEO_NEAR node; these replace tag->first once the loop is done.
            vector<size_t> newFirst;

            // 2d + GEO_NEAR is annoying.  Because 2d's GEO_NEAR isn't streaming we have to embed
            // the full query tree inside it as a matcher.
            //
            // 'pos' only advances when the current entry is kept: erasing a 2d entry shifts the
            // following entry into the same slot, and it must be examined too.
            size_t pos = 0;
            while (pos < tag->first.size()) {
                const size_t ixNum = tag->first[pos];

                // GEO_NEAR has a non-2d index it can use.  We can deal w/that in normal planning.
                if (!is2DIndex(relevantIndices[ixNum].keyPattern)) {
                    // Record the index number itself, not its position within tag->first.
                    newFirst.push_back(ixNum);
                    ++pos;
                    continue;
                }

                // If we're here, GEO_NEAR has a 2d index.  We create a 2dgeonear plan with the
                // entire tree as a filter, if possible.

                GeoNear2DNode* solnRoot = new GeoNear2DNode();
                solnRoot->nq = gnme->getData();

                if (MatchExpression::GEO_NEAR != query.root()->matchType()) {
                    // root is an AND, clone and delete the GEO_NEAR child.
                    MatchExpression* filterTree = query.root()->shallowClone();
                    verify(MatchExpression::AND == filterTree->matchType());

                    bool foundChild = false;
                    for (size_t c = 0; c < filterTree->numChildren(); ++c) {
                        if (MatchExpression::GEO_NEAR == filterTree->getChild(c)->matchType()) {
                            foundChild = true;
                            // NOTE(review): the child pointer is dropped from the vector without
                            // being deleted; if the clone owns its children this leaks -- confirm.
                            filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + c);
                            break;
                        }
                    }
                    verify(foundChild);
                    solnRoot->filter.reset(filterTree);
                }

                solnRoot->numWanted = query.getParsed().getNumToReturn();
                if (0 == solnRoot->numWanted) {
                    solnRoot->numWanted = 100;
                }
                solnRoot->indexKeyPattern = relevantIndices[ixNum].keyPattern;

                // Remove the 2d index.  2d can only be the first field, and we know there is
                // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll
                // only be first for gnNode.
                tag->first.erase(tag->first.begin() + pos);

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);

                if (NULL != soln) {
                    out->push_back(soln);
                }
            }

            // Continue planning w/non-2d indices tagged for this pred.
            tag->first.swap(newFirst);

            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return;
            }
        }

        // Likewise, if there is a TEXT it must have an index it can use directly.
        MatchExpression* textNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
            RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return;
            }
        }

        // If we have any relevant indices, we try to create indexed plans.
        if (0 < relevantIndices.size()) {
            // The enumerator spits out trees tagged with IndexTag(s).
            PlanEnumerator isp(query.root(), &relevantIndices);
            isp.init();

            MatchExpression* rawTree;
            while (isp.getNext(&rawTree)) {
                QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString()
                       << endl;

                // This can fail if enumeration makes a mistake.
                QuerySolutionNode* solnRoot =
                    QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices);

                if (NULL == solnRoot) { continue; }

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
                if (NULL != soln) {
                    QLOG() << "Planner: adding solution:\n" << soln->toString() << endl;
                    out->push_back(soln);
                }
            }
        }

        QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n";

        // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
        // scan the entire index to provide results and output that as our plan.  This is the
        // desired behavior when an index is hinted that is not relevant to the query.
        if (!hintIndex.isEmpty() && (0 == out->size())) {
            QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
            if (NULL != soln) {
                QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl;
                out->push_back(soln);
            }
            return;
        }

        // If a sort order is requested, there may be an index that provides it, even if that
        // index is not over any predicates in the query.
        //
        // XXX XXX: Can we do this even if the index is sparse?  Might we miss things?
        if (!query.getParsed().getSort().isEmpty()
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {

            // See if we have a sort provided from an index already.
            bool usingIndexToSort = false;
            for (size_t i = 0; i < out->size(); ++i) {
                QuerySolution* soln = (*out)[i];
                if (!soln->hasSortStage) {
                    usingIndexToSort = true;
                    break;
                }
            }

            if (!usingIndexToSort) {
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const BSONObj& kp = params.indices[i].keyPattern;
                    if (providesSort(query, kp)) {
                        QLOG() << "Planner: outputting soln that uses index to provide sort."
                               << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                    if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                        QLOG() << "Planner: outputting soln that uses (reverse) index "
                               << "to provide sort." << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                }
            }
        }

        // TODO: Do we always want to offer a collscan solution?
        // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs.
        if (    !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
             && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)
             && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan)))
        {
            QuerySolution* collscan = buildCollscanSoln(query, false, params);
            if (NULL != collscan) {
                out->push_back(collscan);
                QLOG() << "Planner: outputting a collscan:\n";
                QLOG() << collscan->toString() << endl;
            }
        }
    }
Пример #9
0
    // static
    //
    // Decides whether the predicate 'node' may use 'index' through the key pattern element
    // 'elt'.
    //
    // 'elt'   - the key pattern element for the field that 'node' is over; a string value
    //           names a special index type, anything else is treated as a plain index.
    // 'index' - the index that 'elt' belongs to.
    // 'node'  - the predicate being considered.
    //
    // Returns true if the index can be used for the predicate and false otherwise.  An
    // unrecognized index type string triggers verify(0).
    bool QueryPlannerIXSelect::compatible(const BSONElement& elt,
                                          const IndexEntry& index,
                                          MatchExpression* node) {
        // XXX: CatalogHack::getAccessMethodName: do we have to worry about this?  when?
        string ixtype;
        if (String != elt.type()) {
            ixtype = "";
        }
        else {
            ixtype = elt.String();
        }

        // We know elt.fieldname() == node->path().
        MatchExpression::MatchType exprtype = node->matchType();

        // TODO: use indexnames
        if ("" == ixtype) {
            if (index.sparse && exprtype == MatchExpression::EQ) {
                // Can't check for null w/a sparse index.
                const EqualityMatchExpression* expr
                    = static_cast<const EqualityMatchExpression*>(node);
                return !expr->getData().isNull();
            }
            // A plain index cannot answer geo predicates; anything else is fine.
            return exprtype != MatchExpression::GEO && exprtype != MatchExpression::GEO_NEAR;
        }
        else if ("hashed" == ixtype) {
            return exprtype == MatchExpression::MATCH_IN || exprtype == MatchExpression::EQ;
        }
        else if ("2dsphere" == ixtype) {
            if (exprtype == MatchExpression::GEO) {
                // within or intersect.
                GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node);
                const GeoQuery& gq = gme->getGeoQuery();
                const GeometryContainer& gc = gq.getGeometry();
                return gc.hasS2Region();
            }
            else if (exprtype == MatchExpression::GEO_NEAR) {
                GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node);
                // Make sure the near query is compatible with 2dsphere.
                if (gnme->getData().centroid.crs == SPHERE || gnme->getData().isNearSphere) {
                    return true;
                }
            }
            return false;
        }
        else if ("2d" == ixtype) {
            if (exprtype == MatchExpression::GEO_NEAR) {
                GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node);
                return gnme->getData().centroid.crs == FLAT;
            }
            else if (exprtype == MatchExpression::GEO) {
                // 2d only supports within.
                GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node);
                const GeoQuery& gq = gme->getGeoQuery();
                if (GeoQuery::WITHIN != gq.getPred()) {
                    return false;
                }

                const GeometryContainer& gc = gq.getGeometry();

                // 2d indices answer flat queries.
                if (gc.hasFlatRegion()) {
                    return true;
                }

                // 2d indices can answer centerSphere queries.
                if (NULL == gc._cap.get()) {
                    return false;
                }

                verify(SPHERE == gc._cap->crs);
                const Circle& circle = gc._cap->circle;

                // No wrapping around the edge of the world is allowed in 2d centerSphere.
                return twoDWontWrap(circle, index);
            }
            return false;
        }
        else if ("text" == ixtype) {
            return (exprtype == MatchExpression::TEXT);
        }
        else if ("geoHaystack" == ixtype) {
            return false;
        }
        else {
            warning() << "Unknown indexing for node " << node->toString()
                      << " and field " << elt.toString() << endl;
            verify(0);
            // Not expected to be reached, but every path of a non-void function needs a return
            // value.  Treat an unknown index type as unusable.
            return false;
        }
    }
Пример #10
0
    // Produces candidate query solutions for 'query' and appends them to 'out'.
    //
    // Planning proceeds through a series of special cases, each of which may return early:
    //   1. Tailable cursors: only a tailable collection scan is offered.
    //   2. A $natural hint: only a collection scan is offered.
    //   3. .min()/.max(): a single index scan bounded by the provided keys is offered.
    //   4. GEO_NEAR over a 2d index: a GeoNear2DNode solution with the remaining predicates
    //      embedded as a filter is offered for each such index.
    // After that, indexed plans are enumerated over the relevant indices, followed by the
    // hinted-index fallback, a whole-index scan that can provide a requested sort, and finally
    // (when permitted) a collection scan.
    //
    // Returns Status::OK() when planning ran to completion, even if 'out' is left empty.
    // Returns ErrorCodes::BadValue when the hint does not match any index, when min/max cannot
    // be satisfied by any (or the hinted) index, or when a GEO_NEAR predicate has no usable
    // index.
    //
    // Solutions pushed onto 'out' are raw pointers.
    // NOTE(review): presumably the caller takes ownership of them -- confirm.
    //
    // static
    Status QueryPlanner::plan(const CanonicalQuery& query,
                              const QueryPlannerParams& params,
                              std::vector<QuerySolution*>* out) {

        QLOG() << "=============================\n"
               << "Beginning planning, options = " << optionString(params.options) << endl
               << "Canonical query:\n" << query.toString() << endl
               << "============================="
               << endl;

        for (size_t i = 0; i < params.indices.size(); ++i) {
            QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl;
        }

        bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN);

        // If the query requests a tailable cursor, the only solution is a collscan + filter with
        // tailable set on the collscan.  TODO: This is a policy departure.  Previously I think you
        // could ask for a tailable cursor and it just tried to give you one.  Now, we fail if we
        // can't provide one.  Is this what we want?
        if (query.getParsed().hasOption(QueryOption_CursorTailable)) {
            if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
                && canTableScan) {
                QuerySolution* soln = buildCollscanSoln(query, true, params);
                if (NULL != soln) {
                    out->push_back(soln);
                }
            }
            // Note: no solution is produced when GEO_NEAR is present or table scans are
            // disallowed, but the status is still OK.
            return Status::OK();
        }

        // The hint can be $natural: 1.  If this happens, output a collscan.  It's a weird way of
        // saying "table scan for two, please."
        if (!query.getParsed().getHint().isEmpty()) {
            BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural");
            if (!natural.eoo()) {
                QLOG() << "forcing a table scan due to hinted $natural\n";
                // min/max are incompatible with $natural.
                if (canTableScan && query.getParsed().getMin().isEmpty()
                                 && query.getParsed().getMax().isEmpty()) {
                    QuerySolution* soln = buildCollscanSoln(query, false, params);
                    if (NULL != soln) {
                        out->push_back(soln);
                    }
                }
                return Status::OK();
            }
        }

        // Figure out what fields we care about.
        unordered_set<string> fields;
        QueryPlannerIXSelect::getFields(query.root(), "", &fields);

        for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) {
            QLOG() << "predicate over field " << *it << endl;
        }

        // Filter our indices so we only look at indices that are over our predicates.
        vector<IndexEntry> relevantIndices;

        // Hints require us to only consider the hinted index.
        BSONObj hintIndex = query.getParsed().getHint();

        // Snapshot is a form of a hint.  If snapshot is set, try to use _id index to make a real
        // plan.  If that fails, just scan the _id index.
        if (query.getParsed().isSnapshot()) {
            // Find the ID index in indexKeyPatterns.  It's our hint.
            for (size_t i = 0; i < params.indices.size(); ++i) {
                if (isIdIndex(params.indices[i].keyPattern)) {
                    hintIndex = params.indices[i].keyPattern;
                    break;
                }
            }
        }

        // Position of the hinted index within params.indices; max() means "no hinted index found".
        size_t hintIndexNumber = numeric_limits<size_t>::max();

        if (hintIndex.isEmpty()) {
            QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices);
        }
        else {
            // Sigh.  If the hint is specified it might be using the index name.
            BSONElement firstHintElt = hintIndex.firstElement();
            if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) {
                string hintName = firstHintElt.String();
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (params.indices[i].name == hintName) {
                        QLOG() << "hint by name specified, restricting indices to "
                             << params.indices[i].keyPattern.toString() << endl;
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        hintIndexNumber = i;
                        // From here on the hint is represented by its key pattern.
                        hintIndex = params.indices[i].keyPattern;
                        break;
                    }
                }
            }
            else {
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) {
                        relevantIndices.clear();
                        relevantIndices.push_back(params.indices[i]);
                        QLOG() << "hint specified, restricting indices to " << hintIndex.toString()
                             << endl;
                        hintIndexNumber = i;
                        break;
                    }
                }
            }

            // The hint matched neither an index name nor a key pattern.
            if (hintIndexNumber == numeric_limits<size_t>::max()) {
                return Status(ErrorCodes::BadValue, "bad hint");
            }
        }

        // Deal with the .min() and .max() query options.  If either exist we can only use an index
        // that matches the object inside.
        if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) {
            BSONObj minObj = query.getParsed().getMin();
            BSONObj maxObj = query.getParsed().getMax();

            // This is the index into params.indices[...] that we use.
            size_t idxNo = numeric_limits<size_t>::max();

            // If there's an index hinted we need to be able to use it.
            if (!hintIndex.isEmpty()) {
                if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) {
                    QLOG() << "minobj doesnt work w hint";
                    return Status(ErrorCodes::BadValue,
                                  "hint provided does not work with min query");
                }

                if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) {
                    QLOG() << "maxobj doesnt work w hint";
                    return Status(ErrorCodes::BadValue,
                                  "hint provided does not work with max query");
                }

                idxNo = hintIndexNumber;
            }
            else {
                // No hinted index, look for one that is compatible (has same field names and
                // ordering thereof).  Whichever of min/max was provided is used for the check;
                // min wins when both are present.
                BSONObj toUse = minObj.isEmpty() ? maxObj : minObj;
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const BSONObj& kp = params.indices[i].keyPattern;
                    if (indexCompatibleMaxMin(toUse, kp)) {
                        idxNo = i;
                        break;
                    }
                }
            }

            if (idxNo == numeric_limits<size_t>::max()) {
                QLOG() << "Can't find relevant index to use for max/min query";
                // Can't find an index to use, bail out.
                return Status(ErrorCodes::BadValue,
                              "unable to find relevant index for max/min query");
            }

            // maxObj can be empty; the index scan just goes until the end.  minObj can't be empty
            // though, so if it is, we make a minKey object.
            if (minObj.isEmpty()) {
                BSONObjBuilder bob;
                bob.appendMinKey("");
                minObj = bob.obj();
            }
            else {
                // Must strip off the field names to make an index key.
                minObj = stripFieldNames(minObj);
            }

            if (!maxObj.isEmpty()) {
                // Must strip off the field names to make an index key.
                maxObj = stripFieldNames(maxObj);
            }

            QLOG() << "max/min query using index " << params.indices[idxNo].toString() << endl;

            // Make our scan and output.
            QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan(params.indices[idxNo],
                                                                            query,
                                                                            params,
                                                                            minObj,
                                                                            maxObj);

            QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
            if (NULL != soln) {
                out->push_back(soln);
            }

            return Status::OK();
        }

        for (size_t i = 0; i < relevantIndices.size(); ++i) {
            QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl;
        }

        // Figure out how useful each index is to each predicate.
        // query.root() is now annotated with RelevantTag(s).
        QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices);

        QLOG() << "rated tree" << endl;
        QLOG() << query.root()->toString() << endl;

        // If there is a GEO_NEAR it must have an index it can use directly.
        // XXX: move into data access?
        MatchExpression* gnNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) {
            // No index for GEO_NEAR?  No query.
            RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                QLOG() << "unable to find index for $geoNear query" << endl;
                return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query");
            }

            GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode);

            // The non-2d entries of tag->first.  Entries are index numbers into relevantIndices,
            // exactly as stored in tag->first, so that the swap below keeps the tag meaningful.
            vector<size_t> newFirst;

            // 2d + GEO_NEAR is annoying.  Because 2d's GEO_NEAR isn't streaming we have to embed
            // the full query tree inside it as a matcher.
            //
            // tag->first is deliberately not modified inside this loop: erasing while indexing
            // forward would skip the entry following each 2d index.  The 2d entries are dropped
            // all at once by the swap with 'newFirst' after the loop.
            for (size_t i = 0; i < tag->first.size(); ++i) {
                const size_t relevantIdx = tag->first[i];

                // GEO_NEAR has a non-2d index it can use.  We can deal w/that in normal planning.
                if (!is2DIndex(relevantIndices[relevantIdx].keyPattern)) {
                    // Keep the index number itself, not its position within tag->first.
                    newFirst.push_back(relevantIdx);
                    continue;
                }

                // If we're here, GEO_NEAR has a 2d index.  We create a 2dgeonear plan with the
                // entire tree as a filter, if possible.

                GeoNear2DNode* solnRoot = new GeoNear2DNode();
                solnRoot->nq = gnme->getData();
                if (NULL != query.getProj()) {
                    solnRoot->addPointMeta = query.getProj()->wantGeoNearPoint();
                    solnRoot->addDistMeta = query.getProj()->wantGeoNearDistance();
                }

                if (MatchExpression::GEO_NEAR != query.root()->matchType()) {
                    // root is an AND, clone and delete the GEO_NEAR child.
                    MatchExpression* filterTree = query.root()->shallowClone();
                    verify(MatchExpression::AND == filterTree->matchType());

                    bool foundChild = false;
                    for (size_t child = 0; child < filterTree->numChildren(); ++child) {
                        if (MatchExpression::GEO_NEAR
                                == filterTree->getChild(child)->matchType()) {
                            foundChild = true;
                            // NOTE(review): the removed child pointer is dropped without being
                            // deleted.  If shallowClone() gives the clone ownership of its
                            // children this leaks the cloned GEO_NEAR node -- confirm.
                            filterTree->getChildVector()->erase(
                                filterTree->getChildVector()->begin() + child);
                            break;
                        }
                    }
                    verify(foundChild);
                    solnRoot->filter.reset(filterTree);
                }

                // A numToReturn of 0 is replaced by a default of 100 results.
                solnRoot->numWanted = query.getParsed().getNumToReturn();
                if (0 == solnRoot->numWanted) {
                    solnRoot->numWanted = 100;
                }
                solnRoot->indexKeyPattern = relevantIndices[relevantIdx].keyPattern;

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);

                if (NULL != soln) {
                    out->push_back(soln);
                }
            }

            // Continue planning w/non-2d indices tagged for this pred.  This also removes every
            // 2d index from the tag: 2d can only be the first field, and we know there is only
            // one GEO_NEAR, so we don't care if anyone else was assigned it; it'll only be first
            // for gnNode.
            tag->first.swap(newFirst);

            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                return Status::OK();
            }
        }

        // Likewise, if there is a TEXT it must have an index it can use directly.
        MatchExpression* textNode = NULL;
        if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) {
            RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag());
            if (0 == tag->first.size() && 0 == tag->notFirst.size()) {
                // No usable index for TEXT: return without producing further solutions.
                return Status::OK();
            }
        }

        // If we have any relevant indices, we try to create indexed plans.
        if (0 < relevantIndices.size()) {
            // The enumerator spits out trees tagged with IndexTag(s).
            PlanEnumeratorParams enumParams;
            enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION;
            enumParams.root = query.root();
            enumParams.indices = &relevantIndices;

            PlanEnumerator isp(enumParams);
            isp.init();

            MatchExpression* rawTree;
            // XXX: have limit on # of indexed solns we'll consider.  We could have a perverse
            // query and index that could make n^2 very unpleasant.
            while (isp.getNext(&rawTree)) {
                QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString()
                       << endl;

                // This can fail if enumeration makes a mistake.
                QuerySolutionNode* solnRoot =
                    QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices);

                if (NULL == solnRoot) { continue; }

                QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot);
                if (NULL != soln) {
                    QLOG() << "Planner: adding solution:\n" << soln->toString() << endl;
                    out->push_back(soln);
                }
            }
        }

        QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n";

        // An index was hinted.  If there are any solutions, they use the hinted index.  If not, we
        // scan the entire index to provide results and output that as our plan.  This is the
        // desired behavior when an index is hinted that is not relevant to the query.
        if (!hintIndex.isEmpty()) {
            if (0 == out->size()) {
                QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params);
                verify(NULL != soln);
                QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl;
                out->push_back(soln);
            }
            return Status::OK();
        }

        // If a sort order is requested, there may be an index that provides it, even if that
        // index is not over any predicates in the query.
        //
        if (!query.getParsed().getSort().isEmpty()
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
            && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) {

            // See if we have a sort provided from an index already, i.e. some solution so far
            // has no explicit sort stage.
            bool usingIndexToSort = false;
            for (size_t i = 0; i < out->size(); ++i) {
                QuerySolution* soln = (*out)[i];
                if (!soln->hasSortStage) {
                    usingIndexToSort = true;
                    break;
                }
            }

            if (!usingIndexToSort) {
                // Offer at most one whole-index scan, over the first non-sparse index whose key
                // pattern (forwards or reversed) provides the sort.
                for (size_t i = 0; i < params.indices.size(); ++i) {
                    const IndexEntry& index = params.indices[i];
                    if (index.sparse) {
                        continue;
                    }
                    const BSONObj kp = LiteParsedQuery::normalizeSortOrder(index.keyPattern);
                    if (providesSort(query, kp)) {
                        QLOG() << "Planner: outputting soln that uses index to provide sort."
                               << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                    if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) {
                        QLOG() << "Planner: outputting soln that uses (reverse) index "
                               << "to provide sort." << endl;
                        QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1);
                        if (NULL != soln) {
                            out->push_back(soln);
                            break;
                        }
                    }
                }
            }
        }

        // TODO: Do we always want to offer a collscan solution?
        // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs.
        //
        // A collscan is only offered when the query has no GEO_NEAR or TEXT predicate, no index
        // was hinted, and either the caller explicitly asked for one (INCLUDE_COLLSCAN) or there
        // is no other solution and table scans are permitted.
        if (    !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)
             && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)
             && hintIndex.isEmpty()
             && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan)))
        {
            QuerySolution* collscan = buildCollscanSoln(query, false, params);
            if (NULL != collscan) {
                out->push_back(collscan);
                QLOG() << "Planner: outputting a collscan:\n";
                QLOG() << collscan->toString() << endl;
            }
        }

        return Status::OK();
    }