TEST(ExpressionGeoTest, GeoNear1) { BSONObj query = fromjson("{loc:{$near:{$maxDistance:100, " "$geometry:{type:\"Point\", coordinates:[0,0]}}}}"); NearQuery nq; ASSERT_OK(nq.parseFrom(query["loc"].Obj())); GeoNearMatchExpression gne; ASSERT(gne.init("a", nq, query).isOK()); // We can't match the data but we can make sure it was parsed OK. ASSERT_EQUALS(gne.getData().centroid.crs, SPHERE); ASSERT_EQUALS(gne.getData().minDistance, 0); ASSERT_EQUALS(gne.getData().maxDistance, 100); }
TEST( MatchExpressionParserGeoNear, ParseNear ) { BSONObj query = fromjson("{loc:{$near:{$maxDistance:100, " "$geometry:{type:\"Point\", coordinates:[0,0]}}}}"); StatusWithMatchExpression result = MatchExpressionParser::parse( query ); ASSERT_TRUE( result.isOK() ); MatchExpression* exp = result.getValue(); ASSERT_EQUALS(MatchExpression::GEO_NEAR, exp->matchType()); GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp); ASSERT_EQUALS(gnexp->getData().maxDistance, 100); }
// For $near, $nearSphere, and $geoNear syntax of: // { // $near/$nearSphere/$geoNear: [ <x>, <y> ], // $minDistance: <distance in radians>, // $maxDistance: <distance in radians> // } TEST(MatchExpressionParserGeoNear, ParseValidNear) { BSONObj query = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}"); StatusWithMatchExpression result = MatchExpressionParser::parse(query); ASSERT_TRUE(result.isOK()); MatchExpression* exp = result.getValue().get(); ASSERT_EQ(MatchExpression::GEO_NEAR, exp->matchType()); GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp); ASSERT_EQ(gnexp->getData().maxDistance, 100); ASSERT_EQ(gnexp->getData().minDistance, 50); }
// For $near, $nearSphere, and $geoNear syntax of: // { // $near/$nearSphere/$geoNear: [ <x>, <y> ], // $minDistance: <distance in radians>, // $maxDistance: <distance in radians> // } TEST(MatchExpressionParserGeoNear, ParseValidNear) { BSONObj query = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}"); const CollatorInterface* collator = nullptr; StatusWithMatchExpression result = MatchExpressionParser::parse(query, ExtensionsCallbackDisallowExtensions(), collator); ASSERT_TRUE(result.isOK()); MatchExpression* exp = result.getValue().get(); ASSERT_EQ(MatchExpression::GEO_NEAR, exp->matchType()); GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp); ASSERT_EQ(gnexp->getData().maxDistance, 100); ASSERT_EQ(gnexp->getData().minDistance, 50); }
TEST(MatchExpressionParserGeoNear, ParseNear) { BSONObj query = fromjson( "{loc:{$near:{$maxDistance:100, " "$geometry:{type:\"Point\", coordinates:[0,0]}}}}"); const CollatorInterface* collator = nullptr; StatusWithMatchExpression result = MatchExpressionParser::parse(query, ExtensionsCallbackDisallowExtensions(), collator); ASSERT_TRUE(result.isOK()); MatchExpression* exp = result.getValue().get(); ASSERT_EQUALS(MatchExpression::GEO_NEAR, exp->matchType()); GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp); ASSERT_EQUALS(gnexp->getData().maxDistance, 100); }
TEST(MatchExpressionParserGeoNear, ParseValidNearSphere) { BSONObj query = fromjson("{loc: {$nearSphere: [0,0], $maxDistance: 100, $minDistance: 50}}"); const CollatorInterface* collator = nullptr; const boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); StatusWithMatchExpression result = MatchExpressionParser::parse(query, collator, expCtx, ExtensionsCallbackNoop(), MatchExpressionParser::kAllowAllSpecialFeatures); ASSERT_TRUE(result.isOK()); MatchExpression* exp = result.getValue().get(); ASSERT_EQ(MatchExpression::GEO_NEAR, exp->matchType()); GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp); ASSERT_EQ(gnexp->getData().maxDistance, 100); ASSERT_EQ(gnexp->getData().minDistance, 50); }
// static bool QueryPlannerIXSelect::compatible(const BSONElement& elt, const IndexEntry& index, MatchExpression* node) { // Historically one could create indices with any particular value for the index spec, // including values that now indicate a special index. As such we have to make sure the // index type wasn't overridden before we pay attention to the string in the index key // pattern element. // // e.g. long ago we could have created an index {a: "2dsphere"} and it would // be treated as a btree index by an ancient version of MongoDB. To try to run // 2dsphere queries over it would be folly. string indexedFieldType; if (String != elt.type() || (INDEX_BTREE == index.type)) { indexedFieldType = ""; } else { indexedFieldType = elt.String(); } // We know elt.fieldname() == node->path(). MatchExpression::MatchType exprtype = node->matchType(); if (indexedFieldType.empty()) { // Can't check for null w/a sparse index. if (exprtype == MatchExpression::EQ && index.sparse) { const EqualityMatchExpression* expr = static_cast<const EqualityMatchExpression*>(node); if (expr->getData().isNull()) { return false; } } // We can't use a btree-indexed field for geo expressions. if (exprtype == MatchExpression::GEO || exprtype == MatchExpression::GEO_NEAR) { return false; } // There are restrictions on when we can use the index if // the expression is a NOT. if (exprtype == MatchExpression::NOT) { // Prevent negated preds from using sparse or // multikey indices. We do so for sparse indices because // we will fail to return the documents which do not contain // the indexed fields. // // We avoid multikey indices because of the semantics of // negations on multikey fields. For example, with multikey // index {a:1}, the document {a: [1,2,3]} does *not* match // the query {a: {$ne: 3}}. We'd mess this up if we used // an index scan over [MinKey, 3) and (3, MaxKey] without // a filter. if (index.sparse || index.multikey) { return false; } // Can't index negations of MOD or REGEX MatchExpression::MatchType childtype = node->getChild(0)->matchType(); if (MatchExpression::REGEX == childtype || MatchExpression::MOD == childtype) { return false; } } // We can only index EQ using text indices. This is an artificial limitation imposed by // FTSSpec::getIndexPrefix() which will fail if there is not an EQ predicate on each // index prefix field of the text index. // // Example for key pattern {a: 1, b: "text"}: // - Allowed: node = {a: 7} // - Not allowed: node = {a: {$gt: 7}} if (INDEX_TEXT != index.type) { return true; } // If we're here we know it's a text index. Equalities are OK anywhere in a text index. if (MatchExpression::EQ == exprtype) { return true; } // Not-equalities can only go in a suffix field of an index kp. We look through the key // pattern to see if the field we're looking at now appears as a prefix. If so, we // can't use this index for it. BSONObjIterator specIt(index.keyPattern); while (specIt.more()) { BSONElement elt = specIt.next(); // We hit the dividing mark between prefix and suffix, so whatever field we're // looking at is a suffix, since it appears *after* the dividing mark between the // two. As such, we can use the index. if (String == elt.type()) { return true; } // If we're here, we're still looking at prefix elements. We know that exprtype // isn't EQ so we can't use this index. if (node->path() == elt.fieldNameStringData()) { return false; } } // NOTE: This shouldn't be reached. Text index implies there is a separator implies we // will always hit the 'return true' above. invariant(0); return true; } else if (IndexNames::HASHED == indexedFieldType) { return exprtype == MatchExpression::MATCH_IN || exprtype == MatchExpression::EQ; } else if (IndexNames::GEO_2DSPHERE == indexedFieldType) { if (exprtype == MatchExpression::GEO) { // within or intersect. GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node); const GeoQuery& gq = gme->getGeoQuery(); const GeometryContainer& gc = gq.getGeometry(); return gc.hasS2Region(); } else if (exprtype == MatchExpression::GEO_NEAR) { GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node); // Make sure the near query is compatible with 2dsphere. if (gnme->getData().centroid.crs == SPHERE || gnme->getData().isNearSphere) { return true; } } return false; } else if (IndexNames::GEO_2D == indexedFieldType) { if (exprtype == MatchExpression::GEO_NEAR) { GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node); return gnme->getData().centroid.crs == FLAT; } else if (exprtype == MatchExpression::GEO) { // 2d only supports within. GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node); const GeoQuery& gq = gme->getGeoQuery(); if (GeoQuery::WITHIN != gq.getPred()) { return false; } const GeometryContainer& gc = gq.getGeometry(); // 2d indices answer flat queries. if (gc.hasFlatRegion()) { return true; } // 2d indices can answer centerSphere queries. if (NULL == gc._cap.get()) { return false; } verify(SPHERE == gc._cap->crs); const Circle& circle = gc._cap->circle; // No wrapping around the edge of the world is allowed in 2d centerSphere. return twoDWontWrap(circle, index); } return false; } else if (IndexNames::TEXT == indexedFieldType) { return (exprtype == MatchExpression::TEXT); } else if (IndexNames::GEO_HAYSTACK == indexedFieldType) { return false; } else { warning() << "Unknown indexing for node " << node->toString() << " and field " << elt.toString() << endl; verify(0); } }
// static void QueryPlanner::plan(const CanonicalQuery& query, const QueryPlannerParams& params, vector<QuerySolution*>* out) { QLOG() << "=============================\n" << "Beginning planning, options = " << optionString(params.options) << endl << "Canonical query:\n" << query.toString() << endl << "=============================" << endl; // The shortcut formerly known as IDHACK. See if it's a simple _id query. If so we might // just make an ixscan over the _id index and bypass the rest of planning entirely. if (!query.getParsed().isExplain() && !query.getParsed().showDiskLoc() && isSimpleIdQuery(query.getParsed().getFilter()) && !query.getParsed().hasOption(QueryOption_CursorTailable)) { // See if we can find an _id index. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { const IndexEntry& index = params.indices[i]; QLOG() << "IDHACK using index " << index.toString() << endl; // If so, we make a simple scan to find the doc. IndexScanNode* isn = new IndexScanNode(); isn->indexKeyPattern = index.keyPattern; isn->indexIsMultiKey = index.multikey; isn->direction = 1; isn->bounds.isSimpleRange = true; BSONObj key = getKeyFromQuery(index.keyPattern, query.getParsed().getFilter()); isn->bounds.startKey = isn->bounds.endKey = key; isn->bounds.endKeyInclusive = true; isn->computeProperties(); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, isn); if (NULL != soln) { out->push_back(soln); QLOG() << "IDHACK solution is:\n" << (*out)[0]->toString() << endl; // And that's it. return; } } } } for (size_t i = 0; i < params.indices.size(); ++i) { QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl; } bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. TODO: This is a policy departure. Previously I think you // could ask for a tailable cursor and it just tried to give you one. Now, we fail if we // can't provide one. Is this what we want? if (query.getParsed().hasOption(QueryOption_CursorTailable)) { if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) { QuerySolution* soln = buildCollscanSoln(query, true, params); if (NULL != soln) { out->push_back(soln); } } return; } // The hint can be $natural: 1. If this happens, output a collscan. It's a weird way of // saying "table scan for two, please." if (!query.getParsed().getHint().isEmpty()) { BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural"); if (!natural.eoo()) { QLOG() << "forcing a table scan due to hinted $natural\n"; if (canTableScan) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return; } } // NOR and NOT we can't handle well with indices. If we see them here, they weren't // rewritten to remove the negation. Just output a collscan for those. if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOT) || QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOR)) { // If there's a near predicate, we can't handle this. // TODO: Should canonicalized query detect this? if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)) { warning() << "Can't handle NOT/NOR with GEO_NEAR"; return; } QLOG() << "NOT/NOR in plan, just outtping a collscan\n"; if (canTableScan) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return; } // Figure out what fields we care about. unordered_set<string> fields; QueryPlannerIXSelect::getFields(query.root(), "", &fields); for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { QLOG() << "predicate over field " << *it << endl; } // Filter our indices so we only look at indices that are over our predicates. vector<IndexEntry> relevantIndices; // Hints require us to only consider the hinted index. BSONObj hintIndex = query.getParsed().getHint(); // Snapshot is a form of a hint. If snapshot is set, try to use _id index to make a real // plan. If that fails, just scan the _id index. if (query.getParsed().isSnapshot()) { // Find the ID index in indexKeyPatterns. It's our hint. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { hintIndex = params.indices[i].keyPattern; break; } } } size_t hintIndexNumber = numeric_limits<size_t>::max(); if (!hintIndex.isEmpty()) { // Sigh. If the hint is specified it might be using the index name. BSONElement firstHintElt = hintIndex.firstElement(); if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) { string hintName = firstHintElt.String(); for (size_t i = 0; i < params.indices.size(); ++i) { if (params.indices[i].name == hintName) { QLOG() << "hint by name specified, restricting indices to " << params.indices[i].keyPattern.toString() << endl; relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); hintIndexNumber = i; hintIndex = params.indices[i].keyPattern; break; } } } else { for (size_t i = 0; i < params.indices.size(); ++i) { if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) { relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); QLOG() << "hint specified, restricting indices to " << hintIndex.toString() << endl; hintIndexNumber = i; break; } } } if (hintIndexNumber == numeric_limits<size_t>::max()) { // This is supposed to be an error. warning() << "Can't find hint for " << hintIndex.toString(); return; } } else { QLOG() << "Finding relevant indices\n"; QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices); } for (size_t i = 0; i < relevantIndices.size(); ++i) { QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl; } // Figure out how useful each index is to each predicate. // query.root() is now annotated with RelevantTag(s). QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices); QLOG() << "rated tree" << endl; QLOG() << query.root()->toString() << endl; // If there is a GEO_NEAR it must have an index it can use directly. // XXX: move into data access? MatchExpression* gnNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) { // No index for GEO_NEAR? No query. RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode); vector<size_t> newFirst; // 2d + GEO_NEAR is annoying. Because 2d's GEO_NEAR isn't streaming we have to embed // the full query tree inside it as a matcher. for (size_t i = 0; i < tag->first.size(); ++i) { // GEO_NEAR has a non-2d index it can use. We can deal w/that in normal planning. if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) { newFirst.push_back(i); continue; } // If we're here, GEO_NEAR has a 2d index. We create a 2dgeonear plan with the // entire tree as a filter, if possible. GeoNear2DNode* solnRoot = new GeoNear2DNode(); solnRoot->nq = gnme->getData(); if (MatchExpression::GEO_NEAR != query.root()->matchType()) { // root is an AND, clone and delete the GEO_NEAR child. MatchExpression* filterTree = query.root()->shallowClone(); verify(MatchExpression::AND == filterTree->matchType()); bool foundChild = false; for (size_t i = 0; i < filterTree->numChildren(); ++i) { if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) { foundChild = true; filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i); break; } } verify(foundChild); solnRoot->filter.reset(filterTree); } solnRoot->numWanted = query.getParsed().getNumToReturn(); if (0 == solnRoot->numWanted) { solnRoot->numWanted = 100; } solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern; // Remove the 2d index. 2d can only be the first field, and we know there is // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll // only be first for gnNode. tag->first.erase(tag->first.begin() + i); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } } // Continue planning w/non-2d indices tagged for this pred. tag->first.swap(newFirst); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } } // Likewise, if there is a TEXT it must have an index it can use directly. MatchExpression* textNode; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) { RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } } // If we have any relevant indices, we try to create indexed plans. if (0 < relevantIndices.size()) { // The enumerator spits out trees tagged with IndexTag(s). PlanEnumerator isp(query.root(), &relevantIndices); isp.init(); MatchExpression* rawTree; while (isp.getNext(&rawTree)) { QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString() << endl; // This can fail if enumeration makes a mistake. QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices); if (NULL == solnRoot) { continue; } QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { QLOG() << "Planner: adding solution:\n" << soln->toString() << endl; out->push_back(soln); } } } QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n"; // An index was hinted. If there are any solutions, they use the hinted index. If not, we // scan the entire index to provide results and output that as our plan. This is the // desired behavior when an index is hinted that is not relevant to the query. if (!hintIndex.isEmpty() && (0 == out->size())) { QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params); if (NULL != soln) { QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl; out->push_back(soln); } return; } // If a sort order is requested, there may be an index that provides it, even if that // index is not over any predicates in the query. // // XXX XXX: Can we do this even if the index is sparse? Might we miss things? if (!query.getParsed().getSort().isEmpty() && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { // See if we have a sort provided from an index already. bool usingIndexToSort = false; for (size_t i = 0; i < out->size(); ++i) { QuerySolution* soln = (*out)[i]; if (!soln->hasSortStage) { usingIndexToSort = true; break; } } if (!usingIndexToSort) { for (size_t i = 0; i < params.indices.size(); ++i) { const BSONObj& kp = params.indices[i].keyPattern; if (providesSort(query, kp)) { QLOG() << "Planner: outputting soln that uses index to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params); if (NULL != soln) { out->push_back(soln); break; } } if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { QLOG() << "Planner: outputting soln that uses (reverse) index " << "to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1); if (NULL != soln) { out->push_back(soln); break; } } } } } // TODO: Do we always want to offer a collscan solution? // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs. if ( !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan))) { QuerySolution* collscan = buildCollscanSoln(query, false, params); if (NULL != collscan) { out->push_back(collscan); QLOG() << "Planner: outputting a collscan:\n"; QLOG() << collscan->toString() << endl; } } }
// static bool QueryPlannerIXSelect::compatible(const BSONElement& elt, const IndexEntry& index, MatchExpression* node) { // XXX: CatalogHack::getAccessMethodName: do we have to worry about this? when? string ixtype; if (String != elt.type()) { ixtype = ""; } else { ixtype = elt.String(); } // We know elt.fieldname() == node->path(). MatchExpression::MatchType exprtype = node->matchType(); // TODO: use indexnames if ("" == ixtype) { if (index.sparse && exprtype == MatchExpression::EQ) { // Can't check for null w/a sparse index. const EqualityMatchExpression* expr = static_cast<const EqualityMatchExpression*>(node); return !expr->getData().isNull(); } return exprtype != MatchExpression::GEO && exprtype != MatchExpression::GEO_NEAR; } else if ("hashed" == ixtype) { return exprtype == MatchExpression::MATCH_IN || exprtype == MatchExpression::EQ; } else if ("2dsphere" == ixtype) { if (exprtype == MatchExpression::GEO) { // within or intersect. GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node); const GeoQuery& gq = gme->getGeoQuery(); const GeometryContainer& gc = gq.getGeometry(); return gc.hasS2Region(); } else if (exprtype == MatchExpression::GEO_NEAR) { GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node); // Make sure the near query is compatible with 2dsphere. if (gnme->getData().centroid.crs == SPHERE || gnme->getData().isNearSphere) { return true; } } return false; } else if ("2d" == ixtype) { if (exprtype == MatchExpression::GEO_NEAR) { GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(node); return gnme->getData().centroid.crs == FLAT; } else if (exprtype == MatchExpression::GEO) { // 2d only supports within. GeoMatchExpression* gme = static_cast<GeoMatchExpression*>(node); const GeoQuery& gq = gme->getGeoQuery(); if (GeoQuery::WITHIN != gq.getPred()) { return false; } const GeometryContainer& gc = gq.getGeometry(); // 2d indices answer flat queries. if (gc.hasFlatRegion()) { return true; } // 2d indices can answer centerSphere queries. if (NULL == gc._cap.get()) { return false; } verify(SPHERE == gc._cap->crs); const Circle& circle = gc._cap->circle; // No wrapping around the edge of the world is allowed in 2d centerSphere. return twoDWontWrap(circle, index); } return false; } else if ("text" == ixtype) { return (exprtype == MatchExpression::TEXT); } else if ("geoHaystack" == ixtype) { return false; } else { warning() << "Unknown indexing for node " << node->toString() << " and field " << elt.toString() << endl; verify(0); } }
// static Status QueryPlanner::plan(const CanonicalQuery& query, const QueryPlannerParams& params, std::vector<QuerySolution*>* out) { QLOG() << "=============================\n" << "Beginning planning, options = " << optionString(params.options) << endl << "Canonical query:\n" << query.toString() << endl << "=============================" << endl; for (size_t i = 0; i < params.indices.size(); ++i) { QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl; } bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. TODO: This is a policy departure. Previously I think you // could ask for a tailable cursor and it just tried to give you one. Now, we fail if we // can't provide one. Is this what we want? if (query.getParsed().hasOption(QueryOption_CursorTailable)) { if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) { QuerySolution* soln = buildCollscanSoln(query, true, params); if (NULL != soln) { out->push_back(soln); } } return Status::OK(); } // The hint can be $natural: 1. If this happens, output a collscan. It's a weird way of // saying "table scan for two, please." if (!query.getParsed().getHint().isEmpty()) { BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural"); if (!natural.eoo()) { QLOG() << "forcing a table scan due to hinted $natural\n"; // min/max are incompatible with $natural. if (canTableScan && query.getParsed().getMin().isEmpty() && query.getParsed().getMax().isEmpty()) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return Status::OK(); } } // Figure out what fields we care about. unordered_set<string> fields; QueryPlannerIXSelect::getFields(query.root(), "", &fields); for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { QLOG() << "predicate over field " << *it << endl; } // Filter our indices so we only look at indices that are over our predicates. vector<IndexEntry> relevantIndices; // Hints require us to only consider the hinted index. BSONObj hintIndex = query.getParsed().getHint(); // Snapshot is a form of a hint. If snapshot is set, try to use _id index to make a real // plan. If that fails, just scan the _id index. if (query.getParsed().isSnapshot()) { // Find the ID index in indexKeyPatterns. It's our hint. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { hintIndex = params.indices[i].keyPattern; break; } } } size_t hintIndexNumber = numeric_limits<size_t>::max(); if (hintIndex.isEmpty()) { QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices); } else { // Sigh. If the hint is specified it might be using the index name. BSONElement firstHintElt = hintIndex.firstElement(); if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) { string hintName = firstHintElt.String(); for (size_t i = 0; i < params.indices.size(); ++i) { if (params.indices[i].name == hintName) { QLOG() << "hint by name specified, restricting indices to " << params.indices[i].keyPattern.toString() << endl; relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); hintIndexNumber = i; hintIndex = params.indices[i].keyPattern; break; } } } else { for (size_t i = 0; i < params.indices.size(); ++i) { if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) { relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); QLOG() << "hint specified, restricting indices to " << hintIndex.toString() << endl; hintIndexNumber = i; break; } } } if (hintIndexNumber == numeric_limits<size_t>::max()) { return Status(ErrorCodes::BadValue, "bad hint"); } } // Deal with the .min() and .max() query options. If either exist we can only use an index // that matches the object inside. if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) { BSONObj minObj = query.getParsed().getMin(); BSONObj maxObj = query.getParsed().getMax(); // This is the index into params.indices[...] that we use. size_t idxNo = numeric_limits<size_t>::max(); // If there's an index hinted we need to be able to use it. if (!hintIndex.isEmpty()) { if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) { QLOG() << "minobj doesnt work w hint"; return Status(ErrorCodes::BadValue, "hint provided does not work with min query"); } if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) { QLOG() << "maxobj doesnt work w hint"; return Status(ErrorCodes::BadValue, "hint provided does not work with max query"); } idxNo = hintIndexNumber; } else { // No hinted index, look for one that is compatible (has same field names and // ordering thereof). for (size_t i = 0; i < params.indices.size(); ++i) { const BSONObj& kp = params.indices[i].keyPattern; BSONObj toUse = minObj.isEmpty() ? maxObj : minObj; if (indexCompatibleMaxMin(toUse, kp)) { idxNo = i; break; } } } if (idxNo == numeric_limits<size_t>::max()) { QLOG() << "Can't find relevant index to use for max/min query"; // Can't find an index to use, bail out. return Status(ErrorCodes::BadValue, "unable to find relevant index for max/min query"); } // maxObj can be empty; the index scan just goes until the end. minObj can't be empty // though, so if it is, we make a minKey object. if (minObj.isEmpty()) { BSONObjBuilder bob; bob.appendMinKey(""); minObj = bob.obj(); } else { // Must strip off the field names to make an index key. minObj = stripFieldNames(minObj); } if (!maxObj.isEmpty()) { // Must strip off the field names to make an index key. maxObj = stripFieldNames(maxObj); } QLOG() << "max/min query using index " << params.indices[idxNo].toString() << endl; // Make our scan and output. QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan(params.indices[idxNo], query, params, minObj, maxObj); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } return Status::OK(); } for (size_t i = 0; i < relevantIndices.size(); ++i) { QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl; } // Figure out how useful each index is to each predicate. // query.root() is now annotated with RelevantTag(s). QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices); QLOG() << "rated tree" << endl; QLOG() << query.root()->toString() << endl; // If there is a GEO_NEAR it must have an index it can use directly. // XXX: move into data access? MatchExpression* gnNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) { // No index for GEO_NEAR? No query. RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { QLOG() << "unable to find index for $geoNear query" << endl; return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query"); } GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode); vector<size_t> newFirst; // 2d + GEO_NEAR is annoying. Because 2d's GEO_NEAR isn't streaming we have to embed // the full query tree inside it as a matcher. for (size_t i = 0; i < tag->first.size(); ++i) { // GEO_NEAR has a non-2d index it can use. We can deal w/that in normal planning. if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) { newFirst.push_back(i); continue; } // If we're here, GEO_NEAR has a 2d index. We create a 2dgeonear plan with the // entire tree as a filter, if possible. GeoNear2DNode* solnRoot = new GeoNear2DNode(); solnRoot->nq = gnme->getData(); if (NULL != query.getProj()) { solnRoot->addPointMeta = query.getProj()->wantGeoNearPoint(); solnRoot->addDistMeta = query.getProj()->wantGeoNearDistance(); } if (MatchExpression::GEO_NEAR != query.root()->matchType()) { // root is an AND, clone and delete the GEO_NEAR child. MatchExpression* filterTree = query.root()->shallowClone(); verify(MatchExpression::AND == filterTree->matchType()); bool foundChild = false; for (size_t i = 0; i < filterTree->numChildren(); ++i) { if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) { foundChild = true; filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i); break; } } verify(foundChild); solnRoot->filter.reset(filterTree); } solnRoot->numWanted = query.getParsed().getNumToReturn(); if (0 == solnRoot->numWanted) { solnRoot->numWanted = 100; } solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern; // Remove the 2d index. 2d can only be the first field, and we know there is // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll // only be first for gnNode. tag->first.erase(tag->first.begin() + i); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } } // Continue planning w/non-2d indices tagged for this pred. tag->first.swap(newFirst); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return Status::OK(); } } // Likewise, if there is a TEXT it must have an index it can use directly. MatchExpression* textNode; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) { RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return Status::OK(); } } // If we have any relevant indices, we try to create indexed plans. if (0 < relevantIndices.size()) { // The enumerator spits out trees tagged with IndexTag(s). PlanEnumeratorParams enumParams; enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION; enumParams.root = query.root(); enumParams.indices = &relevantIndices; PlanEnumerator isp(enumParams); isp.init(); MatchExpression* rawTree; // XXX: have limit on # of indexed solns we'll consider. We could have a perverse // query and index that could make n^2 very unpleasant. while (isp.getNext(&rawTree)) { QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString() << endl; // This can fail if enumeration makes a mistake. QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices); if (NULL == solnRoot) { continue; } QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { QLOG() << "Planner: adding solution:\n" << soln->toString() << endl; out->push_back(soln); } } } QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n"; // An index was hinted. If there are any solutions, they use the hinted index. If not, we // scan the entire index to provide results and output that as our plan. This is the // desired behavior when an index is hinted that is not relevant to the query. if (!hintIndex.isEmpty()) { if (0 == out->size()) { QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params); verify(NULL != soln); QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl; out->push_back(soln); } return Status::OK(); } // If a sort order is requested, there may be an index that provides it, even if that // index is not over any predicates in the query. // if (!query.getParsed().getSort().isEmpty() && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { // See if we have a sort provided from an index already. bool usingIndexToSort = false; for (size_t i = 0; i < out->size(); ++i) { QuerySolution* soln = (*out)[i]; if (!soln->hasSortStage) { usingIndexToSort = true; break; } } if (!usingIndexToSort) { for (size_t i = 0; i < params.indices.size(); ++i) { const IndexEntry& index = params.indices[i]; if (index.sparse) { continue; } const BSONObj kp = LiteParsedQuery::normalizeSortOrder(index.keyPattern); if (providesSort(query, kp)) { QLOG() << "Planner: outputting soln that uses index to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params); if (NULL != soln) { out->push_back(soln); break; } } if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { QLOG() << "Planner: outputting soln that uses (reverse) index " << "to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1); if (NULL != soln) { out->push_back(soln); break; } } } } } // TODO: Do we always want to offer a collscan solution? // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs. if ( !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && hintIndex.isEmpty() && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan))) { QuerySolution* collscan = buildCollscanSoln(query, false, params); if (NULL != collscan) { out->push_back(collscan); QLOG() << "Planner: outputting a collscan:\n"; QLOG() << collscan->toString() << endl; } } return Status::OK(); }