Status AuthzManagerExternalStateMock::_queryVector(
    const NamespaceString& collectionName,
    const BSONObj& query,
    std::vector<BSONObjCollection::iterator>* result) {
    StatusWithMatchExpression parseResult = MatchExpressionParser::parse(query);
    if (!parseResult.isOK()) {
        return parseResult.getStatus();
    }
    MatchExpression* matcher = parseResult.getValue();

    NamespaceDocumentMap::iterator mapIt = _documents.find(collectionName);
    if (mapIt == _documents.end())
        return Status(ErrorCodes::NoMatchingDocument,
                      "No collection named " + collectionName.ns());

    for (BSONObjCollection::iterator vecIt = mapIt->second.begin(); vecIt != mapIt->second.end();
         ++vecIt) {
        if (matcher->matchesBSON(*vecIt)) {
            result->push_back(vecIt);
        }
    }
    return Status::OK();
}
// static
MatchExpression* CanonicalQuery::normalizeTree(MatchExpression* root) {
    // root->isLogical() is true now. We care about AND, OR, and NOT. NOR currently scares us.
    if (MatchExpression::AND == root->matchType() || MatchExpression::OR == root->matchType()) {
        // We could have AND of AND of AND. Make sure we clean up our children before merging
        // them.
        // UNITTEST 11738048
        for (size_t i = 0; i < root->getChildVector()->size(); ++i) {
            (*root->getChildVector())[i] = normalizeTree(root->getChild(i));
        }

        // If any of our children are of the same logical operator that we are, we remove the
        // child's children and append them to ourselves after we examine all children.
        std::vector<MatchExpression*> absorbedChildren;

        for (size_t i = 0; i < root->numChildren();) {
            MatchExpression* child = root->getChild(i);
            if (child->matchType() == root->matchType()) {
                // AND of an AND or OR of an OR. Absorb child's children into ourself.
                for (size_t j = 0; j < child->numChildren(); ++j) {
                    absorbedChildren.push_back(child->getChild(j));
                }
                // TODO(opt): this is possibly n^2-ish
                root->getChildVector()->erase(root->getChildVector()->begin() + i);
                child->getChildVector()->clear();
                // Note that this only works because we cleared the child's children
                delete child;
                // Don't increment 'i' as the current child 'i' used to be child 'i+1'
            } else {
                ++i;
            }
        }

        root->getChildVector()->insert(
            root->getChildVector()->end(), absorbedChildren.begin(), absorbedChildren.end());

        // AND of 1 thing is the thing, OR of 1 thing is the thing.
        if (1 == root->numChildren()) {
            MatchExpression* ret = root->getChild(0);
            root->getChildVector()->clear();
            delete root;
            return ret;
        }
    } else if (MatchExpression::NOT == root->matchType()) {
        // Normalize the rest of the tree hanging off this NOT node.
        NotMatchExpression* nme = static_cast<NotMatchExpression*>(root);
        MatchExpression* child = nme->releaseChild();
        // normalizeTree(...) takes ownership of 'child', and then
        // transfers ownership of its return value to 'nme'.
        nme->resetChild(normalizeTree(child));
    } else if (MatchExpression::ELEM_MATCH_VALUE == root->matchType()) {
        // Just normalize our children.
        for (size_t i = 0; i < root->getChildVector()->size(); ++i) {
            (*root->getChildVector())[i] = normalizeTree(root->getChild(i));
        }
    }

    return root;
}
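// Illustrative aside (not MongoDB code): the AND/OR absorption above is the usual flattening
// of an associative operator. The minimal, self-contained sketch below shows the same
// transformation on a hypothetical toy Node type so the ownership shuffling in
// CanonicalQuery::normalizeTree() is easier to follow. All names here (toy::Node,
// flattenSameTypeChildren, MatchType) are made up for illustration only.
#include <memory>
#include <utility>
#include <vector>

namespace toy {

enum class MatchType { AND, OR, LEAF };

struct Node {
    MatchType type = MatchType::LEAF;
    std::vector<std::unique_ptr<Node>> children;
};

// Pull grandchildren of the same logical type up into 'root', mirroring the absorption loop
// in normalizeTree(). unique_ptr stands in for the manual delete calls in the real code.
void flattenSameTypeChildren(Node* root) {
    if (root->type != MatchType::AND && root->type != MatchType::OR)
        return;

    // Clean up children before merging them (the recursive normalize step).
    for (auto& child : root->children)
        flattenSameTypeChildren(child.get());

    std::vector<std::unique_ptr<Node>> flattened;
    for (auto& child : root->children) {
        if (child->type == root->type) {
            // AND of an AND or OR of an OR: absorb the child's children.
            for (auto& grandchild : child->children)
                flattened.push_back(std::move(grandchild));
            // The emptied child is destroyed when the old vector is replaced below.
        } else {
            flattened.push_back(std::move(child));
        }
    }
    root->children = std::move(flattened);
}

}  // namespace toy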
TEST( MatchExpressionParserText, Parse1 ) {
    BSONObj query = fromjson( "{$text:{$search:\"awesome\", $language:\"english\"}}" );
    StatusWithMatchExpression result = MatchExpressionParser::parse( query );
    ASSERT_TRUE( result.isOK() );

    MatchExpression* exp = result.getValue();
    ASSERT_EQUALS( MatchExpression::TEXT, exp->matchType() );

    TextMatchExpression* textExp = static_cast<TextMatchExpression*>( exp );
    ASSERT_EQUALS( textExp->getQuery(), "awesome" );
    ASSERT_EQUALS( textExp->getLanguage(), "english" );
}
TEST( MatchExpressionParserGeoNear, ParseNear ) {
    BSONObj query = fromjson("{loc:{$near:{$maxDistance:100, "
                             "$geometry:{type:\"Point\", coordinates:[0,0]}}}}");
    StatusWithMatchExpression result = MatchExpressionParser::parse( query );
    ASSERT_TRUE( result.isOK() );

    MatchExpression* exp = result.getValue();
    ASSERT_EQUALS(MatchExpression::GEO_NEAR, exp->matchType());

    GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp);
    ASSERT_EQUALS(gnexp->getData().maxDistance, 100);
}
// For $near, $nearSphere, and $geoNear syntax of:
// {
//   $near/$nearSphere/$geoNear: [ <x>, <y> ],
//   $minDistance: <distance in radians>,
//   $maxDistance: <distance in radians>
// }
TEST(MatchExpressionParserGeoNear, ParseValidNear) {
    BSONObj query = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}");
    StatusWithMatchExpression result = MatchExpressionParser::parse(query);
    ASSERT_TRUE(result.isOK());

    MatchExpression* exp = result.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, exp->matchType());

    GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp);
    ASSERT_EQ(gnexp->getData().maxDistance, 100);
    ASSERT_EQ(gnexp->getData().minDistance, 50);
}
// For $near, $nearSphere, and $geoNear syntax of:
// {
//   $near/$nearSphere/$geoNear: [ <x>, <y> ],
//   $minDistance: <distance in radians>,
//   $maxDistance: <distance in radians>
// }
TEST(MatchExpressionParserGeoNear, ParseValidNear) {
    BSONObj query = fromjson("{loc: {$near: [0,0], $maxDistance: 100, $minDistance: 50}}");
    const CollatorInterface* collator = nullptr;
    StatusWithMatchExpression result =
        MatchExpressionParser::parse(query, ExtensionsCallbackDisallowExtensions(), collator);
    ASSERT_TRUE(result.isOK());

    MatchExpression* exp = result.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, exp->matchType());

    GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp);
    ASSERT_EQ(gnexp->getData().maxDistance, 100);
    ASSERT_EQ(gnexp->getData().minDistance, 50);
}
// static
MatchExpression* CanonicalQuery::normalizeTree(MatchExpression* root) {
    // root->isLogical() is true now. We care about AND and OR. Negations currently scare us.
    if (MatchExpression::AND == root->matchType() || MatchExpression::OR == root->matchType()) {
        // We could have AND of AND of AND. Make sure we clean up our children before merging
        // them.
        // UNITTEST 11738048
        for (size_t i = 0; i < root->getChildVector()->size(); ++i) {
            (*root->getChildVector())[i] = normalizeTree(root->getChild(i));
        }

        // If any of our children are of the same logical operator that we are, we remove the
        // child's children and append them to ourselves after we examine all children.
        vector<MatchExpression*> absorbedChildren;

        for (size_t i = 0; i < root->numChildren();) {
            MatchExpression* child = root->getChild(i);
            if (child->matchType() == root->matchType()) {
                // AND of an AND or OR of an OR. Absorb child's children into ourself.
                for (size_t j = 0; j < child->numChildren(); ++j) {
                    absorbedChildren.push_back(child->getChild(j));
                }
                // TODO(opt): this is possibly n^2-ish
                root->getChildVector()->erase(root->getChildVector()->begin() + i);
                child->getChildVector()->clear();
                // Note that this only works because we cleared the child's children
                delete child;
                // Don't increment 'i' as the current child 'i' used to be child 'i+1'
            } else {
                ++i;
            }
        }

        root->getChildVector()->insert(root->getChildVector()->end(),
                                       absorbedChildren.begin(),
                                       absorbedChildren.end());

        // AND of 1 thing is the thing, OR of 1 thing is the thing.
        if (1 == root->numChildren()) {
            MatchExpression* ret = root->getChild(0);
            root->getChildVector()->clear();
            delete root;
            return ret;
        }
    }

    return root;
}
bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
    if (expr.isLogical()) {
        // Any logical expression is independent of 'pathSet' if all its children are
        // independent of 'pathSet'.
        for (size_t i = 0; i < expr.numChildren(); i++) {
            if (!isIndependentOf(*expr.getChild(i), pathSet)) {
                return false;
            }
        }
        return true;
    }

    // At this point, we know 'expr' is a leaf. If it is an elemMatch, we do not attempt to
    // determine if it is independent or not, and instead just return false.
    return !isElemMatch(expr) && isLeafIndependentOf(expr.path(), pathSet);
}
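// Illustrative aside (not the library's implementation): isLeafIndependentOf() is not shown
// above. A minimal sketch, assuming the check is that a leaf predicate on a dotted path is
// only affected by paths that are dotted prefixes of it or that it is a dotted prefix of,
// follows below. The toy:: helpers are hypothetical names introduced for this example only.
#include <set>
#include <string>

namespace toy {

// True if 'a' equals 'b' or one is a dotted-path prefix of the other
// (e.g. "a" and "a.b" are related, "a" and "ab" are not).
bool pathsAreRelated(const std::string& a, const std::string& b) {
    const std::string& shorter = a.size() <= b.size() ? a : b;
    const std::string& longer = a.size() <= b.size() ? b : a;
    if (longer.compare(0, shorter.size(), shorter) != 0)
        return false;
    return longer.size() == shorter.size() || longer[shorter.size()] == '.';
}

// A leaf on 'leafPath' is independent of 'pathSet' if it is unrelated to every path in the set.
bool isLeafIndependentOf(const std::string& leafPath, const std::set<std::string>& pathSet) {
    for (const std::string& path : pathSet) {
        if (pathsAreRelated(leafPath, path))
            return false;
    }
    return true;
}

}  // namespace toy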
void tagForSort(MatchExpression* tree) {
    if (!Indexability::nodeCanUseIndexOnOwnField(tree)) {
        size_t myTagValue = IndexTag::kNoIndex;
        for (size_t i = 0; i < tree->numChildren(); ++i) {
            MatchExpression* child = tree->getChild(i);
            tagForSort(child);
            IndexTag* childTag = static_cast<IndexTag*>(child->getTag());
            if (NULL != childTag) {
                myTagValue = std::min(myTagValue, childTag->index);
            }
        }
        if (myTagValue != IndexTag::kNoIndex) {
            tree->setTag(new IndexTag(myTagValue));
        }
    }
}
TEST(MatchExpressionParserGeoNear, ParseNear) {
    BSONObj query = fromjson(
        "{loc:{$near:{$maxDistance:100, "
        "$geometry:{type:\"Point\", coordinates:[0,0]}}}}");
    const CollatorInterface* collator = nullptr;
    StatusWithMatchExpression result =
        MatchExpressionParser::parse(query, ExtensionsCallbackDisallowExtensions(), collator);
    ASSERT_TRUE(result.isOK());

    MatchExpression* exp = result.getValue().get();
    ASSERT_EQUALS(MatchExpression::GEO_NEAR, exp->matchType());

    GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp);
    ASSERT_EQUALS(gnexp->getData().maxDistance, 100);
}
// static
// XXX TODO: This does not belong here at all.
MatchExpression* CanonicalQuery::logicalRewrite(MatchExpression* tree) {
    // Only thing we do is pull an OR up at the root.
    if (MatchExpression::AND != tree->matchType()) {
        return tree;
    }

    // We want to bail out ASAP if we have nothing to do here.
    size_t numOrs = 0;
    for (size_t i = 0; i < tree->numChildren(); ++i) {
        if (MatchExpression::OR == tree->getChild(i)->matchType()) {
            ++numOrs;
        }
    }

    // Only do this for one OR right now.
    if (1 != numOrs) {
        return tree;
    }

    // Detach the OR from the root.
    invariant(NULL != tree->getChildVector());
    std::vector<MatchExpression*>& rootChildren = *tree->getChildVector();
    MatchExpression* orChild = NULL;
    for (size_t i = 0; i < rootChildren.size(); ++i) {
        if (MatchExpression::OR == rootChildren[i]->matchType()) {
            orChild = rootChildren[i];
            rootChildren.erase(rootChildren.begin() + i);
            break;
        }
    }

    // AND the existing root with each or child.
    invariant(NULL != orChild);
    invariant(NULL != orChild->getChildVector());
    std::vector<MatchExpression*>& orChildren = *orChild->getChildVector();
    for (size_t i = 0; i < orChildren.size(); ++i) {
        AndMatchExpression* ama = new AndMatchExpression();
        ama->add(orChildren[i]);
        ama->add(tree->shallowClone());
        orChildren[i] = ama;
    }
    delete tree;

    // Clean up any consequences from this tomfoolery.
    return normalizeTree(orChild);
}
static Status _extractFullEqualityMatches(const MatchExpression& root,
                                          const FieldRefSet* fullPathsToExtract,
                                          EqualityMatches* equalities) {
    if (root.matchType() == MatchExpression::EQ) {
        // Extract equality matches
        const EqualityMatchExpression& eqChild =
            static_cast<const EqualityMatchExpression&>(root);

        FieldRef path(eqChild.path());

        if (fullPathsToExtract) {
            FieldRefSet conflictPaths;
            fullPathsToExtract->findConflicts(&path, &conflictPaths);

            // Ignore if this path is unrelated to the full paths
            if (conflictPaths.empty())
                return Status::OK();

            // Make sure we're a prefix of all the conflict paths
            Status status = checkPathIsPrefixOf(path, conflictPaths);
            if (!status.isOK())
                return status;
        }

        Status status = checkEqualityConflicts(*equalities, path);
        if (!status.isOK())
            return status;

        equalities->insert(make_pair(eqChild.path(), &eqChild));
    } else if (root.matchType() == MatchExpression::AND) {
        // Further explore $and matches
        for (size_t i = 0; i < root.numChildren(); ++i) {
            MatchExpression* child = root.getChild(i);
            Status status = _extractFullEqualityMatches(*child, fullPathsToExtract, equalities);
            if (!status.isOK())
                return status;
        }
    }

    return Status::OK();
}
// static
void QueryPlannerIXSelect::stripUnneededAssignments(MatchExpression* node,
                                                    const std::vector<IndexEntry>& indices) {
    if (MatchExpression::AND == node->matchType()) {
        for (size_t i = 0; i < node->numChildren(); i++) {
            MatchExpression* child = node->getChild(i);

            if (MatchExpression::EQ != child->matchType()) {
                continue;
            }

            if (!child->getTag()) {
                continue;
            }

            // We found a EQ child of an AND which is tagged.
            RelevantTag* rt = static_cast<RelevantTag*>(child->getTag());

            // Look through all of the indices for which this predicate can be answered with
            // the leading field of the index.
            for (std::vector<size_t>::const_iterator i = rt->first.begin(); i != rt->first.end();
                 ++i) {
                size_t index = *i;

                if (indices[index].unique && 1 == indices[index].keyPattern.nFields()) {
                    // Found an EQ predicate which can use a single-field unique index.
                    // Clear assignments from the entire tree, and add back a single assignment
                    // for 'child' to the unique index.
                    clearAssignments(node);
                    RelevantTag* newRt = static_cast<RelevantTag*>(child->getTag());
                    newRt->first.push_back(index);

                    // Tag state has been reset in the entire subtree at 'root'; nothing
                    // else for us to do.
                    return;
                }
            }
        }
    }

    for (size_t i = 0; i < node->numChildren(); i++) {
        stripUnneededAssignments(node->getChild(i), indices);
    }
}
TEST(MatchExpressionParserGeoNear, ParseValidNearSphere) {
    BSONObj query = fromjson("{loc: {$nearSphere: [0,0], $maxDistance: 100, $minDistance: 50}}");
    const CollatorInterface* collator = nullptr;
    const boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
    StatusWithMatchExpression result =
        MatchExpressionParser::parse(query,
                                     collator,
                                     expCtx,
                                     ExtensionsCallbackNoop(),
                                     MatchExpressionParser::kAllowAllSpecialFeatures);
    ASSERT_TRUE(result.isOK());

    MatchExpression* exp = result.getValue().get();
    ASSERT_EQ(MatchExpression::GEO_NEAR, exp->matchType());

    GeoNearMatchExpression* gnexp = static_cast<GeoNearMatchExpression*>(exp);
    ASSERT_EQ(gnexp->getData().maxDistance, 100);
    ASSERT_EQ(gnexp->getData().minDistance, 50);
}
Status SubplanRunner::planSubqueries() {
    MatchExpression* theOr = _query->root();

    for (size_t i = 0; i < _plannerParams.indices.size(); ++i) {
        const IndexEntry& ie = _plannerParams.indices[i];
        _indexMap[ie.keyPattern] = i;
        QLOG() << "Subplanner: index " << i << " is " << ie.toString() << endl;
    }

    const WhereCallbackReal whereCallback(_collection->ns().db());

    for (size_t i = 0; i < theOr->numChildren(); ++i) {
        // Turn the i-th child into its own query.
        MatchExpression* orChild = theOr->getChild(i);
        CanonicalQuery* orChildCQ;
        Status childCQStatus =
            CanonicalQuery::canonicalize(*_query, orChild, &orChildCQ, whereCallback);
        if (!childCQStatus.isOK()) {
            mongoutils::str::stream ss;
            ss << "Subplanner: Can't canonicalize subchild " << orChild->toString() << " "
               << childCQStatus.reason();
            return Status(ErrorCodes::BadValue, ss);
        }

        // Make sure it gets cleaned up.
        auto_ptr<CanonicalQuery> safeOrChildCQ(orChildCQ);

        // Plan the i-th child.
        vector<QuerySolution*> solutions;

        // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from
        // considering any plan that's a collscan.
        QLOG() << "Subplanner: planning child " << i << " of " << theOr->numChildren();
        Status status = QueryPlanner::plan(*safeOrChildCQ, _plannerParams, &solutions);

        if (!status.isOK()) {
            mongoutils::str::stream ss;
            ss << "Subplanner: Can't plan for subchild " << orChildCQ->toString() << " "
               << status.reason();
            return Status(ErrorCodes::BadValue, ss);
        }
        QLOG() << "Subplanner: got " << solutions.size() << " solutions";

        if (0 == solutions.size()) {
            // If one child doesn't have an indexed solution, bail out.
            mongoutils::str::stream ss;
            ss << "Subplanner: No solutions for subchild " << orChildCQ->toString();
            return Status(ErrorCodes::BadValue, ss);
        }

        // Hang onto the canonicalized subqueries and the corresponding query solutions
        // so that they can be used in subplan running later on.
        _cqs.push(safeOrChildCQ.release());
        _solutions.push(solutions);
    }

    return Status::OK();
}
Status AuthzManagerExternalStateMock::_findUser(const std::string& usersNamespace,
                                                const BSONObj& query,
                                                BSONObj* result) const {
    StatusWithMatchExpression parseResult = MatchExpressionParser::parse(query);
    if (!parseResult.isOK()) {
        return parseResult.getStatus();
    }
    MatchExpression* matcher = parseResult.getValue();

    unordered_map<std::string, std::vector<BSONObj> >::const_iterator mapIt;
    for (mapIt = _userDocuments.begin(); mapIt != _userDocuments.end(); ++mapIt) {
        for (std::vector<BSONObj>::const_iterator vecIt = mapIt->second.begin();
             vecIt != mapIt->second.end();
             ++vecIt) {
            if (nsToDatabase(usersNamespace) == mapIt->first && matcher->matchesBSON(*vecIt)) {
                *result = *vecIt;
                return Status::OK();
            }
        }
    }
    return Status(ErrorCodes::UserNotFound, "User not found");
}
void PlanEnumerator::tagMemo(size_t id) {
    QLOG() << "Tagging memoID " << id << endl;
    NodeAssignment* assign = _memo[id];
    verify(NULL != assign);

    if (NULL != assign->pred) {
        PredicateAssignment* pa = assign->pred.get();
        verify(NULL == pa->expr->getTag());
        verify(pa->indexToAssign < pa->first.size());
        pa->expr->setTag(new IndexTag(pa->first[pa->indexToAssign]));
    } else if (NULL != assign->orAssignment) {
        OrAssignment* oa = assign->orAssignment.get();
        for (size_t i = 0; i < oa->subnodes.size(); ++i) {
            tagMemo(oa->subnodes[i]);
        }
    } else if (NULL != assign->newAnd) {
        AndAssignment* aa = assign->newAnd.get();
        if (AndAssignment::MANDATORY == aa->state) {
            verify(aa->counter < aa->mandatory.size());
            const OneIndexAssignment& assign = aa->mandatory[aa->counter];
            for (size_t i = 0; i < assign.preds.size(); ++i) {
                MatchExpression* pred = assign.preds[i];
                verify(NULL == pred->getTag());
                pred->setTag(new IndexTag(assign.index, assign.positions[i]));
            }
        } else if (AndAssignment::PRED_CHOICES == aa->state) {
            verify(aa->counter < aa->predChoices.size());
            const OneIndexAssignment& assign = aa->predChoices[aa->counter];
            for (size_t i = 0; i < assign.preds.size(); ++i) {
                MatchExpression* pred = assign.preds[i];
                verify(NULL == pred->getTag());
                pred->setTag(new IndexTag(assign.index, assign.positions[i]));
            }
        } else {
            verify(AndAssignment::SUBNODES == aa->state);
            verify(aa->counter < aa->subnodes.size());
            tagMemo(aa->subnodes[aa->counter]);
        }
    } else {
        verify(0);
    }
}
// static Status QueryPlanner::plan(const CanonicalQuery& query, const QueryPlannerParams& params, std::vector<QuerySolution*>* out) { QLOG() << "=============================\n" << "Beginning planning, options = " << optionString(params.options) << endl << "Canonical query:\n" << query.toString() << endl << "=============================" << endl; for (size_t i = 0; i < params.indices.size(); ++i) { QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl; } bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. TODO: This is a policy departure. Previously I think you // could ask for a tailable cursor and it just tried to give you one. Now, we fail if we // can't provide one. Is this what we want? if (query.getParsed().hasOption(QueryOption_CursorTailable)) { if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) { QuerySolution* soln = buildCollscanSoln(query, true, params); if (NULL != soln) { out->push_back(soln); } } return Status::OK(); } // The hint can be $natural: 1. If this happens, output a collscan. It's a weird way of // saying "table scan for two, please." if (!query.getParsed().getHint().isEmpty()) { BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural"); if (!natural.eoo()) { QLOG() << "forcing a table scan due to hinted $natural\n"; // min/max are incompatible with $natural. if (canTableScan && query.getParsed().getMin().isEmpty() && query.getParsed().getMax().isEmpty()) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return Status::OK(); } } // Figure out what fields we care about. unordered_set<string> fields; QueryPlannerIXSelect::getFields(query.root(), "", &fields); for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { QLOG() << "predicate over field " << *it << endl; } // Filter our indices so we only look at indices that are over our predicates. vector<IndexEntry> relevantIndices; // Hints require us to only consider the hinted index. BSONObj hintIndex = query.getParsed().getHint(); // Snapshot is a form of a hint. If snapshot is set, try to use _id index to make a real // plan. If that fails, just scan the _id index. if (query.getParsed().isSnapshot()) { // Find the ID index in indexKeyPatterns. It's our hint. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { hintIndex = params.indices[i].keyPattern; break; } } } size_t hintIndexNumber = numeric_limits<size_t>::max(); if (hintIndex.isEmpty()) { QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices); } else { // Sigh. If the hint is specified it might be using the index name. 
BSONElement firstHintElt = hintIndex.firstElement(); if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) { string hintName = firstHintElt.String(); for (size_t i = 0; i < params.indices.size(); ++i) { if (params.indices[i].name == hintName) { QLOG() << "hint by name specified, restricting indices to " << params.indices[i].keyPattern.toString() << endl; relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); hintIndexNumber = i; hintIndex = params.indices[i].keyPattern; break; } } } else { for (size_t i = 0; i < params.indices.size(); ++i) { if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) { relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); QLOG() << "hint specified, restricting indices to " << hintIndex.toString() << endl; hintIndexNumber = i; break; } } } if (hintIndexNumber == numeric_limits<size_t>::max()) { return Status(ErrorCodes::BadValue, "bad hint"); } } // Deal with the .min() and .max() query options. If either exist we can only use an index // that matches the object inside. if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) { BSONObj minObj = query.getParsed().getMin(); BSONObj maxObj = query.getParsed().getMax(); // This is the index into params.indices[...] that we use. size_t idxNo = numeric_limits<size_t>::max(); // If there's an index hinted we need to be able to use it. if (!hintIndex.isEmpty()) { if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) { QLOG() << "minobj doesnt work w hint"; return Status(ErrorCodes::BadValue, "hint provided does not work with min query"); } if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) { QLOG() << "maxobj doesnt work w hint"; return Status(ErrorCodes::BadValue, "hint provided does not work with max query"); } idxNo = hintIndexNumber; } else { // No hinted index, look for one that is compatible (has same field names and // ordering thereof). for (size_t i = 0; i < params.indices.size(); ++i) { const BSONObj& kp = params.indices[i].keyPattern; BSONObj toUse = minObj.isEmpty() ? maxObj : minObj; if (indexCompatibleMaxMin(toUse, kp)) { idxNo = i; break; } } } if (idxNo == numeric_limits<size_t>::max()) { QLOG() << "Can't find relevant index to use for max/min query"; // Can't find an index to use, bail out. return Status(ErrorCodes::BadValue, "unable to find relevant index for max/min query"); } // maxObj can be empty; the index scan just goes until the end. minObj can't be empty // though, so if it is, we make a minKey object. if (minObj.isEmpty()) { BSONObjBuilder bob; bob.appendMinKey(""); minObj = bob.obj(); } else { // Must strip off the field names to make an index key. minObj = stripFieldNames(minObj); } if (!maxObj.isEmpty()) { // Must strip off the field names to make an index key. maxObj = stripFieldNames(maxObj); } QLOG() << "max/min query using index " << params.indices[idxNo].toString() << endl; // Make our scan and output. QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan(params.indices[idxNo], query, params, minObj, maxObj); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } return Status::OK(); } for (size_t i = 0; i < relevantIndices.size(); ++i) { QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl; } // Figure out how useful each index is to each predicate. // query.root() is now annotated with RelevantTag(s). 
QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices); QLOG() << "rated tree" << endl; QLOG() << query.root()->toString() << endl; // If there is a GEO_NEAR it must have an index it can use directly. // XXX: move into data access? MatchExpression* gnNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) { // No index for GEO_NEAR? No query. RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { QLOG() << "unable to find index for $geoNear query" << endl; return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query"); } GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode); vector<size_t> newFirst; // 2d + GEO_NEAR is annoying. Because 2d's GEO_NEAR isn't streaming we have to embed // the full query tree inside it as a matcher. for (size_t i = 0; i < tag->first.size(); ++i) { // GEO_NEAR has a non-2d index it can use. We can deal w/that in normal planning. if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) { newFirst.push_back(i); continue; } // If we're here, GEO_NEAR has a 2d index. We create a 2dgeonear plan with the // entire tree as a filter, if possible. GeoNear2DNode* solnRoot = new GeoNear2DNode(); solnRoot->nq = gnme->getData(); if (NULL != query.getProj()) { solnRoot->addPointMeta = query.getProj()->wantGeoNearPoint(); solnRoot->addDistMeta = query.getProj()->wantGeoNearDistance(); } if (MatchExpression::GEO_NEAR != query.root()->matchType()) { // root is an AND, clone and delete the GEO_NEAR child. MatchExpression* filterTree = query.root()->shallowClone(); verify(MatchExpression::AND == filterTree->matchType()); bool foundChild = false; for (size_t i = 0; i < filterTree->numChildren(); ++i) { if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) { foundChild = true; filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i); break; } } verify(foundChild); solnRoot->filter.reset(filterTree); } solnRoot->numWanted = query.getParsed().getNumToReturn(); if (0 == solnRoot->numWanted) { solnRoot->numWanted = 100; } solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern; // Remove the 2d index. 2d can only be the first field, and we know there is // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll // only be first for gnNode. tag->first.erase(tag->first.begin() + i); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } } // Continue planning w/non-2d indices tagged for this pred. tag->first.swap(newFirst); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return Status::OK(); } } // Likewise, if there is a TEXT it must have an index it can use directly. MatchExpression* textNode; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) { RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return Status::OK(); } } // If we have any relevant indices, we try to create indexed plans. if (0 < relevantIndices.size()) { // The enumerator spits out trees tagged with IndexTag(s). 
PlanEnumeratorParams enumParams; enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION; enumParams.root = query.root(); enumParams.indices = &relevantIndices; PlanEnumerator isp(enumParams); isp.init(); MatchExpression* rawTree; // XXX: have limit on # of indexed solns we'll consider. We could have a perverse // query and index that could make n^2 very unpleasant. while (isp.getNext(&rawTree)) { QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString() << endl; // This can fail if enumeration makes a mistake. QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices); if (NULL == solnRoot) { continue; } QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { QLOG() << "Planner: adding solution:\n" << soln->toString() << endl; out->push_back(soln); } } } QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n"; // An index was hinted. If there are any solutions, they use the hinted index. If not, we // scan the entire index to provide results and output that as our plan. This is the // desired behavior when an index is hinted that is not relevant to the query. if (!hintIndex.isEmpty()) { if (0 == out->size()) { QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params); verify(NULL != soln); QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl; out->push_back(soln); } return Status::OK(); } // If a sort order is requested, there may be an index that provides it, even if that // index is not over any predicates in the query. // if (!query.getParsed().getSort().isEmpty() && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { // See if we have a sort provided from an index already. bool usingIndexToSort = false; for (size_t i = 0; i < out->size(); ++i) { QuerySolution* soln = (*out)[i]; if (!soln->hasSortStage) { usingIndexToSort = true; break; } } if (!usingIndexToSort) { for (size_t i = 0; i < params.indices.size(); ++i) { const IndexEntry& index = params.indices[i]; if (index.sparse) { continue; } const BSONObj kp = LiteParsedQuery::normalizeSortOrder(index.keyPattern); if (providesSort(query, kp)) { QLOG() << "Planner: outputting soln that uses index to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params); if (NULL != soln) { out->push_back(soln); break; } } if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { QLOG() << "Planner: outputting soln that uses (reverse) index " << "to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1); if (NULL != soln) { out->push_back(soln); break; } } } } } // TODO: Do we always want to offer a collscan solution? // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs. if ( !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && hintIndex.isEmpty() && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan))) { QuerySolution* collscan = buildCollscanSoln(query, false, params); if (NULL != collscan) { out->push_back(collscan); QLOG() << "Planner: outputting a collscan:\n"; QLOG() << collscan->toString() << endl; } } return Status::OK(); }
/** * Traverse the subtree rooted at 'node' to remove invalid RelevantTag assignments to text index * 'idx', which has prefix paths 'prefixPaths'. */ static void stripInvalidAssignmentsToTextIndex(MatchExpression* node, size_t idx, const unordered_set<StringData, StringData::Hasher>& prefixPaths) { // If we're here, there are prefixPaths and node is either: // 1. a text pred which we can't use as we have nothing over its prefix, or // 2. a non-text pred which we can't use as we don't have a text pred AND-related. if (Indexability::nodeCanUseIndexOnOwnField(node)) { removeIndexRelevantTag(node, idx); return; } // Do not traverse tree beyond negation node. if (node->matchType() == MatchExpression::NOT || node->matchType() == MatchExpression::NOR) { return; } // For anything to use a text index with prefixes, we require that: // 1. The text pred exists in an AND, // 2. The non-text preds that use the text index's prefixes are also in that AND. if (node->matchType() != MatchExpression::AND) { // It's an OR or some kind of array operator. for (size_t i = 0; i < node->numChildren(); ++i) { stripInvalidAssignmentsToTextIndex(node->getChild(i), idx, prefixPaths); } return; } // If we're here, we're an AND. Determine whether the children satisfy the index prefix for // the text index. invariant(node->matchType() == MatchExpression::AND); bool hasText = false; // The AND must have an EQ predicate for each prefix path. When we encounter a child with a // tag we remove it from childrenPrefixPaths. All children exist if this set is empty at // the end. unordered_set<StringData, StringData::Hasher> childrenPrefixPaths = prefixPaths; for (size_t i = 0; i < node->numChildren(); ++i) { MatchExpression* child = node->getChild(i); RelevantTag* tag = static_cast<RelevantTag*>(child->getTag()); if (NULL == tag) { // 'child' could be a logical operator. Maybe there are some assignments hiding // inside. stripInvalidAssignmentsToTextIndex(child, idx, prefixPaths); continue; } bool inFirst = tag->first.end() != std::find(tag->first.begin(), tag->first.end(), idx); bool inNotFirst = tag->notFirst.end() != std::find(tag->notFirst.begin(), tag->notFirst.end(), idx); if (inFirst || inNotFirst) { // Great! 'child' was assigned to our index. if (child->matchType() == MatchExpression::TEXT) { hasText = true; } else { childrenPrefixPaths.erase(child->path()); // One fewer prefix we're looking for, possibly. Note that we could have a // suffix assignment on the index and wind up here. In this case the erase // above won't do anything since a suffix isn't a prefix. } } else { // Recurse on the children to ensure that they're not hiding any assignments // to idx. stripInvalidAssignmentsToTextIndex(child, idx, prefixPaths); } } // Our prereqs for using the text index were not satisfied so we remove the assignments from // all children of the AND. if (!hasText || !childrenPrefixPaths.empty()) { for (size_t i = 0; i < node->numChildren(); ++i) { stripInvalidAssignmentsToTextIndex(node->getChild(i), idx, prefixPaths); } } }
static void stripInvalidAssignmentsTo2dsphereIndex(MatchExpression* node, size_t idx) { if (Indexability::nodeCanUseIndexOnOwnField(node)) { removeIndexRelevantTag(node, idx); return; } const MatchExpression::MatchType nodeType = node->matchType(); // Don't bother peeking inside of negations. if (MatchExpression::NOT == nodeType || MatchExpression::NOR == nodeType) { return; } if (MatchExpression::AND != nodeType) { // It's an OR or some kind of array operator. for (size_t i = 0; i < node->numChildren(); ++i) { stripInvalidAssignmentsTo2dsphereIndex(node->getChild(i), idx); } return; } bool hasGeoField = false; for (size_t i = 0; i < node->numChildren(); ++i) { MatchExpression* child = node->getChild(i); RelevantTag* tag = static_cast<RelevantTag*>(child->getTag()); if (NULL == tag) { // 'child' could be a logical operator. Maybe there are some assignments hiding // inside. stripInvalidAssignmentsTo2dsphereIndex(child, idx); continue; } bool inFirst = tag->first.end() != std::find(tag->first.begin(), tag->first.end(), idx); bool inNotFirst = tag->notFirst.end() != std::find(tag->notFirst.begin(), tag->notFirst.end(), idx); // If there is an index assignment... if (inFirst || inNotFirst) { // And it's a geo predicate... if (MatchExpression::GEO == child->matchType() || MatchExpression::GEO_NEAR == child->matchType()) { hasGeoField = true; } } else { // Recurse on the children to ensure that they're not hiding any assignments // to idx. stripInvalidAssignmentsTo2dsphereIndex(child, idx); } } // If there isn't a geo predicate our results aren't a subset of what's in the geo index, so // if we use the index we'll miss results. if (!hasGeoField) { for (size_t i = 0; i < node->numChildren(); ++i) { stripInvalidAssignmentsTo2dsphereIndex(node->getChild(i), idx); } } }
Status SubplanStage::planSubqueries() { _orExpression = _query->root()->shallowClone(); if (isContainedOr(_orExpression.get())) { _orExpression = rewriteToRootedOr(std::move(_orExpression)); invariant(CanonicalQuery::isValid(_orExpression.get(), _query->getParsed()).isOK()); } for (size_t i = 0; i < _plannerParams.indices.size(); ++i) { const IndexEntry& ie = _plannerParams.indices[i]; _indexMap[ie.keyPattern] = i; LOG(5) << "Subplanner: index " << i << " is " << ie.toString(); } const ExtensionsCallbackReal extensionsCallback(getOpCtx(), &_collection->ns()); for (size_t i = 0; i < _orExpression->numChildren(); ++i) { // We need a place to shove the results from planning this branch. _branchResults.push_back(new BranchPlanningResult()); BranchPlanningResult* branchResult = _branchResults.back(); MatchExpression* orChild = _orExpression->getChild(i); // Turn the i-th child into its own query. auto statusWithCQ = CanonicalQuery::canonicalize(*_query, orChild, extensionsCallback); if (!statusWithCQ.isOK()) { mongoutils::str::stream ss; ss << "Can't canonicalize subchild " << orChild->toString() << " " << statusWithCQ.getStatus().reason(); return Status(ErrorCodes::BadValue, ss); } branchResult->canonicalQuery = std::move(statusWithCQ.getValue()); // Plan the i-th child. We might be able to find a plan for the i-th child in the plan // cache. If there's no cached plan, then we generate and rank plans using the MPS. CachedSolution* rawCS; if (PlanCache::shouldCacheQuery(*branchResult->canonicalQuery) && _collection->infoCache() ->getPlanCache() ->get(*branchResult->canonicalQuery, &rawCS) .isOK()) { // We have a CachedSolution. Store it for later. LOG(5) << "Subplanner: cached plan found for child " << i << " of " << _orExpression->numChildren(); branchResult->cachedSolution.reset(rawCS); } else { // No CachedSolution found. We'll have to plan from scratch. LOG(5) << "Subplanner: planning child " << i << " of " << _orExpression->numChildren(); // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from // considering any plan that's a collscan. Status status = QueryPlanner::plan(*branchResult->canonicalQuery, _plannerParams, &branchResult->solutions.mutableVector()); if (!status.isOK()) { mongoutils::str::stream ss; ss << "Can't plan for subchild " << branchResult->canonicalQuery->toString() << " " << status.reason(); return Status(ErrorCodes::BadValue, ss); } LOG(5) << "Subplanner: got " << branchResult->solutions.size() << " solutions"; if (0 == branchResult->solutions.size()) { // If one child doesn't have an indexed solution, bail out. mongoutils::str::stream ss; ss << "No solutions for subchild " << branchResult->canonicalQuery->toString(); return Status(ErrorCodes::BadValue, ss); } } } return Status::OK(); }
// static void QueryPlanner::plan(const CanonicalQuery& query, const QueryPlannerParams& params, vector<QuerySolution*>* out) { QLOG() << "=============================\n" << "Beginning planning, options = " << optionString(params.options) << endl << "Canonical query:\n" << query.toString() << endl << "=============================" << endl; // The shortcut formerly known as IDHACK. See if it's a simple _id query. If so we might // just make an ixscan over the _id index and bypass the rest of planning entirely. if (!query.getParsed().isExplain() && !query.getParsed().showDiskLoc() && isSimpleIdQuery(query.getParsed().getFilter()) && !query.getParsed().hasOption(QueryOption_CursorTailable)) { // See if we can find an _id index. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { const IndexEntry& index = params.indices[i]; QLOG() << "IDHACK using index " << index.toString() << endl; // If so, we make a simple scan to find the doc. IndexScanNode* isn = new IndexScanNode(); isn->indexKeyPattern = index.keyPattern; isn->indexIsMultiKey = index.multikey; isn->direction = 1; isn->bounds.isSimpleRange = true; BSONObj key = getKeyFromQuery(index.keyPattern, query.getParsed().getFilter()); isn->bounds.startKey = isn->bounds.endKey = key; isn->bounds.endKeyInclusive = true; isn->computeProperties(); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, isn); if (NULL != soln) { out->push_back(soln); QLOG() << "IDHACK solution is:\n" << (*out)[0]->toString() << endl; // And that's it. return; } } } } for (size_t i = 0; i < params.indices.size(); ++i) { QLOG() << "idx " << i << " is " << params.indices[i].toString() << endl; } bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. TODO: This is a policy departure. Previously I think you // could ask for a tailable cursor and it just tried to give you one. Now, we fail if we // can't provide one. Is this what we want? if (query.getParsed().hasOption(QueryOption_CursorTailable)) { if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) { QuerySolution* soln = buildCollscanSoln(query, true, params); if (NULL != soln) { out->push_back(soln); } } return; } // The hint can be $natural: 1. If this happens, output a collscan. It's a weird way of // saying "table scan for two, please." if (!query.getParsed().getHint().isEmpty()) { BSONElement natural = query.getParsed().getHint().getFieldDotted("$natural"); if (!natural.eoo()) { QLOG() << "forcing a table scan due to hinted $natural\n"; if (canTableScan) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return; } } // NOR and NOT we can't handle well with indices. If we see them here, they weren't // rewritten to remove the negation. Just output a collscan for those. if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOT) || QueryPlannerCommon::hasNode(query.root(), MatchExpression::NOR)) { // If there's a near predicate, we can't handle this. // TODO: Should canonicalized query detect this? 
if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR)) { warning() << "Can't handle NOT/NOR with GEO_NEAR"; return; } QLOG() << "NOT/NOR in plan, just outtping a collscan\n"; if (canTableScan) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return; } // Figure out what fields we care about. unordered_set<string> fields; QueryPlannerIXSelect::getFields(query.root(), "", &fields); for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { QLOG() << "predicate over field " << *it << endl; } // Filter our indices so we only look at indices that are over our predicates. vector<IndexEntry> relevantIndices; // Hints require us to only consider the hinted index. BSONObj hintIndex = query.getParsed().getHint(); // Snapshot is a form of a hint. If snapshot is set, try to use _id index to make a real // plan. If that fails, just scan the _id index. if (query.getParsed().isSnapshot()) { // Find the ID index in indexKeyPatterns. It's our hint. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { hintIndex = params.indices[i].keyPattern; break; } } } size_t hintIndexNumber = numeric_limits<size_t>::max(); if (!hintIndex.isEmpty()) { // Sigh. If the hint is specified it might be using the index name. BSONElement firstHintElt = hintIndex.firstElement(); if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) { string hintName = firstHintElt.String(); for (size_t i = 0; i < params.indices.size(); ++i) { if (params.indices[i].name == hintName) { QLOG() << "hint by name specified, restricting indices to " << params.indices[i].keyPattern.toString() << endl; relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); hintIndexNumber = i; hintIndex = params.indices[i].keyPattern; break; } } } else { for (size_t i = 0; i < params.indices.size(); ++i) { if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) { relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); QLOG() << "hint specified, restricting indices to " << hintIndex.toString() << endl; hintIndexNumber = i; break; } } } if (hintIndexNumber == numeric_limits<size_t>::max()) { // This is supposed to be an error. warning() << "Can't find hint for " << hintIndex.toString(); return; } } else { QLOG() << "Finding relevant indices\n"; QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices); } for (size_t i = 0; i < relevantIndices.size(); ++i) { QLOG() << "relevant idx " << i << " is " << relevantIndices[i].toString() << endl; } // Figure out how useful each index is to each predicate. // query.root() is now annotated with RelevantTag(s). QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices); QLOG() << "rated tree" << endl; QLOG() << query.root()->toString() << endl; // If there is a GEO_NEAR it must have an index it can use directly. // XXX: move into data access? MatchExpression* gnNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) { // No index for GEO_NEAR? No query. RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode); vector<size_t> newFirst; // 2d + GEO_NEAR is annoying. Because 2d's GEO_NEAR isn't streaming we have to embed // the full query tree inside it as a matcher. 
for (size_t i = 0; i < tag->first.size(); ++i) { // GEO_NEAR has a non-2d index it can use. We can deal w/that in normal planning. if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) { newFirst.push_back(i); continue; } // If we're here, GEO_NEAR has a 2d index. We create a 2dgeonear plan with the // entire tree as a filter, if possible. GeoNear2DNode* solnRoot = new GeoNear2DNode(); solnRoot->nq = gnme->getData(); if (MatchExpression::GEO_NEAR != query.root()->matchType()) { // root is an AND, clone and delete the GEO_NEAR child. MatchExpression* filterTree = query.root()->shallowClone(); verify(MatchExpression::AND == filterTree->matchType()); bool foundChild = false; for (size_t i = 0; i < filterTree->numChildren(); ++i) { if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) { foundChild = true; filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i); break; } } verify(foundChild); solnRoot->filter.reset(filterTree); } solnRoot->numWanted = query.getParsed().getNumToReturn(); if (0 == solnRoot->numWanted) { solnRoot->numWanted = 100; } solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern; // Remove the 2d index. 2d can only be the first field, and we know there is // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll // only be first for gnNode. tag->first.erase(tag->first.begin() + i); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } } // Continue planning w/non-2d indices tagged for this pred. tag->first.swap(newFirst); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } } // Likewise, if there is a TEXT it must have an index it can use directly. MatchExpression* textNode; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) { RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { return; } } // If we have any relevant indices, we try to create indexed plans. if (0 < relevantIndices.size()) { // The enumerator spits out trees tagged with IndexTag(s). PlanEnumerator isp(query.root(), &relevantIndices); isp.init(); MatchExpression* rawTree; while (isp.getNext(&rawTree)) { QLOG() << "about to build solntree from tagged tree:\n" << rawTree->toString() << endl; // This can fail if enumeration makes a mistake. QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess(query, rawTree, false, relevantIndices); if (NULL == solnRoot) { continue; } QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { QLOG() << "Planner: adding solution:\n" << soln->toString() << endl; out->push_back(soln); } } } QLOG() << "Planner: outputted " << out->size() << " indexed solutions.\n"; // An index was hinted. If there are any solutions, they use the hinted index. If not, we // scan the entire index to provide results and output that as our plan. This is the // desired behavior when an index is hinted that is not relevant to the query. if (!hintIndex.isEmpty() && (0 == out->size())) { QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params); if (NULL != soln) { QLOG() << "Planner: outputting soln that uses hinted index as scan." << endl; out->push_back(soln); } return; } // If a sort order is requested, there may be an index that provides it, even if that // index is not over any predicates in the query. 
// // XXX XXX: Can we do this even if the index is sparse? Might we miss things? if (!query.getParsed().getSort().isEmpty() && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { // See if we have a sort provided from an index already. bool usingIndexToSort = false; for (size_t i = 0; i < out->size(); ++i) { QuerySolution* soln = (*out)[i]; if (!soln->hasSortStage) { usingIndexToSort = true; break; } } if (!usingIndexToSort) { for (size_t i = 0; i < params.indices.size(); ++i) { const BSONObj& kp = params.indices[i].keyPattern; if (providesSort(query, kp)) { QLOG() << "Planner: outputting soln that uses index to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params); if (NULL != soln) { out->push_back(soln); break; } } if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { QLOG() << "Planner: outputting soln that uses (reverse) index " << "to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1); if (NULL != soln) { out->push_back(soln); break; } } } } } // TODO: Do we always want to offer a collscan solution? // XXX: currently disabling the always-use-a-collscan in order to find more planner bugs. if ( !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && ((params.options & QueryPlannerParams::INCLUDE_COLLSCAN) || (0 == out->size() && canTableScan))) { QuerySolution* collscan = buildCollscanSoln(query, false, params); if (NULL != collscan) { out->push_back(collscan); QLOG() << "Planner: outputting a collscan:\n"; QLOG() << collscan->toString() << endl; } } }
// static
MatchExpression* CanonicalQuery::normalizeTree(MatchExpression* root) {
    if (MatchExpression::AND == root->matchType() || MatchExpression::OR == root->matchType()) {
        // We could have AND of AND of AND. Make sure we clean up our children before merging them.
        for (size_t i = 0; i < root->getChildVector()->size(); ++i) {
            (*root->getChildVector())[i] = normalizeTree(root->getChild(i));
        }

        // If any of our children are of the same logical operator that we are, we remove the
        // child's children and append them to ourselves after we examine all children.
        std::vector<MatchExpression*> absorbedChildren;

        for (size_t i = 0; i < root->numChildren();) {
            MatchExpression* child = root->getChild(i);
            if (child->matchType() == root->matchType()) {
                // AND of an AND or OR of an OR. Absorb child's children into ourself.
                for (size_t j = 0; j < child->numChildren(); ++j) {
                    absorbedChildren.push_back(child->getChild(j));
                }
                // TODO(opt): this is possibly n^2-ish
                root->getChildVector()->erase(root->getChildVector()->begin() + i);
                child->getChildVector()->clear();
                // Note that this only works because we cleared the child's children
                delete child;
                // Don't increment 'i' as the current child 'i' used to be child 'i+1'
            } else {
                ++i;
            }
        }

        root->getChildVector()->insert(
            root->getChildVector()->end(), absorbedChildren.begin(), absorbedChildren.end());

        // AND of 1 thing is the thing, OR of 1 thing is the thing.
        if (1 == root->numChildren()) {
            MatchExpression* ret = root->getChild(0);
            root->getChildVector()->clear();
            delete root;
            return ret;
        }
    } else if (MatchExpression::NOR == root->matchType()) {
        // First clean up children.
        for (size_t i = 0; i < root->getChildVector()->size(); ++i) {
            (*root->getChildVector())[i] = normalizeTree(root->getChild(i));
        }

        // NOR of one thing is NOT of the thing.
        if (1 == root->numChildren()) {
            // Detach the child and assume ownership.
            std::unique_ptr<MatchExpression> child(root->getChild(0));
            root->getChildVector()->clear();

            // Delete the root when this goes out of scope.
            std::unique_ptr<NorMatchExpression> ownedRoot(static_cast<NorMatchExpression*>(root));

            // Make a NOT to be the new root and transfer ownership of the child to it.
            auto newRoot = stdx::make_unique<NotMatchExpression>();
            newRoot->init(child.release()).transitional_ignore();

            return newRoot.release();
        }
    } else if (MatchExpression::NOT == root->matchType()) {
        // Normalize the rest of the tree hanging off this NOT node.
        NotMatchExpression* nme = static_cast<NotMatchExpression*>(root);
        MatchExpression* child = nme->releaseChild();
        // normalizeTree(...) takes ownership of 'child', and then
        // transfers ownership of its return value to 'nme'.
        nme->resetChild(normalizeTree(child));
    } else if (MatchExpression::ELEM_MATCH_OBJECT == root->matchType()) {
        // Normalize the rest of the tree hanging off this ELEM_MATCH_OBJECT node.
        ElemMatchObjectMatchExpression* emome = static_cast<ElemMatchObjectMatchExpression*>(root);
        auto child = emome->releaseChild();
        // normalizeTree(...) takes ownership of 'child', and then
        // transfers ownership of its return value to 'emome'.
        emome->resetChild(std::unique_ptr<MatchExpression>(normalizeTree(child.release())));
    } else if (MatchExpression::ELEM_MATCH_VALUE == root->matchType()) {
        // Just normalize our children.
        for (size_t i = 0; i < root->getChildVector()->size(); ++i) {
            (*root->getChildVector())[i] = normalizeTree(root->getChild(i));
        }
    } else if (MatchExpression::MATCH_IN == root->matchType()) {
        std::unique_ptr<InMatchExpression> in(static_cast<InMatchExpression*>(root));

        // IN of 1 regex is the regex.
        if (in->getRegexes().size() == 1 && in->getEqualities().empty()) {
            RegexMatchExpression* childRe = in->getRegexes().begin()->get();
            invariant(!childRe->getTag());

            // Create a new RegexMatchExpression, because 'childRe' does not have a path.
            auto re = stdx::make_unique<RegexMatchExpression>();
            re->init(in->path(), childRe->getString(), childRe->getFlags()).transitional_ignore();
            if (in->getTag()) {
                re->setTag(in->getTag()->clone());
            }
            return normalizeTree(re.release());
        }

        // IN of 1 equality is the equality.
        if (in->getEqualities().size() == 1 && in->getRegexes().empty()) {
            auto eq = stdx::make_unique<EqualityMatchExpression>();
            eq->init(in->path(), *(in->getEqualities().begin())).transitional_ignore();
            eq->setCollator(in->getCollator());
            if (in->getTag()) {
                eq->setTag(in->getTag()->clone());
            }
            return eq.release();
        }

        return in.release();
    }

    return root;
}
Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { // This is the skeleton of index selections that is inserted into the cache. std::unique_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree()); for (size_t i = 0; i < _orExpression->numChildren(); ++i) { MatchExpression* orChild = _orExpression->getChild(i); BranchPlanningResult* branchResult = _branchResults[i].get(); if (branchResult->cachedSolution.get()) { // We can get the index tags we need out of the cache. Status tagStatus = tagOrChildAccordingToCache( cacheData.get(), branchResult->cachedSolution->plannerData[0], orChild, _indexMap); if (!tagStatus.isOK()) { return tagStatus; } } else if (1 == branchResult->solutions.size()) { QuerySolution* soln = branchResult->solutions.front().get(); Status tagStatus = tagOrChildAccordingToCache( cacheData.get(), soln->cacheData.get(), orChild, _indexMap); if (!tagStatus.isOK()) { return tagStatus; } } else { // N solutions, rank them. // We already checked for zero solutions in planSubqueries(...). invariant(!branchResult->solutions.empty()); _ws->clear(); // We pass the SometimesCache option to the MPS because the SubplanStage currently does // not use the CachedPlanStage's eviction mechanism. We therefore are more conservative // about putting a potentially bad plan into the cache in the subplan path. // We temporarily add the MPS to _children to ensure that we pass down all // save/restore/invalidate messages that can be generated if pickBestPlan yields. invariant(_children.empty()); _children.emplace_back( stdx::make_unique<MultiPlanStage>(getOpCtx(), _collection, branchResult->canonicalQuery.get(), MultiPlanStage::CachingMode::SometimesCache)); ON_BLOCK_EXIT([&] { invariant(_children.size() == 1); // Make sure nothing else was added to _children. _children.pop_back(); }); MultiPlanStage* multiPlanStage = static_cast<MultiPlanStage*>(child().get()); // Dump all the solutions into the MPS. for (size_t ix = 0; ix < branchResult->solutions.size(); ++ix) { PlanStage* nextPlanRoot; invariant(StageBuilder::build(getOpCtx(), _collection, *branchResult->canonicalQuery, *branchResult->solutions[ix], _ws, &nextPlanRoot)); // Takes ownership of 'nextPlanRoot'. multiPlanStage->addPlan(std::move(branchResult->solutions[ix]), nextPlanRoot, _ws); } Status planSelectStat = multiPlanStage->pickBestPlan(yieldPolicy); if (!planSelectStat.isOK()) { return planSelectStat; } if (!multiPlanStage->bestPlanChosen()) { mongoutils::str::stream ss; ss << "Failed to pick best plan for subchild " << branchResult->canonicalQuery->toString(); return Status(ErrorCodes::BadValue, ss); } QuerySolution* bestSoln = multiPlanStage->bestSolution(); // Check that we have good cache data. For example, we don't cache things // for 2d indices. if (NULL == bestSoln->cacheData.get()) { mongoutils::str::stream ss; ss << "No cache data for subchild " << orChild->toString(); return Status(ErrorCodes::BadValue, ss); } if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) { mongoutils::str::stream ss; ss << "No indexed cache data for subchild " << orChild->toString(); return Status(ErrorCodes::BadValue, ss); } // Add the index assignments to our original query. 
Status tagStatus = QueryPlanner::tagAccordingToCache( orChild, bestSoln->cacheData->tree.get(), _indexMap); if (!tagStatus.isOK()) { mongoutils::str::stream ss; ss << "Failed to extract indices from subchild " << orChild->toString(); return Status(ErrorCodes::BadValue, ss); } cacheData->children.push_back(bestSoln->cacheData->tree->clone()); } } // Must do this before using the planner functionality. prepareForAccessPlanning(_orExpression.get()); // Use the cached index assignments to build solnRoot. Takes ownership of '_orExpression'. std::unique_ptr<QuerySolutionNode> solnRoot(QueryPlannerAccess::buildIndexedDataAccess( *_query, std::move(_orExpression), _plannerParams.indices, _plannerParams)); if (!solnRoot) { mongoutils::str::stream ss; ss << "Failed to build indexed data path for subplanned query\n"; return Status(ErrorCodes::BadValue, ss); } LOG(5) << "Subplanner: fully tagged tree is " << redact(solnRoot->toString()); // Takes ownership of 'solnRoot' _compositeSolution = QueryPlannerAnalysis::analyzeDataAccess(*_query, _plannerParams, std::move(solnRoot)); if (NULL == _compositeSolution.get()) { mongoutils::str::stream ss; ss << "Failed to analyze subplanned query"; return Status(ErrorCodes::BadValue, ss); } LOG(5) << "Subplanner: Composite solution is " << redact(_compositeSolution->toString()); // Use the index tags from planning each branch to construct the composite solution, // and set that solution as our child stage. _ws->clear(); PlanStage* root; invariant(StageBuilder::build( getOpCtx(), _collection, *_query, *_compositeSolution.get(), _ws, &root)); invariant(_children.empty()); _children.emplace_back(root); return Status::OK(); }
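Both the cached-plan and single-solution branches above delegate to tagOrChildAccordingToCache(...), which is not shown here, while the multi-solution branch performs the equivalent steps by hand. The sketch below restates those hand-rolled steps as a free function so the delegation is easier to follow; it is an approximation, not the real helper, and the std::map type used for the index map is an assumption (this document builds _indexMap from index names in one version and from key patterns in another).

// Hedged sketch: apply a branch's cached index tags to its $or child and record the
// branch's index tree in the composite cache entry being built for the whole query.
Status tagOrChildAccordingToCacheSketch(PlanCacheIndexTree* compositeCacheData,
                                        SolutionCacheData* branchCacheData,
                                        MatchExpression* orChild,
                                        const std::map<std::string, size_t>& indexMap) {
    // A branch without cache data (e.g. a 2d solution) or with non-index-tag cache data
    // cannot be tagged from the cache at all.
    if (NULL == branchCacheData) {
        return Status(ErrorCodes::BadValue,
                      "No cache data for subchild " + orChild->toString());
    }
    if (SolutionCacheData::USE_INDEX_TAGS_SOLN != branchCacheData->solnType) {
        return Status(ErrorCodes::BadValue,
                      "No indexed cache data for subchild " + orChild->toString());
    }

    // Add the branch's index assignments to the original $or child...
    Status tagStatus =
        QueryPlanner::tagAccordingToCache(orChild, branchCacheData->tree.get(), indexMap);
    if (!tagStatus.isOK()) {
        return tagStatus;
    }

    // ...and remember its index tree so the composite solution can be cached later.
    compositeCacheData->children.push_back(branchCacheData->tree->clone());
    return Status::OK();
}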
Status SubplanStage::planSubqueries() { _orExpression = _query->root()->shallowClone(); for (size_t i = 0; i < _plannerParams.indices.size(); ++i) { const IndexEntry& ie = _plannerParams.indices[i]; _indexMap[ie.name] = i; LOG(5) << "Subplanner: index " << i << " is " << ie; } for (size_t i = 0; i < _orExpression->numChildren(); ++i) { // We need a place to shove the results from planning this branch. _branchResults.push_back(stdx::make_unique<BranchPlanningResult>()); BranchPlanningResult* branchResult = _branchResults.back().get(); MatchExpression* orChild = _orExpression->getChild(i); // Turn the i-th child into its own query. auto statusWithCQ = CanonicalQuery::canonicalize(getOpCtx(), *_query, orChild); if (!statusWithCQ.isOK()) { mongoutils::str::stream ss; ss << "Can't canonicalize subchild " << orChild->toString() << " " << statusWithCQ.getStatus().reason(); return Status(ErrorCodes::BadValue, ss); } branchResult->canonicalQuery = std::move(statusWithCQ.getValue()); // Plan the i-th child. We might be able to find a plan for the i-th child in the plan // cache. If there's no cached plan, then we generate and rank plans using the MPS. const auto* planCache = _collection->infoCache()->getPlanCache(); if (auto cachedSol = planCache->getCacheEntryIfCacheable(*branchResult->canonicalQuery)) { // We have a CachedSolution. Store it for later. LOG(5) << "Subplanner: cached plan found for child " << i << " of " << _orExpression->numChildren(); branchResult->cachedSolution = std::move(cachedSol); } else { // No CachedSolution found. We'll have to plan from scratch. LOG(5) << "Subplanner: planning child " << i << " of " << _orExpression->numChildren(); // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from // considering any plan that's a collscan. invariant(branchResult->solutions.empty()); auto solutions = QueryPlanner::plan(*branchResult->canonicalQuery, _plannerParams); if (!solutions.isOK()) { mongoutils::str::stream ss; ss << "Can't plan for subchild " << branchResult->canonicalQuery->toString() << " " << solutions.getStatus().reason(); return Status(ErrorCodes::BadValue, ss); } branchResult->solutions = std::move(solutions.getValue()); LOG(5) << "Subplanner: got " << branchResult->solutions.size() << " solutions"; if (0 == branchResult->solutions.size()) { // If one child doesn't have an indexed solution, bail out. mongoutils::str::stream ss; ss << "No solutions for subchild " << branchResult->canonicalQuery->toString(); return Status(ErrorCodes::BadValue, ss); } } } return Status::OK(); }
// static Status QueryPlanner::plan(const CanonicalQuery& query, const QueryPlannerParams& params, std::vector<QuerySolution*>* out) { LOG(5) << "Beginning planning..." << endl << "=============================" << endl << "Options = " << optionString(params.options) << endl << "Canonical query:" << endl << query.toString() << "=============================" << endl; for (size_t i = 0; i < params.indices.size(); ++i) { LOG(5) << "Index " << i << " is " << params.indices[i].toString() << endl; } bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. TODO: This is a policy departure. Previously I think you // could ask for a tailable cursor and it just tried to give you one. Now, we fail if we // can't provide one. Is this what we want? if (query.getParsed().isTailable()) { if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) { QuerySolution* soln = buildCollscanSoln(query, true, params); if (NULL != soln) { out->push_back(soln); } } return Status::OK(); } // The hint or sort can be $natural: 1. If this happens, output a collscan. If both // a $natural hint and a $natural sort are specified, then the direction of the collscan // is determined by the sign of the sort (not the sign of the hint). if (!query.getParsed().getHint().isEmpty() || !query.getParsed().getSort().isEmpty()) { BSONObj hintObj = query.getParsed().getHint(); BSONObj sortObj = query.getParsed().getSort(); BSONElement naturalHint = hintObj.getFieldDotted("$natural"); BSONElement naturalSort = sortObj.getFieldDotted("$natural"); // A hint overrides a $natural sort. This means that we don't force a table // scan if there is a $natural sort with a non-$natural hint. if (!naturalHint.eoo() || (!naturalSort.eoo() && hintObj.isEmpty())) { LOG(5) << "Forcing a table scan due to hinted $natural\n"; // min/max are incompatible with $natural. if (canTableScan && query.getParsed().getMin().isEmpty() && query.getParsed().getMax().isEmpty()) { QuerySolution* soln = buildCollscanSoln(query, false, params); if (NULL != soln) { out->push_back(soln); } } return Status::OK(); } } // Figure out what fields we care about. unordered_set<string> fields; QueryPlannerIXSelect::getFields(query.root(), "", &fields); for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { LOG(5) << "Predicate over field '" << *it << "'" << endl; } // Filter our indices so we only look at indices that are over our predicates. vector<IndexEntry> relevantIndices; // Hints require us to only consider the hinted index. // If index filters in the query settings were used to override // the allowed indices for planning, we should not use the hinted index // requested in the query. BSONObj hintIndex; if (!params.indexFiltersApplied) { hintIndex = query.getParsed().getHint(); } // Snapshot is a form of a hint. If snapshot is set, try to use _id index to make a real // plan. If that fails, just scan the _id index. if (query.getParsed().isSnapshot()) { // Find the ID index in indexKeyPatterns. It's our hint. for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { hintIndex = params.indices[i].keyPattern; break; } } } size_t hintIndexNumber = numeric_limits<size_t>::max(); if (hintIndex.isEmpty()) { QueryPlannerIXSelect::findRelevantIndices(fields, params.indices, &relevantIndices); } else { // Sigh. 
If the hint is specified it might be using the index name. BSONElement firstHintElt = hintIndex.firstElement(); if (str::equals("$hint", firstHintElt.fieldName()) && String == firstHintElt.type()) { string hintName = firstHintElt.String(); for (size_t i = 0; i < params.indices.size(); ++i) { if (params.indices[i].name == hintName) { LOG(5) << "Hint by name specified, restricting indices to " << params.indices[i].keyPattern.toString() << endl; relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); hintIndexNumber = i; hintIndex = params.indices[i].keyPattern; break; } } } else { for (size_t i = 0; i < params.indices.size(); ++i) { if (0 == params.indices[i].keyPattern.woCompare(hintIndex)) { relevantIndices.clear(); relevantIndices.push_back(params.indices[i]); LOG(5) << "Hint specified, restricting indices to " << hintIndex.toString() << endl; hintIndexNumber = i; break; } } } if (hintIndexNumber == numeric_limits<size_t>::max()) { return Status(ErrorCodes::BadValue, "bad hint"); } } // Deal with the .min() and .max() query options. If either exist we can only use an index // that matches the object inside. if (!query.getParsed().getMin().isEmpty() || !query.getParsed().getMax().isEmpty()) { BSONObj minObj = query.getParsed().getMin(); BSONObj maxObj = query.getParsed().getMax(); // The unfinished siblings of these objects may not be proper index keys because they // may be empty objects or have field names. When an index is picked to use for the // min/max query, these "finished" objects will always be valid index keys for the // index's key pattern. BSONObj finishedMinObj; BSONObj finishedMaxObj; // This is the index into params.indices[...] that we use. size_t idxNo = numeric_limits<size_t>::max(); // If there's an index hinted we need to be able to use it. if (!hintIndex.isEmpty()) { if (!minObj.isEmpty() && !indexCompatibleMaxMin(minObj, hintIndex)) { LOG(5) << "Minobj doesn't work with hint"; return Status(ErrorCodes::BadValue, "hint provided does not work with min query"); } if (!maxObj.isEmpty() && !indexCompatibleMaxMin(maxObj, hintIndex)) { LOG(5) << "Maxobj doesn't work with hint"; return Status(ErrorCodes::BadValue, "hint provided does not work with max query"); } const BSONObj& kp = params.indices[hintIndexNumber].keyPattern; finishedMinObj = finishMinObj(kp, minObj, maxObj); finishedMaxObj = finishMaxObj(kp, minObj, maxObj); // The min must be less than the max for the hinted index ordering. if (0 <= finishedMinObj.woCompare(finishedMaxObj, kp, false)) { LOG(5) << "Minobj/Maxobj don't work with hint"; return Status(ErrorCodes::BadValue, "hint provided does not work with min/max query"); } idxNo = hintIndexNumber; } else { // No hinted index, look for one that is compatible (has same field names and // ordering thereof). for (size_t i = 0; i < params.indices.size(); ++i) { const BSONObj& kp = params.indices[i].keyPattern; BSONObj toUse = minObj.isEmpty() ? maxObj : minObj; if (indexCompatibleMaxMin(toUse, kp)) { // In order to be fully compatible, the min has to be less than the max // according to the index key pattern ordering. The first step in verifying // this is "finish" the min and max by replacing empty objects and stripping // field names. finishedMinObj = finishMinObj(kp, minObj, maxObj); finishedMaxObj = finishMaxObj(kp, minObj, maxObj); // Now we have the final min and max. This index is only relevant for // the min/max query if min < max. if (0 >= finishedMinObj.woCompare(finishedMaxObj, kp, false)) { // Found a relevant index. 
idxNo = i; break; } // This index is not relevant; move on to the next. } } } if (idxNo == numeric_limits<size_t>::max()) { LOG(5) << "Can't find relevant index to use for max/min query"; // Can't find an index to use, bail out. return Status(ErrorCodes::BadValue, "unable to find relevant index for max/min query"); } LOG(5) << "Max/min query using index " << params.indices[idxNo].toString() << endl; // Make our scan and output. QuerySolutionNode* solnRoot = QueryPlannerAccess::makeIndexScan( params.indices[idxNo], query, params, finishedMinObj, finishedMaxObj); QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { out->push_back(soln); } return Status::OK(); } for (size_t i = 0; i < relevantIndices.size(); ++i) { LOG(2) << "Relevant index " << i << " is " << relevantIndices[i].toString() << endl; } // Figure out how useful each index is to each predicate. QueryPlannerIXSelect::rateIndices(query.root(), "", relevantIndices); QueryPlannerIXSelect::stripInvalidAssignments(query.root(), relevantIndices); // Unless we have GEO_NEAR, TEXT, or a projection, we may be able to apply an optimization // in which we strip unnecessary index assignments. // // Disallowed with projection because assignment to a non-unique index can allow the plan // to be covered. // // TEXT and GEO_NEAR are special because they require the use of a text/geo index in order // to be evaluated correctly. Stripping these "mandatory assignments" is therefore invalid. if (query.getParsed().getProj().isEmpty() && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { QueryPlannerIXSelect::stripUnneededAssignments(query.root(), relevantIndices); } // query.root() is now annotated with RelevantTag(s). LOG(5) << "Rated tree:" << endl << query.root()->toString(); // If there is a GEO_NEAR it must have an index it can use directly. MatchExpression* gnNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR, &gnNode)) { // No index for GEO_NEAR? No query. RelevantTag* tag = static_cast<RelevantTag*>(gnNode->getTag()); if (0 == tag->first.size() && 0 == tag->notFirst.size()) { LOG(5) << "Unable to find index for $geoNear query." << endl; // Don't leave tags on query tree. query.root()->resetTag(); return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query"); } LOG(5) << "Rated tree after geonear processing:" << query.root()->toString(); } // Likewise, if there is a TEXT it must have an index it can use directly. MatchExpression* textNode = NULL; if (QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT, &textNode)) { RelevantTag* tag = static_cast<RelevantTag*>(textNode->getTag()); // Exactly one text index required for TEXT. We need to check this explicitly because // the text stage can't be built if no text index exists or there is an ambiguity as to // which one to use. size_t textIndexCount = 0; for (size_t i = 0; i < params.indices.size(); i++) { if (INDEX_TEXT == params.indices[i].type) { textIndexCount++; } } if (textIndexCount != 1) { // Don't leave tags on query tree. query.root()->resetTag(); return Status(ErrorCodes::BadValue, "need exactly one text index for $text query"); } // Error if the text node is tagged with zero indices. if (0 == tag->first.size() && 0 == tag->notFirst.size()) { // Don't leave tags on query tree. 
query.root()->resetTag(); return Status(ErrorCodes::BadValue, "failed to use text index to satisfy $text query (if text index is " "compound, are equality predicates given for all prefix fields?)"); } // At this point, we know that there is only one text index and that the TEXT node is // assigned to it. invariant(1 == tag->first.size() + tag->notFirst.size()); LOG(5) << "Rated tree after text processing:" << query.root()->toString(); } // If we have any relevant indices, we try to create indexed plans. if (0 < relevantIndices.size()) { // The enumerator spits out trees tagged with IndexTag(s). PlanEnumeratorParams enumParams; enumParams.intersect = params.options & QueryPlannerParams::INDEX_INTERSECTION; enumParams.root = query.root(); enumParams.indices = &relevantIndices; PlanEnumerator isp(enumParams); isp.init(); MatchExpression* rawTree; while (isp.getNext(&rawTree) && (out->size() < params.maxIndexedSolutions)) { LOG(5) << "About to build solntree from tagged tree:" << endl << rawTree->toString(); // The tagged tree produced by the plan enumerator is not guaranteed // to be canonically sorted. In order to be compatible with the cached // data, sort the tagged tree according to CanonicalQuery ordering. std::unique_ptr<MatchExpression> clone(rawTree->shallowClone()); CanonicalQuery::sortTree(clone.get()); PlanCacheIndexTree* cacheData; Status indexTreeStatus = cacheDataFromTaggedTree(clone.get(), relevantIndices, &cacheData); if (!indexTreeStatus.isOK()) { LOG(5) << "Query is not cachable: " << indexTreeStatus.reason() << endl; } unique_ptr<PlanCacheIndexTree> autoData(cacheData); // This can fail if enumeration makes a mistake. QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess( query, rawTree, false, relevantIndices, params); if (NULL == solnRoot) { continue; } QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, params, solnRoot); if (NULL != soln) { LOG(5) << "Planner: adding solution:" << endl << soln->toString(); if (indexTreeStatus.isOK()) { SolutionCacheData* scd = new SolutionCacheData(); scd->tree.reset(autoData.release()); soln->cacheData.reset(scd); } out->push_back(soln); } } } // Don't leave tags on query tree. query.root()->resetTag(); LOG(5) << "Planner: outputted " << out->size() << " indexed solutions.\n"; // Produce legible error message for failed OR planning with a TEXT child. // TODO: support collection scan for non-TEXT children of OR. if (out->size() == 0 && textNode != NULL && MatchExpression::OR == query.root()->matchType()) { MatchExpression* root = query.root(); for (size_t i = 0; i < root->numChildren(); ++i) { if (textNode == root->getChild(i)) { return Status(ErrorCodes::BadValue, "Failed to produce a solution for TEXT under OR - " "other non-TEXT clauses under OR have to be indexed as well."); } } } // An index was hinted. If there are any solutions, they use the hinted index. If not, we // scan the entire index to provide results and output that as our plan. This is the // desired behavior when an index is hinted that is not relevant to the query. if (!hintIndex.isEmpty()) { if (0 == out->size()) { QuerySolution* soln = buildWholeIXSoln(params.indices[hintIndexNumber], query, params); verify(NULL != soln); LOG(5) << "Planner: outputting soln that uses hinted index as scan." << endl; out->push_back(soln); } return Status::OK(); } // If a sort order is requested, there may be an index that provides it, even if that // index is not over any predicates in the query. 
// if (!query.getParsed().getSort().isEmpty() && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { // See if we have a sort provided from an index already. // This is implied by the presence of a non-blocking solution. bool usingIndexToSort = false; for (size_t i = 0; i < out->size(); ++i) { QuerySolution* soln = (*out)[i]; if (!soln->hasBlockingStage) { usingIndexToSort = true; break; } } if (!usingIndexToSort) { for (size_t i = 0; i < params.indices.size(); ++i) { const IndexEntry& index = params.indices[i]; // Only regular (non-plugin) indexes can be used to provide a sort, and only // non-sparse indexes can be used to provide a sort. // // TODO: Sparse indexes can't normally provide a sort, because non-indexed // documents could potentially be missing from the result set. However, if the // query predicate can be used to guarantee that all documents to be returned // are indexed, then the index should be able to provide the sort. // // For example: // - Sparse index {a: 1, b: 1} should be able to provide a sort for // find({b: 1}).sort({a: 1}). SERVER-13908. // - Index {a: 1, b: "2dsphere"} (which is "geo-sparse", if // 2dsphereIndexVersion=2) should be able to provide a sort for // find({b: GEO}).sort({a:1}). SERVER-10801. if (index.type != INDEX_BTREE) { continue; } if (index.sparse) { continue; } // Partial indexes can only be used to provide a sort only if the query predicate is // compatible. if (index.filterExpr && !expression::isSubsetOf(query.root(), index.filterExpr)) { continue; } const BSONObj kp = QueryPlannerAnalysis::getSortPattern(index.keyPattern); if (providesSort(query, kp)) { LOG(5) << "Planner: outputting soln that uses index to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params); if (NULL != soln) { PlanCacheIndexTree* indexTree = new PlanCacheIndexTree(); indexTree->setIndexEntry(params.indices[i]); SolutionCacheData* scd = new SolutionCacheData(); scd->tree.reset(indexTree); scd->solnType = SolutionCacheData::WHOLE_IXSCAN_SOLN; scd->wholeIXSolnDir = 1; soln->cacheData.reset(scd); out->push_back(soln); break; } } if (providesSort(query, QueryPlannerCommon::reverseSortObj(kp))) { LOG(5) << "Planner: outputting soln that uses (reverse) index " << "to provide sort." << endl; QuerySolution* soln = buildWholeIXSoln(params.indices[i], query, params, -1); if (NULL != soln) { PlanCacheIndexTree* indexTree = new PlanCacheIndexTree(); indexTree->setIndexEntry(params.indices[i]); SolutionCacheData* scd = new SolutionCacheData(); scd->tree.reset(indexTree); scd->solnType = SolutionCacheData::WHOLE_IXSCAN_SOLN; scd->wholeIXSolnDir = -1; soln->cacheData.reset(scd); out->push_back(soln); break; } } } } } // geoNear and text queries *require* an index. // Also, if a hint is specified it indicates that we MUST use it. bool possibleToCollscan = !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && hintIndex.isEmpty(); // The caller can explicitly ask for a collscan. bool collscanRequested = (params.options & QueryPlannerParams::INCLUDE_COLLSCAN); // No indexed plans? We must provide a collscan if possible or else we can't run the query. 
bool collscanNeeded = (0 == out->size() && canTableScan); if (possibleToCollscan && (collscanRequested || collscanNeeded)) { QuerySolution* collscan = buildCollscanSoln(query, false, params); if (NULL != collscan) { SolutionCacheData* scd = new SolutionCacheData(); scd->solnType = SolutionCacheData::COLLSCAN_SOLN; collscan->cacheData.reset(scd); out->push_back(collscan); LOG(5) << "Planner: outputting a collscan:" << endl << collscan->toString(); } } return Status::OK(); }
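The $natural handling near the top of QueryPlanner::plan(...) encodes a precedence rule that is easy to misread: a $natural hint always forces a collection scan, while a $natural sort forces one only when no hint was given at all. Below is a minimal sketch of that predicate using only the BSONObj accessors already used in the planner; the helper name is made up, and it ignores the min/max and NO_TABLE_SCAN checks that the planner applies afterwards.

// Hedged sketch: true when the planner above would force a collection scan because of
// $natural, before it considers any index.
bool forcesNaturalCollscan(const BSONObj& hintObj, const BSONObj& sortObj) {
    BSONElement naturalHint = hintObj.getFieldDotted("$natural");
    BSONElement naturalSort = sortObj.getFieldDotted("$natural");

    // A $natural hint wins outright; a $natural sort only counts when there is no hint,
    // because a real (non-$natural) hint overrides it.
    return !naturalHint.eoo() || (!naturalSort.eoo() && hintObj.isEmpty());
}

For example, a query hinted with {a: 1} but sorted by {$natural: 1} is not forced to a table scan, while a query hinted with {$natural: -1} is; when both a $natural hint and a $natural sort are present, the collscan's direction comes from the sort's sign.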
Status SubplanStage::planSubqueries() { // Adds the amount of time taken by planSubqueries() to executionTimeMillis. There's lots of // work that happens here, so this is needed for the time accounting to make sense. ScopedTimer timer(&_commonStats.executionTimeMillis); MatchExpression* orExpr = _query->root(); for (size_t i = 0; i < _plannerParams.indices.size(); ++i) { const IndexEntry& ie = _plannerParams.indices[i]; _indexMap[ie.keyPattern] = i; QLOG() << "Subplanner: index " << i << " is " << ie.toString() << endl; } const WhereCallbackReal whereCallback(_txn, _collection->ns().db()); for (size_t i = 0; i < orExpr->numChildren(); ++i) { // We need a place to shove the results from planning this branch. _branchResults.push_back(new BranchPlanningResult()); BranchPlanningResult* branchResult = _branchResults.back(); MatchExpression* orChild = orExpr->getChild(i); // Turn the i-th child into its own query. { CanonicalQuery* orChildCQ; Status childCQStatus = CanonicalQuery::canonicalize(*_query, orChild, &orChildCQ, whereCallback); if (!childCQStatus.isOK()) { mongoutils::str::stream ss; ss << "Can't canonicalize subchild " << orChild->toString() << " " << childCQStatus.reason(); return Status(ErrorCodes::BadValue, ss); } branchResult->canonicalQuery.reset(orChildCQ); } // Plan the i-th child. We might be able to find a plan for the i-th child in the plan // cache. If there's no cached plan, then we generate and rank plans using the MPS. CachedSolution* rawCS; if (PlanCache::shouldCacheQuery(*branchResult->canonicalQuery.get()) && _collection->infoCache()->getPlanCache()->get(*branchResult->canonicalQuery.get(), &rawCS).isOK()) { // We have a CachedSolution. Store it for later. QLOG() << "Subplanner: cached plan found for child " << i << " of " << orExpr->numChildren(); branchResult->cachedSolution.reset(rawCS); } else { // No CachedSolution found. We'll have to plan from scratch. QLOG() << "Subplanner: planning child " << i << " of " << orExpr->numChildren(); // We don't set NO_TABLE_SCAN because peeking at the cache data will keep us from // considering any plan that's a collscan. Status status = QueryPlanner::plan(*branchResult->canonicalQuery.get(), _plannerParams, &branchResult->solutions.mutableVector()); if (!status.isOK()) { mongoutils::str::stream ss; ss << "Can't plan for subchild " << branchResult->canonicalQuery->toString() << " " << status.reason(); return Status(ErrorCodes::BadValue, ss); } QLOG() << "Subplanner: got " << branchResult->solutions.size() << " solutions"; if (0 == branchResult->solutions.size()) { // If one child doesn't have an indexed solution, bail out. mongoutils::str::stream ss; ss << "No solutions for subchild " << branchResult->canonicalQuery->toString(); return Status(ErrorCodes::BadValue, ss); } } } return Status::OK(); }
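This earlier planSubqueries(...) performs the plan-cache lookup in two steps, PlanCache::shouldCacheQuery(...) followed by PlanCache::get(...), where the newer version further up calls a single getCacheEntryIfCacheable(...). The sketch below shows how such a wrapper could be layered over the two calls used here; the free-function form, the name, and the std::unique_ptr return type are assumptions rather than the real member function's signature.

// Hedged sketch: return the cached solution for 'cq' if the query is cacheable and an
// entry exists, otherwise nothing. The caller owns the returned entry, as it owned
// 'rawCS' in the two-step version above.
std::unique_ptr<CachedSolution> getCacheEntryIfCacheableSketch(PlanCache* planCache,
                                                               const CanonicalQuery& cq) {
    if (!PlanCache::shouldCacheQuery(cq)) {
        return nullptr;
    }

    CachedSolution* rawCS;
    if (!planCache->get(cq, &rawCS).isOK()) {
        return nullptr;
    }

    return std::unique_ptr<CachedSolution>(rawCS);
}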
bool SubplanRunner::runSubplans() { // This is what we annotate with the index selections and then turn into a solution. auto_ptr<OrMatchExpression> theOr( static_cast<OrMatchExpression*>(_query->root()->shallowClone())); // This is the skeleton of index selections that is inserted into the cache. auto_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree()); for (size_t i = 0; i < theOr->numChildren(); ++i) { MatchExpression* orChild = theOr->getChild(i); auto_ptr<CanonicalQuery> orChildCQ(_cqs.front()); _cqs.pop(); // 'solutions' is owned by the SubplanRunner instance until // it is popped from the queue. vector<QuerySolution*> solutions = _solutions.front(); _solutions.pop(); // We already checked for zero solutions in planSubqueries(...). invariant(!solutions.empty()); if (1 == solutions.size()) { // There is only one solution. Transfer ownership to an auto_ptr. auto_ptr<QuerySolution> autoSoln(solutions[0]); // We want a well-formed *indexed* solution. if (NULL == autoSoln->cacheData.get()) { // For example, we don't cache things for 2d indices. QLOG() << "Subplanner: No cache data for subchild " << orChild->toString(); return false; } if (SolutionCacheData::USE_INDEX_TAGS_SOLN != autoSoln->cacheData->solnType) { QLOG() << "Subplanner: No indexed cache data for subchild " << orChild->toString(); return false; } // Add the index assignments to our original query. Status tagStatus = QueryPlanner::tagAccordingToCache( orChild, autoSoln->cacheData->tree.get(), _indexMap); if (!tagStatus.isOK()) { QLOG() << "Subplanner: Failed to extract indices from subchild " << orChild->toString(); return false; } // Add the child's cache data to the cache data we're creating for the main query. cacheData->children.push_back(autoSoln->cacheData->tree->clone()); } else { // N solutions, rank them. Takes ownership of orChildCQ. // the working set will be shared by the candidate plans and owned by the runner WorkingSet* sharedWorkingSet = new WorkingSet(); MultiPlanStage* multiPlanStage = new MultiPlanStage(_collection, orChildCQ.get()); // Dump all the solutions into the MPR. for (size_t ix = 0; ix < solutions.size(); ++ix) { PlanStage* nextPlanRoot; verify(StageBuilder::build(_txn, _collection, *solutions[ix], sharedWorkingSet, &nextPlanRoot)); // Owns first two arguments multiPlanStage->addPlan(solutions[ix], nextPlanRoot, sharedWorkingSet); } multiPlanStage->pickBestPlan(); if (! multiPlanStage->bestPlanChosen()) { QLOG() << "Subplanner: Failed to pick best plan for subchild " << orChildCQ->toString(); return false; } Runner* mpr = new SingleSolutionRunner(_collection, orChildCQ.release(), multiPlanStage->bestSolution(), multiPlanStage, sharedWorkingSet); _underlyingRunner.reset(mpr); if (_killed) { QLOG() << "Subplanner: Killed while picking best plan for subchild " << orChild->toString(); return false; } QuerySolution* bestSoln = multiPlanStage->bestSolution(); if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) { QLOG() << "Subplanner: No indexed cache data for subchild " << orChild->toString(); return false; } // Add the index assignments to our original query. Status tagStatus = QueryPlanner::tagAccordingToCache( orChild, bestSoln->cacheData->tree.get(), _indexMap); if (!tagStatus.isOK()) { QLOG() << "Subplanner: Failed to extract indices from subchild " << orChild->toString(); return false; } cacheData->children.push_back(bestSoln->cacheData->tree->clone()); } } // Must do this before using the planner functionality. 
sortUsingTags(theOr.get()); // Use the cached index assignments to build solnRoot. Takes ownership of 'theOr' QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess( *_query, theOr.release(), false, _plannerParams.indices); if (NULL == solnRoot) { QLOG() << "Subplanner: Failed to build indexed data path for subplanned query\n"; return false; } QLOG() << "Subplanner: fully tagged tree is " << solnRoot->toString(); // Takes ownership of 'solnRoot' QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*_query, _plannerParams, solnRoot); if (NULL == soln) { QLOG() << "Subplanner: Failed to analyze subplanned query"; return false; } // We want our franken-solution to be cached. SolutionCacheData* scd = new SolutionCacheData(); scd->tree.reset(cacheData.release()); soln->cacheData.reset(scd); QLOG() << "Subplanner: Composite solution is " << soln->toString() << endl; // We use one of these even if there is one plan. We do this so that the entry is cached // with stats obtained in the same fashion as a competitive ranking would have obtained // them. MultiPlanStage* multiPlanStage = new MultiPlanStage(_collection, _query.get()); WorkingSet* ws = new WorkingSet(); PlanStage* root; verify(StageBuilder::build(_txn, _collection, *soln, ws, &root)); multiPlanStage->addPlan(soln, root, ws); // Takes ownership first two arguments. multiPlanStage->pickBestPlan(); if (! multiPlanStage->bestPlanChosen()) { QLOG() << "Subplanner: Failed to pick best plan for subchild " << _query->toString(); return false; } Runner* mpr = new SingleSolutionRunner(_collection, _query.release(), multiPlanStage->bestSolution(), multiPlanStage, ws); _underlyingRunner.reset(mpr); return true; }
Status SubplanStage::choosePlanForSubqueries(PlanYieldPolicy* yieldPolicy) { // This is what we annotate with the index selections and then turn into a solution. auto_ptr<OrMatchExpression> orExpr( static_cast<OrMatchExpression*>(_query->root()->shallowClone())); // This is the skeleton of index selections that is inserted into the cache. auto_ptr<PlanCacheIndexTree> cacheData(new PlanCacheIndexTree()); for (size_t i = 0; i < orExpr->numChildren(); ++i) { MatchExpression* orChild = orExpr->getChild(i); BranchPlanningResult* branchResult = _branchResults[i]; if (branchResult->cachedSolution.get()) { // We can get the index tags we need out of the cache. Status tagStatus = tagOrChildAccordingToCache( cacheData.get(), branchResult->cachedSolution->plannerData[0], orChild, _indexMap); if (!tagStatus.isOK()) { return tagStatus; } } else if (1 == branchResult->solutions.size()) { QuerySolution* soln = branchResult->solutions.front(); Status tagStatus = tagOrChildAccordingToCache(cacheData.get(), soln->cacheData.get(), orChild, _indexMap); if (!tagStatus.isOK()) { return tagStatus; } } else { // N solutions, rank them. // We already checked for zero solutions in planSubqueries(...). invariant(!branchResult->solutions.empty()); _ws->clear(); _child.reset(new MultiPlanStage(_txn, _collection, branchResult->canonicalQuery.get())); MultiPlanStage* multiPlanStage = static_cast<MultiPlanStage*>(_child.get()); // Dump all the solutions into the MPS. for (size_t ix = 0; ix < branchResult->solutions.size(); ++ix) { PlanStage* nextPlanRoot; invariant(StageBuilder::build(_txn, _collection, *branchResult->solutions[ix], _ws, &nextPlanRoot)); // Takes ownership of solution with index 'ix' and 'nextPlanRoot'. multiPlanStage->addPlan(branchResult->solutions.releaseAt(ix), nextPlanRoot, _ws); } Status planSelectStat = multiPlanStage->pickBestPlan(yieldPolicy); if (!planSelectStat.isOK()) { return planSelectStat; } if (!multiPlanStage->bestPlanChosen()) { mongoutils::str::stream ss; ss << "Failed to pick best plan for subchild " << branchResult->canonicalQuery->toString(); return Status(ErrorCodes::BadValue, ss); } QuerySolution* bestSoln = multiPlanStage->bestSolution(); // Check that we have good cache data. For example, we don't cache things // for 2d indices. if (NULL == bestSoln->cacheData.get()) { mongoutils::str::stream ss; ss << "No cache data for subchild " << orChild->toString(); return Status(ErrorCodes::BadValue, ss); } if (SolutionCacheData::USE_INDEX_TAGS_SOLN != bestSoln->cacheData->solnType) { mongoutils::str::stream ss; ss << "No indexed cache data for subchild " << orChild->toString(); return Status(ErrorCodes::BadValue, ss); } // Add the index assignments to our original query. Status tagStatus = QueryPlanner::tagAccordingToCache( orChild, bestSoln->cacheData->tree.get(), _indexMap); if (!tagStatus.isOK()) { mongoutils::str::stream ss; ss << "Failed to extract indices from subchild " << orChild->toString(); return Status(ErrorCodes::BadValue, ss); } cacheData->children.push_back(bestSoln->cacheData->tree->clone()); } } // Must do this before using the planner functionality. sortUsingTags(orExpr.get()); // Use the cached index assignments to build solnRoot. Takes ownership of 'orExpr'. 
QuerySolutionNode* solnRoot = QueryPlannerAccess::buildIndexedDataAccess( *_query, orExpr.release(), false, _plannerParams.indices, _plannerParams); if (NULL == solnRoot) { mongoutils::str::stream ss; ss << "Failed to build indexed data path for subplanned query\n"; return Status(ErrorCodes::BadValue, ss); } QLOG() << "Subplanner: fully tagged tree is " << solnRoot->toString(); // Takes ownership of 'solnRoot' _compositeSolution.reset(QueryPlannerAnalysis::analyzeDataAccess(*_query, _plannerParams, solnRoot)); if (NULL == _compositeSolution.get()) { mongoutils::str::stream ss; ss << "Failed to analyze subplanned query"; return Status(ErrorCodes::BadValue, ss); } QLOG() << "Subplanner: Composite solution is " << _compositeSolution->toString() << endl; // Use the index tags from planning each branch to construct the composite solution, // and set that solution as our child stage. _ws->clear(); PlanStage* root; invariant(StageBuilder::build(_txn, _collection, *_compositeSolution.get(), _ws, &root)); _child.reset(root); return Status::OK(); }
Status UpdateDriver::populateDocumentWithQueryFields(const CanonicalQuery* query, mutablebson::Document& doc) const { MatchExpression* root = query->root(); MatchExpression::MatchType rootType = root->matchType(); // These copies are needed until we apply the modifiers at the end. std::vector<BSONObj> copies; // We only care about equality and "and"ed equality fields, everything else is ignored if (rootType != MatchExpression::EQ && rootType != MatchExpression::AND) return Status::OK(); if (isDocReplacement()) { BSONElement idElem = query->getQueryObj().getField("_id"); // Replacement mods need the _id field copied explicitly. if (idElem.ok()) { mb::Element elem = doc.makeElement(idElem); return doc.root().pushFront(elem); } return Status::OK(); } // Create a new UpdateDriver to create the base doc from the query Options opts; opts.logOp = false; opts.multi = false; opts.upsert = true; opts.modOptions = modOptions(); UpdateDriver insertDriver(opts); insertDriver.setContext(ModifierInterface::ExecInfo::INSERT_CONTEXT); // If we are a single equality match query if (root->matchType() == MatchExpression::EQ) { EqualityMatchExpression* eqMatch = static_cast<EqualityMatchExpression*>(root); const BSONElement matchData = eqMatch->getData(); BSONElement childElem = matchData; // Make copy to new path if not the same field name (for cases like $all) if (!root->path().empty() && matchData.fieldNameStringData() != root->path()) { BSONObjBuilder copyBuilder; copyBuilder.appendAs(eqMatch->getData(), root->path()); const BSONObj copy = copyBuilder.obj(); copies.push_back(copy); childElem = copy[root->path()]; } // Add this element as a $set modifier Status s = insertDriver.addAndParse(modifiertable::MOD_SET, childElem); if (!s.isOK()) return s; } else { // parse query $set mods, including only equality stuff for (size_t i = 0; i < root->numChildren(); ++i) { MatchExpression* child = root->getChild(i); if (child->matchType() == MatchExpression::EQ) { EqualityMatchExpression* eqMatch = static_cast<EqualityMatchExpression*>(child); const BSONElement matchData = eqMatch->getData(); BSONElement childElem = matchData; // Make copy to new path if not the same field name (for cases like $all) if (!child->path().empty() && matchData.fieldNameStringData() != child->path()) { BSONObjBuilder copyBuilder; copyBuilder.appendAs(eqMatch->getData(), child->path()); const BSONObj copy = copyBuilder.obj(); copies.push_back(copy); childElem = copy[child->path()]; } // Add this element as a $set modifier Status s = insertDriver.addAndParse(modifiertable::MOD_SET, childElem); if (!s.isOK()) return s; } } } // update the document with base field Status s = insertDriver.update(StringData(), &doc); copies.clear(); if (!s.isOK()) { return Status(ErrorCodes::UnsupportedFormat, str::stream() << "Cannot create base during" " insert of update. Caused by :" << s.toString()); } return Status::OK(); }
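populateDocumentWithQueryFields(...) seeds the upsert document only from a top-level equality or from equality children sitting directly under a top-level AND; any other predicate ($gt, a nested $or, and so on) contributes nothing. The sketch below isolates just that selection rule from the modifier machinery above; the helper name is hypothetical.

// Hedged sketch: the paths whose values the query-driven upsert above would copy into
// the new document. {a: 1, b: 2} yields "a" and "b"; {a: {$gt: 1}} yields nothing.
std::vector<std::string> upsertEqualityPaths(MatchExpression* root) {
    std::vector<std::string> paths;

    if (MatchExpression::EQ == root->matchType()) {
        // A single top-level equality.
        paths.push_back(root->path().toString());
    } else if (MatchExpression::AND == root->matchType()) {
        // Only direct equality children of a top-level AND participate; deeper nesting
        // and other match types are ignored, exactly as in the loop above.
        for (size_t i = 0; i < root->numChildren(); ++i) {
            MatchExpression* child = root->getChild(i);
            if (MatchExpression::EQ == child->matchType()) {
                paths.push_back(child->path().toString());
            }
        }
    }

    return paths;
}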