bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) { const QueryRequest& qr = query.getQueryRequest(); const MatchExpression* expr = query.root(); // Hint provided if (!qr.getHint().isEmpty()) { return false; } // Min provided // Min queries are a special case of hinted queries. if (!qr.getMin().isEmpty()) { return false; } // Max provided // Similar to min, max queries are a special case of hinted queries. if (!qr.getMax().isEmpty()) { return false; } // Tailable cursors won't get cached, just turn into collscans. if (query.getQueryRequest().isTailable()) { return false; } // We can only subplan rooted $or queries, and only if they have at least one clause. return MatchExpression::OR == expr->matchType() && expr->numChildren() > 0; }
// static StatusWith<std::unique_ptr<CanonicalQuery>> CanonicalQuery::canonicalize( OperationContext* opCtx, const CanonicalQuery& baseQuery, MatchExpression* root, const ExtensionsCallback& extensionsCallback) { // TODO: we should be passing the filter corresponding to 'root' to the QR rather than the base // query's filter, baseQuery.getQueryRequest().getFilter(). auto qr = stdx::make_unique<QueryRequest>(baseQuery.nss()); qr->setFilter(baseQuery.getQueryRequest().getFilter()); qr->setProj(baseQuery.getQueryRequest().getProj()); qr->setSort(baseQuery.getQueryRequest().getSort()); qr->setCollation(baseQuery.getQueryRequest().getCollation()); qr->setExplain(baseQuery.getQueryRequest().isExplain()); auto qrStatus = qr->validate(); if (!qrStatus.isOK()) { return qrStatus; } std::unique_ptr<CollatorInterface> collator; if (baseQuery.getCollator()) { collator = baseQuery.getCollator()->clone(); } // Make the CQ we'll hopefully return. std::unique_ptr<CanonicalQuery> cq(new CanonicalQuery()); Status initStatus = cq->init( std::move(qr), extensionsCallback, root->shallowClone().release(), std::move(collator)); if (!initStatus.isOK()) { return initStatus; } return std::move(cq); }
// static StatusWith<std::unique_ptr<CanonicalQuery>> CanonicalQuery::canonicalize( OperationContext* opCtx, const CanonicalQuery& baseQuery, MatchExpression* root) { auto qr = stdx::make_unique<QueryRequest>(baseQuery.nss()); BSONObjBuilder builder; root->serialize(&builder); qr->setFilter(builder.obj()); qr->setProj(baseQuery.getQueryRequest().getProj()); qr->setSort(baseQuery.getQueryRequest().getSort()); qr->setCollation(baseQuery.getQueryRequest().getCollation()); qr->setExplain(baseQuery.getQueryRequest().isExplain()); auto qrStatus = qr->validate(); if (!qrStatus.isOK()) { return qrStatus; } std::unique_ptr<CollatorInterface> collator; if (baseQuery.getCollator()) { collator = baseQuery.getCollator()->clone(); } // Make the CQ we'll hopefully return. std::unique_ptr<CanonicalQuery> cq(new CanonicalQuery()); Status initStatus = cq->init(opCtx, std::move(qr), baseQuery.canHaveNoopMatchNodes(), root->shallowClone(), std::move(collator)); if (!initStatus.isOK()) { return initStatus; } return std::move(cq); }
// static bool IDHackStage::supportsQuery(Collection* collection, const CanonicalQuery& query) { return !query.getQueryRequest().showRecordId() && query.getQueryRequest().getHint().isEmpty() && !query.getQueryRequest().getSkip() && CanonicalQuery::isSimpleIdQuery(query.getQueryRequest().getFilter()) && !query.getQueryRequest().isTailable() && CollatorInterface::collatorsMatch(query.getCollator(), collection->getDefaultCollator()); }
bool SubplanStage::canUseSubplanning(const CanonicalQuery& query) { const QueryRequest& qr = query.getQueryRequest(); const MatchExpression* expr = query.root(); // Hint provided if (!qr.getHint().isEmpty()) { return false; } // Min provided // Min queries are a special case of hinted queries. if (!qr.getMin().isEmpty()) { return false; } // Max provided // Similar to min, max queries are a special case of hinted queries. if (!qr.getMax().isEmpty()) { return false; } // Tailable cursors won't get cached, just turn into collscans. if (query.getQueryRequest().isTailable()) { return false; } // Snapshot is really a hint. if (query.getQueryRequest().isSnapshot()) { return false; } // TODO: For now we only allow rooted OR. We should consider also allowing contained OR that // does not have a TEXT or GEO_NEAR node. return MatchExpression::OR == expr->matchType(); }
// static size_t MultiPlanStage::getTrialPeriodNumToReturn(const CanonicalQuery& query) { // Determine the number of results which we will produce during the plan // ranking phase before stopping. size_t numResults = static_cast<size_t>(internalQueryPlanEvaluationMaxResults.load()); if (query.getQueryRequest().getNToReturn()) { numResults = std::min(static_cast<size_t>(*query.getQueryRequest().getNToReturn()), numResults); } else if (query.getQueryRequest().getLimit()) { numResults = std::min(static_cast<size_t>(*query.getQueryRequest().getLimit()), numResults); } return numResults; }
bool PlanCache::shouldCacheQuery(const CanonicalQuery& query) { const QueryRequest& qr = query.getQueryRequest(); const MatchExpression* expr = query.root(); // Collection scan // No sort order requested if (qr.getSort().isEmpty() && expr->matchType() == MatchExpression::AND && expr->numChildren() == 0) { return false; } // Hint provided if (!qr.getHint().isEmpty()) { return false; } // Min provided // Min queries are a special case of hinted queries. if (!qr.getMin().isEmpty()) { return false; } // Max provided // Similar to min, max queries are a special case of hinted queries. if (!qr.getMax().isEmpty()) { return false; } // We don't read or write from the plan cache for explain. This ensures // that explain queries don't affect cache state, and it also makes // sure that we can always generate information regarding rejected plans // and/or trial period execution of candidate plans. if (qr.isExplain()) { return false; } // Tailable cursors won't get cached, just turn into collscans. if (query.getQueryRequest().isTailable()) { return false; } // Snapshot is really a hint. if (query.getQueryRequest().isSnapshot()) { return false; } return true; }
// Builds the plan cache key for 'cq' by appending, in this order, the encodings of the match
// expression, the sort and the projection to a single string.
PlanCacheKey PlanCache::computeKey(const CanonicalQuery& cq) const {
    const QueryRequest& request = cq.getQueryRequest();

    StringBuilder encodedShape;
    encodeKeyForMatch(cq.root(), &encodedShape);
    encodeKeyForSort(request.getSort(), &encodedShape);
    encodeKeyForProj(request.getProj(), &encodedShape);

    return encodedShape.str();
}
// Records a new plan cache entry for 'query', built from 'solns' and the ranking decision 'why',
// stamped with creation time 'now'.
//
// 'why' must be non-null (enforced by invariant). Returns BadValue if 'solns' is empty or if the
// sizes of why->stats, why->scores or why->candidateOrder differ from solns.size(). Returns OK
// once the entry has been handed to the cache; if that evicts an older entry, the eviction is
// logged at level 1.
Status PlanCache::add(const CanonicalQuery& query,
                      const std::vector<QuerySolution*>& solns,
                      PlanRankingDecision* why,
                      Date_t now) {
    invariant(why);

    if (solns.empty()) {
        return Status(ErrorCodes::BadValue, "no solutions provided");
    }

    if (why->stats.size() != solns.size()) {
        return Status(ErrorCodes::BadValue, "number of stats in decision must match solutions");
    }

    if (why->scores.size() != solns.size()) {
        return Status(ErrorCodes::BadValue, "number of scores in decision must match solutions");
    }

    if (why->candidateOrder.size() != solns.size()) {
        return Status(ErrorCodes::BadValue,
                      "candidate ordering entries in decision must match solutions");
    }

    // Keep the entry owned by a unique_ptr while it is being populated, so that an exception
    // thrown by any of the steps below does not leak it.
    std::unique_ptr<PlanCacheEntry> entry(new PlanCacheEntry(solns, why));

    const QueryRequest& qr = query.getQueryRequest();
    entry->query = qr.getFilter().getOwned();
    entry->sort = qr.getSort().getOwned();
    if (query.getCollator()) {
        entry->collation = query.getCollator()->getSpec().toBSON();
    }
    entry->timeOfCreation = now;

    // Strip projections on $-prefixed fields, as these are added by internal callers of the query
    // system and are not considered part of the user projection.
    BSONObjBuilder projBuilder;
    for (auto elem : qr.getProj()) {
        if (elem.fieldName()[0] == '$') {
            continue;
        }
        projBuilder.append(elem);
    }
    entry->projection = projBuilder.obj();

    stdx::lock_guard<stdx::mutex> cacheLock(_cacheMutex);

    // Compute the key before giving up ownership: function arguments are evaluated in an
    // unspecified order, so release() must not share a call expression with computeKey().
    const PlanCacheKey key = computeKey(query);

    // NOTE(review): _cache.add() is presumed to take ownership of the raw pointer, as it did in
    // the original code -- confirm against the cache's declaration.
    std::unique_ptr<PlanCacheEntry> evictedEntry = _cache.add(key, entry.release());

    if (evictedEntry) {
        LOG(1) << _ns << ": plan cache maximum size exceeded - "
               << "removed least recently used entry " << redact(evictedEntry->toString());
    }

    return Status::OK();
}
// Stores the allowed-index entry for 'key', replacing any entry already stored under that key.
//
// The entry is constructed in place from the filter, sort and projection of 'canonicalQuery',
// the BSON spec of its collator (an empty object when the query has no collator),
// 'indexKeyPatterns' and 'indexNames'. The map is modified while holding '_mutex'.
void QuerySettings::setAllowedIndices(const CanonicalQuery& canonicalQuery,
                                      const PlanCacheKey& key,
                                      const BSONObjSet& indexKeyPatterns,
                                      const stdx::unordered_set<std::string>& indexNames) {
    const QueryRequest& request = canonicalQuery.getQueryRequest();
    const BSONObj& filter = request.getFilter();
    const BSONObj& sortSpec = request.getSort();
    const BSONObj& projSpec = request.getProj();

    const auto* collator = canonicalQuery.getCollator();
    const BSONObj collation = collator ? collator->getSpec().toBSON() : BSONObj();

    stdx::lock_guard<stdx::mutex> cacheLock(_mutex);

    // emplace() does not overwrite, so drop any previous entry for this key first.
    _allowedIndexEntryMap.erase(key);
    _allowedIndexEntryMap.emplace(
        std::piecewise_construct,
        std::forward_as_tuple(key),
        std::forward_as_tuple(
            filter, sortSpec, projSpec, collation, indexKeyPatterns, indexNames));
}
void QuerySettings::setAllowedIndices(const CanonicalQuery& canonicalQuery, const PlanCacheKey& key, const std::vector<BSONObj>& indexes) { const QueryRequest& qr = canonicalQuery.getQueryRequest(); const BSONObj& query = qr.getFilter(); const BSONObj& sort = qr.getSort(); const BSONObj& projection = qr.getProj(); AllowedIndexEntry* entry = new AllowedIndexEntry(query, sort, projection, indexes); stdx::lock_guard<stdx::mutex> cacheLock(_mutex); AllowedIndexEntryMap::iterator i = _allowedIndexEntryMap.find(key); // Replace existing entry. if (i != _allowedIndexEntryMap.end()) { AllowedIndexEntry* entry = i->second; delete entry; } _allowedIndexEntryMap[key] = entry; }
// Runs 'query' and returns the id reported by runQueryWithoutRetrying(), retrying up to
// kMaxStaleConfigRetries times when the failure is a stale sharding error or ShardNotFound.
//
// 'results' must be non-null (enforced by invariant).
//
// Returns:
//  - BadValue if the projection mentions ClusterClientCursorParams::kSortKeyField;
//  - CursorId(0) if the database does not exist, or if reloading it after a StaleEpoch error
//    fails;
//  - any other error from the database lookup or from a non-retriable attempt, unchanged;
//  - StaleShardVersion once all retries have been used up.
StatusWith<CursorId> ClusterFind::runQuery(OperationContext* txn,
                                           const CanonicalQuery& query,
                                           const ReadPreferenceSetting& readPref,
                                           std::vector<BSONObj>* results) {
    invariant(results);

    // Projection on the reserved sort key field is illegal in mongos.
    const BSONObj& projection = query.getQueryRequest().getProj();
    if (projection.hasField(ClusterClientCursorParams::kSortKeyField)) {
        return {ErrorCodes::BadValue,
                str::stream() << "Projection contains illegal field '"
                              << ClusterClientCursorParams::kSortKeyField
                              << "': "
                              << projection};
    }

    auto dbConfig = grid.catalogCache()->getDatabase(txn, query.nss().db().toString());
    if (dbConfig.getStatus() == ErrorCodes::NamespaceNotFound) {
        // If the database doesn't exist, we successfully return an empty result set without
        // creating a cursor.
        return CursorId(0);
    }
    if (!dbConfig.isOK()) {
        return dbConfig.getStatus();
    }
    const auto& db = dbConfig.getValue();

    std::shared_ptr<ChunkManager> chunkManager;
    std::shared_ptr<Shard> primary;
    db->getChunkManagerOrPrimary(txn, query.nss().ns(), chunkManager, primary);

    // Re-target and re-send the initial find command to the shards until we have established the
    // shard version.
    for (size_t attempt = 1; attempt <= kMaxStaleConfigRetries; ++attempt) {
        auto cursorId = runQueryWithoutRetrying(
            txn, query, readPref, chunkManager.get(), std::move(primary), results);
        if (cursorId.isOK()) {
            return cursorId;
        }

        auto status = cursorId.getStatus();

        // Errors other than trying to reach a non existent shard or receiving a stale
        // metadata message from MongoD are fatal to the operation. Network errors and
        // replication retries happen at the level of the AsyncResultsMerger.
        const bool isRetriable = ErrorCodes::isStaleShardingError(status.code()) ||
            status == ErrorCodes::ShardNotFound;
        if (!isRetriable) {
            return status;
        }

        LOG(1) << "Received error status for query " << query.toStringShort() << " on attempt "
               << attempt << " of " << kMaxStaleConfigRetries << ": " << status;

        const bool staleEpoch = (status == ErrorCodes::StaleEpoch);
        if (staleEpoch && !db->reload(txn)) {
            // If the reload failed that means the database wasn't found, so successfully return
            // an empty result set without creating a cursor.
            return CursorId(0);
        }

        // Refresh the routing information before the next attempt. When no chunk manager is
        // returned, fall back to looking up the chunk manager or the primary shard again.
        chunkManager = db->getChunkManagerIfExists(txn, query.nss().ns(), true, staleEpoch);
        if (!chunkManager) {
            db->getChunkManagerOrPrimary(txn, query.nss().ns(), chunkManager, primary);
        }
    }

    return {ErrorCodes::StaleShardVersion,
            str::stream() << "Retried " << kMaxStaleConfigRetries
                          << " times without successfully establishing shard version."};
}
// static StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan( const CanonicalQuery& query, const QueryPlannerParams& params) { LOG(5) << "Beginning planning..." << endl << "=============================" << endl << "Options = " << optionString(params.options) << endl << "Canonical query:" << endl << redact(query.toString()) << "============================="; std::vector<std::unique_ptr<QuerySolution>> out; for (size_t i = 0; i < params.indices.size(); ++i) { LOG(5) << "Index " << i << " is " << params.indices[i].toString(); } const bool canTableScan = !(params.options & QueryPlannerParams::NO_TABLE_SCAN); const bool isTailable = query.getQueryRequest().isTailable(); // If the query requests a tailable cursor, the only solution is a collscan + filter with // tailable set on the collscan. if (isTailable) { if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && canTableScan) { auto soln = buildCollscanSoln(query, isTailable, params); if (soln) { out.push_back(std::move(soln)); } } return {std::move(out)}; } // The hint or sort can be $natural: 1. If this happens, output a collscan. If both // a $natural hint and a $natural sort are specified, then the direction of the collscan // is determined by the sign of the sort (not the sign of the hint). if (!query.getQueryRequest().getHint().isEmpty() || !query.getQueryRequest().getSort().isEmpty()) { BSONObj hintObj = query.getQueryRequest().getHint(); BSONObj sortObj = query.getQueryRequest().getSort(); BSONElement naturalHint = dps::extractElementAtPath(hintObj, "$natural"); BSONElement naturalSort = dps::extractElementAtPath(sortObj, "$natural"); // A hint overrides a $natural sort. This means that we don't force a table // scan if there is a $natural sort with a non-$natural hint. if (!naturalHint.eoo() || (!naturalSort.eoo() && hintObj.isEmpty())) { LOG(5) << "Forcing a table scan due to hinted $natural"; // min/max are incompatible with $natural. 
if (canTableScan && query.getQueryRequest().getMin().isEmpty() && query.getQueryRequest().getMax().isEmpty()) { auto soln = buildCollscanSoln(query, isTailable, params); if (soln) { out.push_back(std::move(soln)); } } return {std::move(out)}; } } // Figure out what fields we care about. unordered_set<string> fields; QueryPlannerIXSelect::getFields(query.root(), "", &fields); for (unordered_set<string>::const_iterator it = fields.begin(); it != fields.end(); ++it) { LOG(5) << "Predicate over field '" << *it << "'"; } // Filter our indices so we only look at indices that are over our predicates. vector<IndexEntry> relevantIndices; // Hints require us to only consider the hinted index. // If index filters in the query settings were used to override // the allowed indices for planning, we should not use the hinted index // requested in the query. BSONObj hintIndex; if (!params.indexFiltersApplied) { hintIndex = query.getQueryRequest().getHint(); } // If snapshot is set, default to collscanning. If the query param SNAPSHOT_USE_ID is set, // snapshot is a form of a hint, so try to use _id index to make a real plan. If that fails, // just scan the _id index. // // Don't do this if the query is a geonear or text as as text search queries must be answered // using full text indices and geoNear queries must be answered using geospatial indices. if (query.getQueryRequest().isSnapshot()) { RARELY { warning() << "The snapshot option is deprecated. See " "http://dochub.mongodb.org/core/snapshot-deprecation"; } if (!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO_NEAR) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT)) { const bool useIXScan = params.options & QueryPlannerParams::SNAPSHOT_USE_ID; if (!useIXScan) { auto soln = buildCollscanSoln(query, isTailable, params); if (soln) { out.push_back(std::move(soln)); } return {std::move(out)}; } else { // Find the ID index in indexKeyPatterns. It's our hint. 
for (size_t i = 0; i < params.indices.size(); ++i) { if (isIdIndex(params.indices[i].keyPattern)) { hintIndex = params.indices[i].keyPattern; break; } } } } }
bool providesSort(const CanonicalQuery& query, const BSONObj& kp) { return query.getQueryRequest().getSort().isPrefixOf(kp, SimpleBSONElementComparator::kInstance); }
std::unique_ptr<QuerySolution> QueryPlannerAnalysis::analyzeDataAccess( const CanonicalQuery& query, const QueryPlannerParams& params, std::unique_ptr<QuerySolutionNode> solnRoot) { auto soln = std::make_unique<QuerySolution>(); soln->filterData = query.getQueryObj(); soln->indexFilterApplied = params.indexFiltersApplied; solnRoot->computeProperties(); analyzeGeo(params, solnRoot.get()); // solnRoot finds all our results. Let's see what transformations we must perform to the // data. // If we're answering a query on a sharded system, we need to drop documents that aren't // logically part of our shard. if (params.options & QueryPlannerParams::INCLUDE_SHARD_FILTER) { if (!solnRoot->fetched()) { // See if we need to fetch information for our shard key. // NOTE: Solution nodes only list ordinary, non-transformed index keys for now bool fetch = false; BSONObjIterator it(params.shardKey); while (it.more()) { BSONElement nextEl = it.next(); if (!solnRoot->hasField(nextEl.fieldName())) { fetch = true; break; } } if (fetch) { FetchNode* fetchNode = new FetchNode(); fetchNode->children.push_back(solnRoot.release()); solnRoot.reset(fetchNode); } } ShardingFilterNode* sfn = new ShardingFilterNode(); sfn->children.push_back(solnRoot.release()); solnRoot.reset(sfn); } bool hasSortStage = false; solnRoot.reset(analyzeSort(query, params, solnRoot.release(), &hasSortStage)); // This can happen if we need to create a blocking sort stage and we're not allowed to. if (!solnRoot) { return nullptr; } // A solution can be blocking if it has a blocking sort stage or // a hashed AND stage. bool hasAndHashStage = hasNode(solnRoot.get(), STAGE_AND_HASH); soln->hasBlockingStage = hasSortStage || hasAndHashStage; const QueryRequest& qr = query.getQueryRequest(); if (qr.getSkip()) { auto skip = std::make_unique<SkipNode>(); skip->skip = *qr.getSkip(); skip->children.push_back(solnRoot.release()); solnRoot = std::move(skip); } // Project the results. 
if (query.getProj()) { solnRoot = analyzeProjection(query, std::move(solnRoot), hasSortStage); // If we don't have a covered project, and we're not allowed to put an uncovered one in, // bail out. if (solnRoot->fetched() && params.options & QueryPlannerParams::NO_UNCOVERED_PROJECTIONS) return nullptr; } else { // If there's no projection, we must fetch, as the user wants the entire doc. if (!solnRoot->fetched() && !(params.options & QueryPlannerParams::IS_COUNT)) { FetchNode* fetch = new FetchNode(); fetch->children.push_back(solnRoot.release()); solnRoot.reset(fetch); } } // When there is both a blocking sort and a limit, the limit will // be enforced by the blocking sort. // Otherwise, we need to limit the results in the case of a hard limit // (ie. limit in raw query is negative) if (!hasSortStage) { // We don't have a sort stage. This means that, if there is a limit, we will have // to enforce it ourselves since it's not handled inside SORT. if (qr.getLimit()) { LimitNode* limit = new LimitNode(); limit->limit = *qr.getLimit(); limit->children.push_back(solnRoot.release()); solnRoot.reset(limit); } else if (qr.getNToReturn() && !qr.wantMore()) { // We have a "legacy limit", i.e. a negative ntoreturn value from an OP_QUERY style // find. LimitNode* limit = new LimitNode(); limit->limit = *qr.getNToReturn(); limit->children.push_back(solnRoot.release()); solnRoot.reset(limit); } } soln->root = std::move(solnRoot); return soln; }
// static bool QueryPlannerAnalysis::explodeForSort(const CanonicalQuery& query, const QueryPlannerParams& params, QuerySolutionNode** solnRoot) { vector<QuerySolutionNode*> leafNodes; QuerySolutionNode* toReplace; if (!structureOKForExplode(*solnRoot, &toReplace)) { return false; } getLeafNodes(*solnRoot, &leafNodes); const BSONObj& desiredSort = query.getQueryRequest().getSort(); // How many scan leaves will result from our expansion? size_t totalNumScans = 0; // The value of entry i is how many scans we want to blow up for leafNodes[i]. // We calculate this in the loop below and might as well reuse it if we blow up // that scan. vector<size_t> fieldsToExplode; // The sort order we're looking for has to possibly be provided by each of the index scans // upon explosion. for (size_t i = 0; i < leafNodes.size(); ++i) { // We can do this because structureOKForExplode is only true if the leaves are index // scans. IndexScanNode* isn = static_cast<IndexScanNode*>(leafNodes[i]); const IndexBounds& bounds = isn->bounds; // Not a point interval prefix, can't try to rewrite. if (bounds.isSimpleRange) { return false; } if (isn->index.multikey && isn->index.multikeyPaths.empty()) { // The index is multikey but has no path-level multikeyness metadata. In this case, the // index can never provide a sort. return false; } // How many scans will we create if we blow up this ixscan? size_t numScans = 1; // Skip every field that is a union of point intervals and build the resulting sort // order from the remaining fields. BSONObjIterator kpIt(isn->index.keyPattern); size_t boundsIdx = 0; while (kpIt.more()) { const OrderedIntervalList& oil = bounds.fields[boundsIdx]; if (!isUnionOfPoints(oil)) { break; } numScans *= oil.intervals.size(); kpIt.next(); ++boundsIdx; } // There's no sort order left to gain by exploding. Just go home. TODO: verify nothing // clever we can do here. if (!kpIt.more()) { return false; } // Only explode if there's at least one field to explode for this scan. 
if (0 == boundsIdx) { return false; } // The rest of the fields define the sort order we could obtain by exploding // the bounds. BSONObjBuilder resultingSortBob; while (kpIt.more()) { auto elem = kpIt.next(); if (isn->multikeyFields.find(elem.fieldNameStringData()) != isn->multikeyFields.end()) { // One of the indexed fields providing the sort is multikey. It is not correct for a // field with multikey components to provide a sort, so bail out. return false; } resultingSortBob.append(elem); } // See if it's the order we're looking for. BSONObj possibleSort = resultingSortBob.obj(); if (!desiredSort.isPrefixOf(possibleSort, SimpleBSONElementComparator::kInstance)) { // We can't get the sort order from the index scan. See if we can // get the sort by reversing the scan. BSONObj reversePossibleSort = QueryPlannerCommon::reverseSortObj(possibleSort); if (!desiredSort.isPrefixOf(reversePossibleSort, SimpleBSONElementComparator::kInstance)) { // Can't get the sort order from the reversed index scan either. Give up. return false; } else { // We can get the sort order we need if we reverse the scan. QueryPlannerCommon::reverseScans(isn); } } // Do some bookkeeping to see how many ixscans we'll create total. totalNumScans += numScans; // And for this scan how many fields we expand. fieldsToExplode.push_back(boundsIdx); } // Too many ixscans spoil the performance. if (totalNumScans > (size_t)internalQueryMaxScansToExplode.load()) { LOG(5) << "Could expand ixscans to pull out sort order but resulting scan count" << "(" << totalNumScans << ") is too high."; return false; } // If we're here, we can (probably? depends on how restrictive the structure check is) // get our sort order via ixscan blow-up. 
MergeSortNode* merge = new MergeSortNode(); merge->sort = desiredSort; for (size_t i = 0; i < leafNodes.size(); ++i) { IndexScanNode* isn = static_cast<IndexScanNode*>(leafNodes[i]); explodeScan(isn, desiredSort, fieldsToExplode[i], &merge->children); } merge->computeProperties(); // Replace 'toReplace' with the new merge sort node. replaceNodeInTree(solnRoot, toReplace, merge); // And get rid of the node that got replaced. delete toReplace; return true; }