/**
 * Produces a Runner in '*out' that serves a distinct over 'field' for the documents of
 * 'collection' matching 'query'.
 *
 * A "fast distinct" plan is attempted first. It is only considered when:
 *   1. the plan has a single leaf and that leaf is an index scan,
 *   2. the scanned index covers 'field' (for now only indices whose first key is 'field' are
 *      looked at, although any index containing the field would be correct), and
 *   3. the query is covered, i.e. no fetch is required.
 * To find such a plan we run normal planning with a restricted index set. Whenever the fast
 * path is not available the work is handed to getRunner().
 *
 * Returns the canonicalization error if 'query' cannot be canonicalized, Status::OK() when a
 * fast-distinct runner was built, and otherwise whatever getRunner() returns.
 */
Status getRunnerDistinct(Collection* collection,
                         const BSONObj& query,
                         const string& field,
                         Runner** out) {
    // The distinct command is expected to have validated the collection already.
    verify(collection);

    // TODO: check for idhack here?

    QueryPlannerParams plannerParams;
    plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;

    // Offer the planner only those indices whose leading key is the distinct field. The hack
    // can work when the field appears anywhere in the index, but it is not clearly a win unless
    // it is the first field.
    for (IndexCatalog::IndexIterator indexIt =
             collection->getIndexCatalog()->getIndexIterator(false);
         indexIt.more();) {
        const IndexDescriptor* descriptor = indexIt.next();
        if (!(descriptor->keyPattern().firstElement().fieldName() == field)) {
            continue;
        }
        plannerParams.indices.push_back(IndexEntry(descriptor->keyPattern(),
                                                   descriptor->getAccessMethodName(),
                                                   descriptor->isMultikey(),
                                                   descriptor->isSparse(),
                                                   descriptor->indexName(),
                                                   descriptor->infoObj()));
    }

    // Without a candidate index there is nothing to speed up: use regular planning with no
    // projection.
    if (plannerParams.indices.empty()) {
        CanonicalQuery* unprojectedQuery;
        Status canonicalizeStatus = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                                 query,
                                                                 BSONObj(),
                                                                 BSONObj(),
                                                                 &unprojectedQuery);
        if (!canonicalizeStatus.isOK()) {
            return canonicalizeStatus;
        }

        // getRunner takes ownership of the canonical query.
        return getRunner(collection, unprojectedQuery, out);
    }

    //
    // From here on at least one index is prefixed by the field we're distinct-ing over.
    //

    // Projecting on the key lets the planner try for covered plans, which are the ones we can
    // convert. getDistinctProjection deals with .find() projection semantics (ie _id:1 being
    // implied by default).
    BSONObj projection = getDistinctProjection(field);

    // Canonicalize with the key projection. The empty BSONObj() is the sort.
    CanonicalQuery* cq;
    Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                 query,
                                                 BSONObj(),
                                                 projection,
                                                 &cq);
    if (!status.isOK()) {
        return status;
    }

    // With an empty query we can distinct-scan an index directly. Not every index in
    // plannerParams.indices may be suitable; getDistinctNodeIndex() picks one.
    size_t chosenIndex = 0;
    const bool canScanIndexDirectly =
        query.isEmpty() && getDistinctNodeIndex(plannerParams.indices, field, &chosenIndex);
    if (canScanIndexDirectly) {
        DistinctNode* distinctNode = new DistinctNode();
        distinctNode->direction = 1;
        distinctNode->fieldNo = 0;
        distinctNode->indexKeyPattern = plannerParams.indices[chosenIndex].keyPattern;
        IndexBoundsBuilder::allValuesBounds(distinctNode->indexKeyPattern,
                                            &distinctNode->bounds);

        QueryPlannerParams defaultParams;

        // analyzeDataAccess takes ownership of 'distinctNode'.
        QuerySolution* directSolution =
            QueryPlannerAnalysis::analyzeDataAccess(*cq, defaultParams, distinctNode);
        verify(directSolution);

        LOG(2) << "Using fast distinct: " << cq->toStringShort()
               << ", planSummary: " << getPlanSummary(*directSolution);

        WorkingSet* workingSet;
        PlanStage* rootStage;
        verify(StageBuilder::build(collection, *directSolution, &rootStage, &workingSet));
        *out = new SingleSolutionRunner(collection, cq, directSolution, rootStage, workingSet);
        return Status::OK();
    }

    // Otherwise plan against the restricted index set and look for a convertible solution.
    vector<QuerySolution*> solutions;
    status = QueryPlanner::plan(*cq, plannerParams, &solutions);
    if (!status.isOK()) {
        return getRunner(collection, cq, out);
    }

    // Take the first solution whose ixscan can be turned into a distinctixscan.
    for (size_t candidate = 0; candidate < solutions.size(); ++candidate) {
        if (!turnIxscanIntoDistinctIxscan(solutions[candidate], field)) {
            continue;
        }

        // solutions[candidate] is the one we keep; dispose of every other QuerySolution.
        for (size_t other = 0; other < solutions.size(); ++other) {
            if (other != candidate) {
                delete solutions[other];
            }
        }

        LOG(2) << "Using fast distinct: " << cq->toStringShort()
               << ", planSummary: " << getPlanSummary(*solutions[candidate]);

        // Build and return the SingleSolutionRunner over the kept solution.
        WorkingSet* workingSet;
        PlanStage* rootStage;
        verify(StageBuilder::build(collection, *solutions[candidate], &rootStage, &workingSet));
        *out = new SingleSolutionRunner(collection,
                                        cq,
                                        solutions[candidate],
                                        rootStage,
                                        workingSet);
        return Status::OK();
    }

    // The planner produced solutions from the restricted index set, but none of them could be
    // translated into a distinct-compatible solution. Discard them and plan normally.
    for (size_t idx = 0; idx < solutions.size(); ++idx) {
        delete solutions[idx];
    }

    // Dropping the projection from 'cq' is not trivial, so the canonical query is rebuilt
    // from scratch without one.
    delete cq;
    status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                          query,
                                          BSONObj(),
                                          BSONObj(),
                                          &cq);
    if (!status.isOK()) {
        return status;
    }

    // getRunner takes ownership of cq.
    return getRunner(collection, cq, out);
}
/**
 * Converts a raw distinct command object into a ParsedDistinct.
 *
 * 'cmdObj' is parsed with the IDL-generated DistinctCommand parser; any exception it throws is
 * converted to a Status. The key, filter, collation and comment are copied into a QueryRequest,
 * followed by the generic arguments the IDL parser does not handle (read concern, unwrapped
 * read preference, maxTimeMS). The request is then canonicalized.
 *
 * Returns an error Status when parsing fails, when the key contains an embedded null byte,
 * when a generic argument has the wrong type, or when canonicalization fails. If the request
 * carries no collation and 'defaultCollator' is non-null, a clone of 'defaultCollator' is
 * installed on the canonical query.
 */
StatusWith<ParsedDistinct> ParsedDistinct::parse(OperationContext* opCtx,
                                                 const NamespaceString& nss,
                                                 const BSONObj& cmdObj,
                                                 const ExtensionsCallback& extensionsCallback,
                                                 bool isExplain,
                                                 const CollatorInterface* defaultCollator) {
    IDLParserErrorContext ctx("distinct");

    DistinctCommand parsedDistinct(nss);
    try {
        parsedDistinct = DistinctCommand::parse(ctx, cmdObj);
    } catch (...) {
        return exceptionToStatus();
    }

    auto qr = stdx::make_unique<QueryRequest>(nss);

    const auto distinctKey = parsedDistinct.getKey();
    if (distinctKey.find('\0') != std::string::npos) {
        return Status(ErrorCodes::Error(31032), "Key field cannot contain an embedded null byte");
    }

    // Project only the fields the distinct needs so that the query planner will produce a
    // covered plan if possible.
    qr->setProj(getDistinctProjection(std::string(distinctKey)));

    if (auto filter = parsedDistinct.getQuery()) {
        qr->setFilter(filter.get());
    }

    if (auto collationSpec = parsedDistinct.getCollation()) {
        qr->setCollation(collationSpec.get());
    }

    if (auto commentText = parsedDistinct.getComment()) {
        qr->setComment(commentText.get().toString());
    }

    // Error returned when a generic argument that must be a sub-document has another type.
    const auto wrongTypeStatus = [](const auto& fieldName, const auto& element) {
        return Status(ErrorCodes::TypeMismatch,
                      str::stream() << "\"" << fieldName << "\" had the wrong type. Expected "
                                    << typeName(BSONType::Object)
                                    << ", found "
                                    << typeName(element.type()));
    };

    // The IDL parser above does not handle generic command arguments. The underlying query
    // request needs the following ones, so they are parsed and verified by hand.
    if (auto readConcernElt = cmdObj[repl::ReadConcernArgs::kReadConcernFieldName]) {
        if (readConcernElt.type() != BSONType::Object) {
            return wrongTypeStatus(repl::ReadConcernArgs::kReadConcernFieldName, readConcernElt);
        }
        qr->setReadConcern(readConcernElt.embeddedObject());
    }

    if (auto readPrefElt = cmdObj[QueryRequest::kUnwrappedReadPrefField]) {
        if (readPrefElt.type() != BSONType::Object) {
            return wrongTypeStatus(QueryRequest::kUnwrappedReadPrefField, readPrefElt);
        }
        qr->setUnwrappedReadPref(readPrefElt.embeddedObject());
    }

    if (auto maxTimeMSElt = cmdObj[QueryRequest::cmdOptionMaxTimeMS]) {
        auto parsedMaxTimeMS = QueryRequest::parseMaxTimeMS(maxTimeMSElt);
        if (!parsedMaxTimeMS.isOK()) {
            return parsedMaxTimeMS.getStatus();
        }
        qr->setMaxTimeMS(static_cast<unsigned int>(parsedMaxTimeMS.getValue()));
    }

    qr->setExplain(isExplain);

    // A default-constructed (null) expression context is handed to canonicalize().
    const boost::intrusive_ptr<ExpressionContext> expCtx;
    auto canonicalizeResult =
        CanonicalQuery::canonicalize(opCtx,
                                     std::move(qr),
                                     expCtx,
                                     extensionsCallback,
                                     MatchExpressionParser::kAllowAllSpecialFeatures);
    if (!canonicalizeResult.isOK()) {
        return canonicalizeResult.getStatus();
    }

    auto& canonicalQuery = canonicalizeResult.getValue();

    // Use the caller's default collator only when the request did not specify a collation.
    const bool requestHasNoCollation =
        canonicalQuery->getQueryRequest().getCollation().isEmpty();
    if (requestHasNoCollation && defaultCollator) {
        canonicalQuery->setCollator(defaultCollator->clone());
    }

    return ParsedDistinct(std::move(canonicalQuery), distinctKey.toString());
}