bool getFieldName(OperationContext* txn, Collection* collection, IndexCatalog* indexCatalog, string* fieldOut, string* errOut, bool *isFrom2D) { vector<IndexDescriptor*> idxs; // First, try 2d. collection->getIndexCatalog()->findIndexByType(txn, IndexNames::GEO_2D, idxs); if (idxs.size() > 1) { *errOut = "more than one 2d index, not sure which to run geoNear on"; return false; } if (1 == idxs.size()) { BSONObj indexKp = idxs[0]->keyPattern(); BSONObjIterator kpIt(indexKp); while (kpIt.more()) { BSONElement elt = kpIt.next(); if (String == elt.type() && IndexNames::GEO_2D == elt.valuestr()) { *fieldOut = elt.fieldName(); *isFrom2D = true; return true; } } } // Next, 2dsphere. idxs.clear(); collection->getIndexCatalog()->findIndexByType(txn, IndexNames::GEO_2DSPHERE, idxs); if (0 == idxs.size()) { *errOut = "no geo indices for geoNear"; return false; } if (idxs.size() > 1) { *errOut = "more than one 2dsphere index, not sure which to run geoNear on"; return false; } // 1 == idx.size() BSONObj indexKp = idxs[0]->keyPattern(); BSONObjIterator kpIt(indexKp); while (kpIt.more()) { BSONElement elt = kpIt.next(); if (String == elt.type() && IndexNames::GEO_2DSPHERE == elt.valuestr()) { *fieldOut = elt.fieldName(); *isFrom2D = false; return true; } } return false; }
static bool indexCompatibleMaxMin(const BSONObj& obj, const CollatorInterface* queryCollator, const IndexEntry& indexEntry) { BSONObjIterator kpIt(indexEntry.keyPattern); BSONObjIterator objIt(obj); const bool collatorsMatch = CollatorInterface::collatorsMatch(queryCollator, indexEntry.collator); for (;;) { // Every element up to this point has matched so the KP matches if (!kpIt.more() && !objIt.more()) { return true; } // If only one iterator is done, it's not a match. if (!kpIt.more() || !objIt.more()) { return false; } // Field names must match and be in the same order. BSONElement kpElt = kpIt.next(); BSONElement objElt = objIt.next(); if (!mongoutils::str::equals(kpElt.fieldName(), objElt.fieldName())) { return false; } // If the index collation doesn't match the query collation, and the min/max obj has a // boundary value that needs to respect the collation, then the index is not compatible. if (!collatorsMatch && CollationIndexKey::isCollatableType(objElt.type())) { return false; } } }
/**
 * Builds a projection stage over 'child'.
 *
 * For NO_FAST_PATH a ProjectionExec is created from the projection object, the full
 * expression and the where-callback.  For the fast paths the set of included fields is
 * computed up front, and for COVERED_ONE_INDEX the stage additionally records, for every
 * field of the covered key, whether it is part of the projection (_includeKey) and under
 * which name (_keyFieldNames).
 */
ProjectionStage::ProjectionStage(const ProjectionStageParams& params,
                                 WorkingSet* ws,
                                 PlanStage* child)
    : _ws(ws),
      _child(child),
      _commonStats(kStageType),
      _projImpl(params.projImpl) {

    _projObj = params.projObj;

    // General path: hand everything to ProjectionExec and be done.
    if (ProjectionStageParams::NO_FAST_PATH == _projImpl) {
        _exec.reset(new ProjectionExec(params.projObj,
                                       params.fullExpression,
                                       *params.whereCallback));
        return;
    }

    // Fast paths never need the full expression.
    invariant(NULL == params.fullExpression);

    // The projection must be an owned, non-empty object.
    invariant(_projObj.isOwned());
    invariant(!_projObj.isEmpty());

    // Collect the names of the fields the projection includes.
    getSimpleInclusionFields(_projObj, &_includedFields);

    if (ProjectionStageParams::COVERED_ONE_INDEX != params.projImpl) {
        // The only other fast path.
        invariant(ProjectionStageParams::SIMPLE_DOC == params.projImpl);
        return;
    }

    // Covered by a single index: decide once, per key field, whether it is projected so
    // that field names need not be looked up for every result.
    _coveredKeyObj = params.coveredKeyObj;
    invariant(_coveredKeyObj.isOwned());

    BSONObjIterator keyIt(_coveredKeyObj);
    while (keyIt.more()) {
        BSONElement keyElt = keyIt.next();
        unordered_set<StringData, StringData::Hasher>::iterator found =
            _includedFields.find(keyElt.fieldNameStringData());
        const bool included = (_includedFields.end() != found);

        // _keyFieldNames and _includeKey always grow together; an excluded key field gets
        // an unused placeholder name.
        _keyFieldNames.push_back(included ? *found : StringData());
        _includeKey.push_back(included);
    }
}
bool providesSort(const CanonicalQuery& query, const BSONObj& kp) { BSONObjIterator sortIt(query.getParsed().getSort()); BSONObjIterator kpIt(kp); while (sortIt.more() && kpIt.more()) { // We want the field name to be the same as well (so we pass true). // TODO: see if we can pull a reverse sort out... if (0 != sortIt.next().woCompare(kpIt.next(), true)) { return false; } } // every elt in sort matched kp return !sortIt.more(); }
static bool indexCompatibleMaxMin(const BSONObj& obj, const BSONObj& keyPattern) { BSONObjIterator kpIt(keyPattern); BSONObjIterator objIt(obj); for (;;) { // Every element up to this point has matched so the KP matches if (!kpIt.more() && !objIt.more()) { return true; } // If only one iterator is done, it's not a match. if (!kpIt.more() || !objIt.more()) { return false; } // Field names must match and be in the same order. BSONElement kpElt = kpIt.next(); BSONElement objElt = objIt.next(); if (!mongoutils::str::equals(kpElt.fieldName(), objElt.fieldName())) { return false; } } }
/** * Given the set of relevant indices, annotate predicates with any applicable indices. Also * mark how applicable the indices are (see RelevantIndex::Relevance). */ void rateIndices(const vector<BSONObj>& indices, PredicateMap* predicates) { for (size_t i = 0; i < indices.size(); ++i) { BSONObjIterator kpIt(indices[i]); BSONElement elt = kpIt.next(); // We're looking at the first element in the index. We can definitely use any index // prefixed by the predicate's field to answer that predicate. for (PredicateMap::iterator it = predicates->find(elt.fieldName()); it != predicates->end(); ++it) { it->second.relevant.insert(RelevantIndex(i, RelevantIndex::FIRST)); } // We're now looking at the subsequent elements of the index. We can only use these if // we have a restriction of all the previous fields. We won't figure that out until // later. while (kpIt.more()) { elt = kpIt.next(); for (PredicateMap::iterator it = predicates->find(elt.fieldName()); it != predicates->end(); ++it) { it->second.relevant.insert(RelevantIndex(i, RelevantIndex::NOT_FIRST)); } } } }
bool QueryPlannerAnalysis::explodeForSort(const CanonicalQuery& query, const QueryPlannerParams& params, QuerySolutionNode** solnRoot) { vector<QuerySolutionNode*> leafNodes; if (!structureOKForExplode(*solnRoot)) { return false; } getLeafNodes(*solnRoot, &leafNodes); const BSONObj& desiredSort = query.getParsed().getSort(); // How many scan leaves will result from our expansion? size_t totalNumScans = 0; // The value of entry i is how many scans we want to blow up for leafNodes[i]. // We calculate this in the loop below and might as well reuse it if we blow up // that scan. vector<size_t> fieldsToExplode; // The sort order we're looking for has to possibly be provided by each of the index scans // upon explosion. for (size_t i = 0; i < leafNodes.size(); ++i) { // We can do this because structureOKForExplode is only true if the leaves are index // scans. IndexScanNode* isn = static_cast<IndexScanNode*>(leafNodes[i]); const IndexBounds& bounds = isn->bounds; // Not a point interval prefix, can't try to rewrite. if (bounds.isSimpleRange) { return false; } // How many scans will we create if we blow up this ixscan? size_t numScans = 1; // Skip every field that is a union of point intervals and build the resulting sort // order from the remaining fields. BSONObjIterator kpIt(isn->indexKeyPattern); size_t boundsIdx = 0; while (kpIt.more()) { const OrderedIntervalList& oil = bounds.fields[boundsIdx]; if (!isUnionOfPoints(oil)) { break; } numScans *= oil.intervals.size(); kpIt.next(); ++boundsIdx; } // There's no sort order left to gain by exploding. Just go home. TODO: verify nothing // clever we can do here. if (!kpIt.more()) { return false; } // The rest of the fields define the sort order we could obtain by exploding // the bounds. BSONObjBuilder resultingSortBob; while (kpIt.more()) { resultingSortBob.append(kpIt.next()); } // See if it's the order we're looking for. 
BSONObj possibleSort = resultingSortBob.obj(); if (0 != possibleSort.woCompare(desiredSort)) { return false; } // Do some bookkeeping to see how many ixscans we'll create total. totalNumScans += numScans; // And for this scan how many fields we expand. fieldsToExplode.push_back(boundsIdx); } // Too many ixscans spoil the performance. if (totalNumScans > QueryPlannerAnalysis::kMaxScansToExplode) { QLOG() << "Could expand ixscans to pull out sort order but resulting scan count" << "(" << totalNumScans << ") is too high."; return false; } // If we're here, we can (probably? depends on how restrictive the structure check is) // get our sort order via ixscan blow-up. for (size_t i = 0; i < leafNodes.size(); ++i) { IndexScanNode* isn = static_cast<IndexScanNode*>(leafNodes[i]); QuerySolutionNode* newNode = explodeScan(isn, desiredSort, fieldsToExplode[i]); // Replace 'isn' with 'newNode' replaceNodeInTree(solnRoot, isn, newNode); // And get rid of the old data access node. delete isn; } return true; }
/**
 * Builds a projection stage over 'child'.
 *
 * For NO_FAST_PATH a ProjectionExec is created from the projection object and the full
 * expression.  For the fast paths the set of included fields is derived from the
 * projection object (with _id included unless it is explicitly projected out), and for
 * COVERED_ONE_INDEX the stage additionally records, for every field of the covered key,
 * whether it is part of the projection (_includeKey) and under which name
 * (_keyFieldNames).
 */
ProjectionStage::ProjectionStage(const ProjectionStageParams& params,
                                 WorkingSet* ws,
                                 PlanStage* child)
    : _ws(ws),
      _child(child),
      _projImpl(params.projImpl) {

    // General path: ProjectionExec does all the work.
    if (ProjectionStageParams::NO_FAST_PATH == _projImpl) {
        _exec.reset(new ProjectionExec(params.projObj, params.fullExpression));
        return;
    }

    // Fast paths never need the full expression.
    invariant(NULL == params.fullExpression);

    _projObj = params.projObj;

    // The projection must be an owned, non-empty object.
    invariant(_projObj.isOwned());
    invariant(!_projObj.isEmpty());

    // Gather the projected field names.  _id is implicitly included, so it is only left
    // out when the projection carries an _id element with a false value.
    // TODO: we can get this from the ParsedProjection...modify that to have this type
    // instead of a vector.
    bool idExcluded = false;
    for (BSONObjIterator projIt(_projObj); projIt.more();) {
        BSONElement projElt = projIt.next();
        const bool excludesId =
            mongoutils::str::equals(projElt.fieldName(), kIdField) && !projElt.trueValue();
        if (excludesId) {
            idExcluded = true;
        }
        else {
            _includedFields.insert(projElt.fieldNameStringData());
        }
    }

    if (!idExcluded) {
        _includedFields.insert(kIdField);
    }

    if (ProjectionStageParams::COVERED_ONE_INDEX != params.projImpl) {
        // The only other fast path.
        invariant(ProjectionStageParams::SIMPLE_DOC == params.projImpl);
        return;
    }

    // Covered by a single index: decide once, per key field, whether it is projected so
    // that field names need not be looked up for every result.
    _coveredKeyObj = params.coveredKeyObj;
    invariant(_coveredKeyObj.isOwned());

    BSONObjIterator keyIt(_coveredKeyObj);
    while (keyIt.more()) {
        BSONElement keyElt = keyIt.next();
        unordered_set<StringData, StringData::Hasher>::iterator found =
            _includedFields.find(keyElt.fieldNameStringData());
        const bool included = (_includedFields.end() != found);

        // _keyFieldNames and _includeKey always grow together; an excluded key field gets
        // an unused placeholder name.
        _keyFieldNames.push_back(included ? *found : StringData());
        _includeKey.push_back(included);
    }
}
/**
 * Recursively allocates memo entries for 'node' and the parts of its subtree that can use
 * an index.
 *
 * Three kinds of node are handled:
 *  - a node that can use an index on its own field gets a PredicateAssignment, provided
 *    its RelevantTag lists at least one index it is the first field of;
 *  - an OR gets an OrAssignment, provided every child could be given a memo entry;
 *  - an AND (or an array operator that uses indices on its children) gets an
 *    AndAssignment built from its indexable predicate children and from whichever
 *    non-predicate children could be given a memo entry.
 *
 * @return true if a memo entry was allocated for 'node', false otherwise.
 */
bool PlanEnumerator::prepMemo(MatchExpression* node) {
    if (Indexability::nodeCanUseIndexOnOwnField(node)) {
        // We only get here if our parent is an OR, an array operator, or we're the root.

        // If we have no index tag there are no indices we can use.
        if (NULL == node->getTag()) {
            return false;
        }

        RelevantTag* rt = static_cast<RelevantTag*>(node->getTag());

        // In order to definitely use an index it must be prefixed with our field.
        // We don't consider notFirst indices here because we must be AND-related to a node
        // that uses the first spot in that index, and we currently do not know that
        // unless we're in an AND node.
        if (0 == rt->first.size()) {
            return false;
        }

        // We know we can use an index, so grab a memo spot.
        size_t myMemoID;
        NodeAssignment* assign;
        allocateAssignment(node, &assign, &myMemoID);

        assign->pred.reset(new PredicateAssignment());
        assign->pred->expr = node;
        // Note: the swap moves the index list out of the tag and into the assignment.
        assign->pred->first.swap(rt->first);
        return true;
    }
    else if (MatchExpression::OR == node->matchType()) {
        // For an OR to be indexed, all its children must be indexed.
        for (size_t i = 0; i < node->numChildren(); ++i) {
            if (!prepMemo(node->getChild(i))) {
                return false;
            }
        }

        // If we're here we're fully indexed and can be in the memo.
        size_t myMemoID;
        NodeAssignment* assign;
        allocateAssignment(node, &assign, &myMemoID);

        OrAssignment* orAssignment = new OrAssignment();
        for (size_t i = 0; i < node->numChildren(); ++i) {
            orAssignment->subnodes.push_back(_nodeToId[node->getChild(i)]);
        }
        assign->orAssignment.reset(orAssignment);
        return true;
    }
    else if (MatchExpression::AND == node->matchType()
             || Indexability::arrayUsesIndexOnChildren(node)) {
        // Maps from index id to the children that have a pred over it, split by whether
        // the child's field is the first field of that index or a later one.
        unordered_map<IndexID, vector<MatchExpression*> > idxToFirst;
        unordered_map<IndexID, vector<MatchExpression*> > idxToNotFirst;

        vector<MemoID> subnodes;

        for (size_t i = 0; i < node->numChildren(); ++i) {
            MatchExpression* child = node->getChild(i);

            if (Indexability::nodeCanUseIndexOnOwnField(child)) {
                RelevantTag* rt = static_cast<RelevantTag*>(child->getTag());

                // As in the leaf case above: no tag means no usable indices, so the child
                // contributes nothing.  Skip it instead of dereferencing a null tag.
                if (NULL == rt) {
                    continue;
                }

                for (size_t j = 0; j < rt->first.size(); ++j) {
                    idxToFirst[rt->first[j]].push_back(child);
                }
                for (size_t j = 0; j < rt->notFirst.size(); ++j) {
                    idxToNotFirst[rt->notFirst[j]].push_back(child);
                }
            }
            else {
                if (prepMemo(child)) {
                    verify(_nodeToId.end() != _nodeToId.find(child));
                    size_t childID = _nodeToId[child];
                    subnodes.push_back(childID);
                }
            }
        }

        // Nothing below this node can use an index.
        if (idxToFirst.empty() && (subnodes.size() == 0)) {
            return false;
        }

        AndAssignment* newAndAssignment = new AndAssignment();
        newAndAssignment->subnodes.swap(subnodes);

        // At this point we know how many indices the AND's predicate children are over.
        newAndAssignment->predChoices.resize(idxToFirst.size());

        // This iterates through the predChoices.
        size_t predChoicesIdx = 0;

        // For each FIRST, we assign nodes to it.
        for (unordered_map<IndexID, vector<MatchExpression*> >::iterator it = idxToFirst.begin();
             it != idxToFirst.end(); ++it) {

            OneIndexAssignment* assign = &newAndAssignment->predChoices[predChoicesIdx];
            ++predChoicesIdx;

            // Fill out the OneIndexAssignment with the preds that are over the first field.
            assign->index = it->first;
            // We can swap because we're never touching idxToFirst again after this loop
            // over it.
            assign->preds.swap(it->second);

            const bool isMultikey = (*_indices)[it->first].multikey;

            // If it's a multikey index, we can't intersect the bounds, so we only want one
            // pred.  Only trim (and only complain) when there really is more than one.
            if (isMultikey && assign->preds.size() > 1) {
                // XXX: pick a better pred than the first one that happens to wander in.
                // XXX: see and3.js, indexq.js, arrayfind7.js
                QLOG() << "Index " << (*_indices)[it->first].keyPattern.toString()
                       << " is multikey but has >1 pred possible, should be smarter"
                       << " here and pick the best one"
                       << endl;
                assign->preds.resize(1);
            }

            // Every pred assigned so far is over position 0 of the index.
            assign->positions.resize(assign->preds.size(), 0);

            //
            // Compound analysis here and below.
            //

            // Don't compound on multikey indices. (XXX: not whole story...)
            if (isMultikey) {
                continue;
            }

            // Grab the expressions that are notFirst for the index whose assignments we're
            // filling out.
            unordered_map<IndexID, vector<MatchExpression*> >::const_iterator compoundIt =
                idxToNotFirst.find(it->first);
            if (compoundIt == idxToNotFirst.end()) {
                continue;
            }
            const vector<MatchExpression*>& tryCompound = compoundIt->second;

            // Walk over the key pattern trying to find a pred for each trailing field.
            BSONObjIterator kpIt((*_indices)[it->first].keyPattern);

            // Skip the first elt as it's already assigned.
            kpIt.next();

            size_t posInIdx = 0;
            while (kpIt.more()) {
                BSONElement keyElt = kpIt.next();
                ++posInIdx;
                bool fieldAssigned = false;

                for (size_t j = 0; j < tryCompound.size(); ++j) {
                    MatchExpression* maybe = tryCompound[j];
                    // Sigh we grab the full path from the relevant tag.
                    RelevantTag* rt = static_cast<RelevantTag*>(maybe->getTag());
                    if (keyElt.fieldName() == rt->path) {
                        assign->preds.push_back(maybe);
                        assign->positions.push_back(posInIdx);
                        fieldAssigned = true;
                    }
                }

                // If we have (a,b,c) and we can't assign something to 'b' don't try
                // to assign something to 'c'.
                if (!fieldAssigned) {
                    break;
                }
            }
        }

        // Some predicates *require* an index.  We stuff these in 'mandatory' inside of the
        // AndAssignment.
        //
        // TODO: We can compute this "on the fly" above somehow, but it's clearer to see
        // what's going on when we do this as a separate step.
        //
        // TODO: Consider annotating mandatory indices in the planner as part of the
        // available index tagging.

        // Note we're not incrementing 'i' in the loop.  We may erase the i-th element.
        for (size_t i = 0; i < newAndAssignment->predChoices.size();) {
            const OneIndexAssignment& oie = newAndAssignment->predChoices[i];
            bool hasPredThatRequiresIndex = false;

            for (size_t j = 0; j < oie.preds.size(); ++j) {
                MatchExpression* expr = oie.preds[j];
                if (MatchExpression::GEO_NEAR == expr->matchType()) {
                    hasPredThatRequiresIndex = true;
                    break;
                }
                if (MatchExpression::TEXT == expr->matchType()) {
                    hasPredThatRequiresIndex = true;
                    break;
                }
            }

            if (hasPredThatRequiresIndex) {
                // Copy into 'mandatory' before erasing; 'oie' refers to the element that is
                // about to be removed.
                newAndAssignment->mandatory.push_back(oie);
                newAndAssignment->predChoices.erase(newAndAssignment->predChoices.begin() + i);
            }
            else {
                ++i;
            }
        }

        newAndAssignment->resetEnumeration();

        size_t myMemoID;
        NodeAssignment* assign;
        allocateAssignment(node, &assign, &myMemoID);
        // Takes ownership.
        assign->newAnd.reset(newAndAssignment);
        return true;
    }

    // Don't know what the node is at this point.
    return false;
}