bool AllMatchExpression::_match( const BSONElementSet& all ) const { if ( all.size() == 0 ) return _arrayEntries.singleNull(); const BSONElementSet& equalities = _arrayEntries.equalities(); for ( BSONElementSet::const_iterator i = equalities.begin(); i != equalities.end(); ++i ) { BSONElement foo = *i; if ( all.count( foo ) == 0 ) return false; } for ( size_t i = 0; i < _arrayEntries.numRegexes(); i++ ) { bool found = false; for ( BSONElementSet::const_iterator j = all.begin(); j != all.end(); ++j ) { BSONElement bar = *j; if ( _arrayEntries.regex(i)->matchesSingleElement( bar ) ) { found = true; break; } } if ( ! found ) return false; } return true; }
void getKeys( const BSONObj &obj, BSONObjSet &keys ) const { BSONElement loc = obj.getFieldDotted( _geo ); if ( loc.eoo() ) return; uassert( 13323 , "latlng not an array" , loc.isABSONObj() ); string root; { BSONObjIterator i( loc.Obj() ); BSONElement x = i.next(); BSONElement y = i.next(); root = makeString( hash(x) , hash(y) ); } verify( _other.size() == 1 ); BSONElementSet all; obj.getFieldsDotted( _other[0] , all ); if ( all.size() == 0 ) { _add( obj , root , BSONElement() , keys ); } else { for ( BSONElementSet::iterator i=all.begin(); i!=all.end(); ++i ) { _add( obj , root , *i , keys ); } } }
// Get the index keys for elements that are GeoJSON. void S2AccessMethod::getGeoKeys(const BSONElementSet& elements, BSONObjSet* out) const { for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { uassert(16754, "Can't parse geometry from element: " + i->toString(), i->isABSONObj()); const BSONObj &obj = i->Obj(); vector<string> cells; bool succeeded = S2SearchUtil::getKeysForObject(obj, _params, &cells); uassert(16755, "Can't extract geo keys from object, malformed geometry?:" + obj.toString(), succeeded); uassert(16756, "Unable to generate keys for (likely malformed) geometry: " + obj.toString(), cells.size() > 0); for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { BSONObjBuilder b; b.append("", *it); out->insert(b.obj()); } } if (0 == out->size()) { BSONObjBuilder b; b.appendNull(""); out->insert(b.obj()); } }
// Computes all 2dsphere index keys for 'obj': geo fields expand into
// covering-cell keys, other fields into literal keys, and the per-field key
// sets are combined via a Cartesian product.
// NOTE(review): returns early -- leaving *keys untouched rather than
// cleared -- when an indexed geo field is missing from the document.
void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
    BSONObjSet keysToAdd;
    // We output keys in the same order as the fields we index.
    BSONObjIterator i(_descriptor->keyPattern());
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means Don't expand the last array, duh.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            // We can't ever return documents that don't have geometry so don't bother indexing
            // them.
            if (fieldElements.empty()) { return; }
            // NOTE(review): this overload takes the whole document as well as the
            // field elements -- presumably so context is available for errors;
            // confirm against the getGeoKeys declaration in this class.
            getGeoKeys(obj, fieldElements, &keysForThisField);
        } else {
            getLiteralKeys(fieldElements, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end(); ++newIt) {
                BSONObjBuilder b;
                // Existing key prefix followed by this field's key element.
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    if (keysToAdd.size() > _params.maxKeysPerInsert) {
        warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                  << ") consider creating larger buckets. obj=" << obj;
    }

    *keys = keysToAdd;
}
// elements is a non-geo field. Add the values literally, expanding arrays. void getLiteralKeys(const BSONElementSet &elements, BSONObjSet *out) const { if (0 == elements.size()) { // Missing fields are indexed as null. BSONObjBuilder b; b.appendNull(""); out->insert(b.obj()); } else { for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { getOneLiteralKey(*i, out); } } }
bool AllElemMatchOp::matches( const BSONObj& doc, MatchDetails* details ) const { BSONElementSet all; doc.getFieldsDotted( _path, all, false ); for ( BSONElementSet::const_iterator i = all.begin(); i != all.end(); ++i ) { BSONElement sub = *i; if ( sub.type() != Array ) continue; if ( _allMatch( sub.Obj() ) ) { return true; } } return false; }
// static void ExpressionKeysPrivate::getHaystackKeys(const BSONObj& obj, const std::string& geoField, const std::vector<std::string>& otherFields, double bucketSize, BSONObjSet* keys) { BSONElement loc = obj.getFieldDotted(geoField); if (loc.eoo()) { return; } // NOTE: We explicitly test nFields >= 2 to support legacy users who may have indexed // (intentionally or unintentionally) objects/arrays with more than two fields. uassert(16775, str::stream() << "cannot extract [lng, lat] array or object from " << obj, loc.isABSONObj() && loc.Obj().nFields() >= 2); string root; { BSONObjIterator i(loc.Obj()); BSONElement x = i.next(); BSONElement y = i.next(); root = makeHaystackString(hashHaystackElement(x, bucketSize), hashHaystackElement(y, bucketSize)); } verify(otherFields.size() == 1); BSONElementSet all; // This is getFieldsDotted (plural not singular) since the object we're indexing // may be an array. obj.getFieldsDotted(otherFields[0], all); if (all.size() == 0) { // We're indexing a document that doesn't have the secondary non-geo field present. // XXX: do we want to add this even if all.size() > 0? result:empty search terms // match everything instead of only things w/empty search terms) addKey(root, BSONElement(), keys); } else { // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, // all.size()==1. We can query on the complete field. // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values // "A" and "B". The query looks for any of the fields in the array. for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { addKey(root, *i, keys); } } }
// Get the index keys for elements that are GeoJSON. void S2AccessMethod::getGeoKeys(const BSONElementSet& elements, BSONObjSet* out) const { S2RegionCoverer coverer; _params.configureCoverer(&coverer); // See here for GeoJSON format: geojson.org/geojson-spec.html for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { uassert(16754, "Can't parse geometry from element: " + i->toString(), i->isABSONObj()); const BSONObj &obj = i->Obj(); vector<string> cells; S2Polyline line; S2Cell point; // We only support GeoJSON polygons. Why?: // 1. we don't automagically do WGS84/flat -> WGS84, and // 2. the old polygon format must die. if (GeoParser::isGeoJSONPolygon(obj)) { S2Polygon polygon; GeoParser::parseGeoJSONPolygon(obj, &polygon); keysFromRegion(&coverer, polygon, &cells); } else if (GeoParser::parseLineString(obj, &line)) { keysFromRegion(&coverer, line, &cells); } else if (GeoParser::parsePoint(obj, &point)) { S2CellId parent(point.id().parent(_params.finestIndexedLevel)); cells.push_back(parent.toString()); } else { uasserted(16755, "Can't extract geo keys from object, malformed geometry?:" + obj.toString()); } uassert(16756, "Unable to generate keys for (likely malformed) geometry: " + obj.toString(), cells.size() > 0); for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { BSONObjBuilder b; b.append("", *it); out->insert(b.obj()); } } if (0 == out->size()) { BSONObjBuilder b; b.appendNull(""); out->insert(b.obj()); } }
// This is the actual search. bool S2Cursor::advance() { if (_numToReturn <= 0) { return false; } for (; _btreeCursor->ok(); _btreeCursor->advance()) { ++_nscanned; if (_seen.end() != _seen.find(_btreeCursor->currLoc())) { continue; } _seen.insert(_btreeCursor->currLoc()); ++_matchTested; MatchDetails details; bool matched = _matcher->matchesCurrent(_btreeCursor.get(), &details); if (!matched) { continue; } const BSONObj &indexedObj = _btreeCursor->currLoc().obj(); ++_geoTested; size_t geoFieldsMatched = 0; // OK, cool, non-geo match satisfied. See if the object actually overlaps w/the geo // query fields. for (size_t i = 0; i < _fields.size(); ++i) { BSONElementSet geoFieldElements; indexedObj.getFieldsDotted(_fields[i].getField(), geoFieldElements, false); if (geoFieldElements.empty()) { continue; } bool match = false; for (BSONElementSet::iterator oi = geoFieldElements.begin(); !match && (oi != geoFieldElements.end()); ++oi) { if (!oi->isABSONObj()) { continue; } const BSONObj &geoObj = oi->Obj(); GeometryContainer geoContainer; uassert(16698, "malformed geometry: " + geoObj.toString(), geoContainer.parseFrom(geoObj)); match = _fields[i].satisfiesPredicate(geoContainer); } if (match) { ++geoFieldsMatched; } } if (geoFieldsMatched == _fields.size()) { // We have a winner! And we point at it. --_numToReturn; return true; } } return false; }
// elements is a non-geo field. Add the values literally, expanding arrays. void getLiteralKeys(const BSONElementSet &elements, BSONObjSet *out) const { if (0 == elements.size()) { BSONObjBuilder b; b.appendNull(""); out->insert(b.obj()); } else if (1 == elements.size()) { BSONObjBuilder b; b.appendAs(*elements.begin(), ""); out->insert(b.obj()); } else { BSONArrayBuilder aBuilder; for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { aBuilder.append(*i); } BSONObjBuilder b; b.append("", aBuilder.arr()); out->insert(b.obj()); } }
// Collects into 'ret' every element reachable from the dotted path 'name',
// descending through embedded objects and fanning out across arrays.  A
// terminal array is expanded into its individual elements.
void BSONObj::getFieldsDotted(const char *name, BSONElementSet &ret ) const {
    BSONElement e = getField( name );
    if ( e.eoo() ) {
        // No literal field with the full dotted name: split on the first dot
        // and recurse into the sub-object or array elements.
        const char *p = strchr(name, '.');
        if ( p ) {
            string left(name, p-name);
            const char* next = p+1;

            BSONElement e = getField( left.c_str() );

            if (e.type() == Object){
                e.embeddedObject().getFieldsDotted(next, ret);
            } else if (e.type() == Array) {
                // Decide whether the next path component is a numeric array
                // index ("a.0.b") rather than a field name inside elements.
                bool allDigits = false;
                if ( isdigit( *next ) ){
                    const char * temp = next + 1;
                    while ( isdigit( *temp ) ) temp++;
                    allDigits = *temp == '.';
                    // NOTE(review): a trailing numeric component ("a.0") ends
                    // at '\0', not '.', so it falls through to the fan-out
                    // branch below -- confirm that is intended.
                }
                if (allDigits) {
                    e.embeddedObject().getFieldsDotted(next, ret);
                } else {
                    // Fan out: apply the remaining path to every element that
                    // is itself an object or array.
                    BSONObjIterator i(e.embeddedObject());
                    while ( i.more() ){
                        BSONElement e2 = i.next();
                        if (e2.type() == Object || e2.type() == Array)
                            e2.embeddedObject().getFieldsDotted(next, ret);
                    }
                }
            } else {
                // do nothing: no match
            }
        }
    } else {
        if (e.type() == Array){
            // Terminal array: insert each element individually.
            BSONObjIterator i(e.embeddedObject());
            while ( i.more() )
                ret.insert(i.next());
        } else {
            ret.insert(e);
        }
    }
}
/**
 * Find and parse all geometry elements on the appropriate field path from the document.
 *
 * Each value at 'path' is first tried as a geometry; if that fails and the
 * value is an array, its elements are parsed individually (arrays are not
 * auto-expanded on lookup because a single 2d point is itself an array).
 * Non-geometry values are logged and skipped.  Caller owns the returned
 * pointers in 'geometries'.
 */
static void extractGeometries(const BSONObj& doc,
                              const string& path,
                              vector<StoredGeometry*>* geometries) {
    BSONElementSet geomElements;
    // NOTE: Annoyingly, we cannot just expand arrays b/c single 2d points are arrays, we need
    // to manually expand all results to check if they are geometries
    doc.getFieldsDotted(path, geomElements, false /* expand arrays */);

    for (BSONElementSet::iterator it = geomElements.begin(); it != geomElements.end(); ++it) {
        const BSONElement& el = *it;
        auto_ptr<StoredGeometry> stored(StoredGeometry::parseFrom(el));

        if (stored.get()) {
            // Valid geometry element
            geometries->push_back(stored.release());
        } else if (el.type() == Array) {
            // Many geometries may be in an array
            BSONObjIterator arrIt(el.Obj());
            while (arrIt.more()) {
                const BSONElement nextEl = arrIt.next();
                stored.reset(StoredGeometry::parseFrom(nextEl));

                if (stored.get()) {
                    // Valid geometry element
                    geometries->push_back(stored.release());
                } else {
                    warning() << "geoNear stage read non-geometry element "
                              << nextEl.toString() << " in array " << el.toString();
                }
            }
        } else {
            warning() << "geoNear stage read non-geometry element " << el.toString();
        }
    }
}
// Matches one element against the stored $all entries: arrays are expanded
// into their elements, scalars are treated as a one-element set, and a
// missing element matches only when $all was [null].
bool AllMatchExpression::matchesSingleElement( const BSONElement& e ) const {
    if ( _arrayEntries.size() == 0 )
        return false;

    if ( e.eoo() )
        return _arrayEntries.singleNull();

    BSONElementSet all;

    if ( e.type() == Array ) {
        BSONObjIterator i( e.Obj() );
        while ( i.more() ) {
            all.insert( i.next() );
        }
    }
    else {
        // Scalar value: match it as a one-element set.
        // NOTE(review): the original author flagged this branch for removal
        // ("this is the part i want to remove") -- confirm scalar $all
        // matching is still intended before changing it.
        all.insert( e );
    }

    return _match( all );
}
// Variant of getFieldsDotted that reports, via the optional 'deep' out-flag,
// whether an array was traversed while resolving the path.  Iteration over
// array elements stops early at an eoo element.
void BSONObj::getFieldsDotted(const char *name, BSONElementSet &ret, bool *deep ) const {
    BSONElement e = getField( name );
    if ( e.eoo() ) {
        // Full dotted name not present literally: split on the first dot and
        // recurse into the sub-object or array elements.
        const char *p = strchr(name, '.');
        if ( p ) {
            string left(name, p-name);
            BSONElement e = getField( left );
            if ( e.type() == Array ) {
                trueDat( deep );  // fanning out across an array counts as deep
                BSONObjIterator i( e.embeddedObject() );
                while( i.more() ) {
                    BSONElement f = i.next();
                    if ( f.eoo() )
                        break;
                    if ( f.type() == Object )
                        f.embeddedObject().getFieldsDotted(p+1, ret);
                }
            } else if ( e.type() == Object ) {
                e.embeddedObject().getFieldsDotted(p+1, ret);
            }
        }
    } else {
        if ( e.type() == Array ) {
            trueDat( deep );  // a terminal array is expanded element-by-element
            BSONObjIterator i( e.embeddedObject() );
            while( i.more() ) {
                BSONElement f = i.next();
                if ( f.eoo() )
                    break;
                ret.insert( f );
            }
        } else {
            ret.insert( e );
        }
    }

    // When nothing was found at all, report not-deep regardless of any
    // arrays encountered above.
    if ( ret.empty() && deep ) *deep = false;
}
// Get the index keys for elements that are GeoJSON. void getGeoKeys(const BSONElementSet &elements, BSONObjSet *out) const { S2RegionCoverer coverer; _params.configureCoverer(&coverer); // See here for GeoJSON format: geojson.org/geojson-spec.html for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { if (!i->isABSONObj()) { continue; } // error? const BSONObj &obj = i->Obj(); vector<string> cells; S2Polygon polygon; S2Polyline line; S2Cell point; if (GeoParser::parsePolygon(obj, &polygon)) { keysFromRegion(&coverer, polygon, &cells); } else if (GeoParser::parseLineString(obj, &line)) { keysFromRegion(&coverer, line, &cells); } else if (GeoParser::parsePoint(obj, &point)) { keysFromRegion(&coverer, point, &cells); } else { uasserted(16572, "Can't extract geo keys from object, malformed geometry?:" + obj.toString()); } for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { BSONObjBuilder b; b.append("", *it); out->insert(b.obj()); } } if (0 == out->size()) { BSONObjBuilder b; b.appendNull(""); out->insert(b.obj()); } }
void getKeys(const BSONObj &obj, BSONObjSet &keys) const { BSONElement loc = obj.getFieldDotted(_geoField); if (loc.eoo()) return; uassert(13323, "latlng not an array", loc.isABSONObj()); string root; { BSONObjIterator i(loc.Obj()); BSONElement x = i.next(); BSONElement y = i.next(); root = makeString(hash(x), hash(y)); } verify(_otherFields.size() == 1); BSONElementSet all; // This is getFieldsDotted (plural not singular) since the object we're indexing // may be an array. obj.getFieldsDotted(_otherFields[0], all); if (all.size() == 0) { // We're indexing a document that doesn't have the secondary non-geo field present. // XXX: do we want to add this even if all.size() > 0? result:empty search terms // match everything instead of only things w/empty search terms) addKey(root, BSONElement(), keys); } else { // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, // all.size()==1. We can query on the complete field. // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values // "A" and "B". The query looks for any of the fields in the array. for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { addKey(root, *i, keys); } } }
bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){ string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); BSONElementSet values; shared_ptr<Cursor> cursor = bestGuessCursor(ns.c_str() , query , BSONObj() ); scoped_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns)); while ( cursor->ok() ){ if ( !cursor->matcher() || cursor->matcher()->matchesCurrent( cursor.get() ) ){ BSONObj o = cursor->current(); o.getFieldsDotted( key, values ); } cursor->advance(); if (!cc->yieldSometimes()) break; RARELY killCurrentOp.checkForInterrupt(); } BSONArrayBuilder b( result.subarrayStart( "values" ) ); for ( BSONElementSet::iterator i = values.begin() ; i != values.end(); i++ ){ b.append( *i ); } BSONObj arr = b.done(); uassert(10044, "distinct too big, 4mb cap", arr.objsize() < BSONObjMaxUserSize ); return true; }
bool ClientCursor::getFieldsDotted( const string& name, BSONElementSet &ret ) { map<string,int>::const_iterator i = _indexedFields.find( name ); if ( i == _indexedFields.end() ){ current().getFieldsDotted( name , ret ); return false; } int x = i->second; BSONObjIterator it( currKey() ); while ( x && it.more() ) it.next(); assert( x == 0 ); ret.insert( it.next() ); return true; }
/**
 * Tries to get the fields from the key first, then the object if the keys don't have it.
 *
 * When 'name' is covered by the index, the element at position i->second of
 * the current index key is inserted into 'ret', and 'holder' takes ownership
 * of the key object so the inserted element's storage stays alive.  Returns
 * true iff the value came from the index key rather than the document.
 */
bool getFieldsDotted(const map<string, int>& indexedFields,
                     shared_ptr<Cursor> cursor,
                     const string& name,
                     BSONElementSet &ret,
                     BSONObj& holder) {
    map<string,int>::const_iterator i = indexedFields.find( name );
    if ( i == indexedFields.end() ) {
        // Field is not part of the index key: read it from the document.
        cursor->current().getFieldsDotted( name , ret );
        return false;
    }

    int x = i->second;

    // Own the key so the element inserted below remains valid for the caller.
    holder = cursor->currKey();
    BSONObjIterator it( holder );
    // Skip the first x key fields to reach the desired one.
    while ( x && it.more() ) {
        it.next();
        x--;
    }
    verify( x == 0 );

    ret.insert( it.next() );
    return true;
}
// Computes all 2dsphere index keys for 'obj' under 'keyPattern': geo fields
// expand into cell keys, others into literal keys, combined via a Cartesian
// product.  For index version >= 2, documents with no usable geo value
// (null / undefined / empty array / missing) produce no keys at all.
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys) {
    BSONObjSet keysToAdd;

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    // We output keys in the same order as the fields we index.
    BSONObjIterator i(keyPattern);
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means Don't expand the last array, duh.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field.  So we look for these cases and
                // throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require that at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end(); ++newIt) {
                BSONObjBuilder b;
                // Existing key prefix followed by this field's key element.
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size()
                  << " obj inserted: " << obj;
    }

    *keys = keysToAdd;
}
// static
// Computes 2d index keys (and, optionally, the raw location objects) for
// 'obj'.  Each location found at params.geo is geo-hashed and combined with
// the values of every non-geo field declared in the index.
void ExpressionKeysPrivate::get2DKeys(const BSONObj& obj,
                                      const TwoDIndexingParams& params,
                                      BSONObjSet* keys,
                                      std::vector<BSONObj>* locs) {
    BSONElementMSet bSet;

    // Get all the nested location fields, but don't return individual elements from
    // the last array, if it exists.
    obj.getFieldsDotted(params.geo.c_str(), bSet, false);

    if (bSet.empty())
        return;

    for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) {
        BSONElement geo = *setI;

        if (geo.eoo() || !geo.isABSONObj())
            continue;

        //
        // Grammar for location lookup:
        // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc
        // loc  ::= { <k1> : #, <k2> : # }|[#, #]|{}
        //
        // Empty locations are ignored, preserving single-location semantics
        //

        BSONObj embed = geo.embeddedObject();
        if (embed.isEmpty())
            continue;

        // Differentiate between location arrays and locations
        // by seeing if the first element value is a number
        bool singleElement = embed.firstElement().isNumber();

        BSONObjIterator oi(embed);

        while (oi.more()) {
            BSONObj locObj;

            if (singleElement) {
                locObj = embed;
            } else {
                BSONElement locElement = oi.next();

                uassert(16804,
                        mongoutils::str::stream()
                            << "location object expected, location array not in correct format",
                        locElement.isABSONObj());

                locObj = locElement.embeddedObject();
                if (locObj.isEmpty())
                    continue;
            }

            BSONObjBuilder b(64);

            // Remember the actual location object if needed
            if (locs)
                locs->push_back(locObj);

            // Stop if we don't need to get anything but location objects
            if (!keys) {
                if (singleElement)
                    break;
                else
                    continue;
            }

            // The geohash of the location is the first (unnamed) key field.
            params.geoHashConverter->hash(locObj, &obj).appendHashMin(&b, "");

            // Go through all the other index keys
            for (vector<pair<string, int>>::const_iterator i = params.other.begin();
                 i != params.other.end(); ++i) {
                // Get *all* fields for the index key
                BSONElementSet eSet;
                obj.getFieldsDotted(i->first, eSet);

                if (eSet.size() == 0)
                    b.appendNull("");
                else if (eSet.size() == 1)
                    b.appendAs(*(eSet.begin()), "");
                else {
                    // If we have more than one key, store as an array of the objects
                    BSONArrayBuilder aBuilder;

                    for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end(); ++ei) {
                        aBuilder.append(*ei);
                    }

                    b.append("", aBuilder.arr());
                }
            }

            keys->insert(b.obj());

            // A single location yields exactly one key per secondary pass.
            if (singleElement)
                break;
        }
    }
}
// Pulls one result from the child stage, computes the minimum distance from
// the query point to any geometry stored at the near field, and buffers the
// working-set member when that distance falls inside the current annulus.
// On child EOF, resizes the annulus increment based on how many results the
// pass produced.
PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
    PlanStage::StageState state = _child->work(out);

    // All done reading from _child.
    if (PlanStage::IS_EOF == state) {
        _child.reset();
        _keyGeoFilter.reset();

        // Adjust the annulus size depending on how many results we got.
        if (_results.empty()) {
            _radiusIncrement *= 2;
        } else if (_results.size() < 300) {
            _radiusIncrement *= 2;
        } else if (_results.size() > 600) {
            _radiusIncrement /= 2;
        }

        // Make a new ixscan next time.
        return PlanStage::NEED_TIME;
    }

    // Nothing to do unless we advance.
    if (PlanStage::ADVANCED != state) { return state; }

    WorkingSetMember* member = _ws->get(*out);
    // Must have an object in order to get geometry out of it.
    verify(member->hasObj());

    // The scans we use don't dedup so we must dedup them ourselves.  We only put locs into here
    // if we know for sure whether or not we'll return them in this annulus.
    if (member->hasLoc()) {
        if (_seenInScan.end() != _seenInScan.find(member->loc)) {
            return PlanStage::NEED_TIME;
        }
    }

    // Get all the fields with that name from the document.
    BSONElementSet geom;
    member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
    if (geom.empty()) {
        return PlanStage::NEED_TIME;
    }

    // Some value that any distance we can calculate will be less than.
    double minDistance = numeric_limits<double>::max();
    BSONObj minDistanceObj;
    for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
        if (!git->isABSONObj()) {
            // A non-object at the geo path is an internal error, not a miss.
            mongoutils::str::stream ss;
            ss << "s2near stage read invalid geometry element " << *git << " from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            return PlanStage::FAILURE;
        }
        BSONObj obj = git->Obj();

        double distToObj;
        if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
            if (distToObj < minDistance) {
                minDistance = distToObj;
                minDistanceObj = obj;
            }
        } else {
            warning() << "unknown geometry: " << obj.toString();
        }
    }

    // If we're here we'll either include the doc in this annulus or reject it.  It's safe to
    // ignore it if it pops up again in this annulus.
    if (member->hasLoc()) {
        _seenInScan.insert(member->loc);
    }

    // If the distance to the doc satisfies our distance criteria, add it to our buffered
    // results.
    if (minDistance >= _innerRadius &&
        (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
        _results.push(Result(*out, minDistance));
        if (_params.addDistMeta) {
            // FLAT implies the output distances are in radians.  Convert to meters.
            // NOTE(review): dividing by the earth radius converts meters ->
            // radians, the opposite of what the comment above says -- confirm
            // which unit FLAT distances are actually stored in.
            if (FLAT == _params.nearQuery.centroid.crs) {
                member->addComputed(new GeoDistanceComputedData(minDistance
                                                                / kRadiusOfEarthInMeters));
            } else {
                member->addComputed(new GeoDistanceComputedData(minDistance));
            }
        }
        if (_params.addPointMeta) {
            member->addComputed(new GeoNearPointComputedData(minDistanceObj));
        }
        if (member->hasLoc()) {
            _invalidationMap[member->loc] = *out;
        }
    }

    return PlanStage::NEED_TIME;
}
// 'distinct' command: validates the arguments, runs a distinct plan
// executor over the collection, de-duplicates the values of 'key' (arrays
// expanded), and returns them with plan summary statistics.
bool run(OperationContext* txn,
         const string& dbname,
         BSONObj& cmdObj,
         int,
         string& errmsg,
         BSONObjBuilder& result) {
    Timer t;

    // ensure that the key is a string
    uassert(18510,
            mongoutils::str::stream() << "The first argument to the distinct command "
                                      << "must be a string but was a "
                                      << typeName(cmdObj["key"].type()),
            cmdObj["key"].type() == mongo::String);

    // ensure that the where clause is a document
    if (cmdObj["query"].isNull() == false && cmdObj["query"].eoo() == false) {
        uassert(18511,
                mongoutils::str::stream() << "The query for the distinct command must be a "
                                          << "document but was a "
                                          << typeName(cmdObj["query"].type()),
                cmdObj["query"].type() == mongo::Object);
    }

    string key = cmdObj["key"].valuestrsafe();
    BSONObj keyPattern = BSON(key << 1);

    BSONObj query = getQuery(cmdObj);

    // Build the response array in a pre-sized buffer so the 16MB cap can be
    // enforced incrementally while appending.
    int bufSize = BSONObjMaxUserSize - 4096;
    BufBuilder bb(bufSize);
    char* start = bb.buf();

    BSONArrayBuilder arr(bb);
    BSONElementSet values;

    const string ns = parseNs(dbname, cmdObj);
    AutoGetCollectionForRead ctx(txn, ns);

    Collection* collection = ctx.getCollection();
    if (!collection) {
        // Missing collection: empty result with zeroed stats.
        result.appendArray("values", BSONObj());
        result.append("stats", BSON("n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0));
        return true;
    }

    auto statusWithPlanExecutor =
        getExecutorDistinct(txn, collection, query, key, PlanExecutor::YIELD_AUTO);
    if (!statusWithPlanExecutor.isOK()) {
        uasserted(17216,
                  mongoutils::str::stream() << "Can't get executor for query " << query << ": "
                                            << statusWithPlanExecutor.getStatus().toString());
        return 0;
    }

    unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

    BSONObj obj;
    PlanExecutor::ExecState state;
    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        // Distinct expands arrays.
        //
        // If our query is covered, each value of the key should be in the index key and
        // available to us without this.  If a collection scan is providing the data, we may
        // have to expand an array.
        BSONElementSet elts;
        obj.getFieldsDotted(key, elts);

        for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) {
            BSONElement elt = *it;
            // Skip values already emitted.
            if (values.count(elt)) {
                continue;
            }
            int currentBufPos = bb.len();

            uassert(17217, "distinct too big, 16mb cap",
                    (currentBufPos + elt.size() + 1024) < bufSize);

            arr.append(elt);
            // Track the copy written into the response buffer, so the set's
            // elements stay valid for the remainder of the scan.
            BSONElement x(start + currentBufPos);
            values.insert(x);
        }
    }

    // Get summary information about the plan.
    PlanSummaryStats stats;
    Explain::getSummaryStats(*exec, &stats);

    // The buffer must not have been reallocated; the set holds pointers into it.
    verify(start == bb.buf());

    result.appendArray("values", arr.done());

    {
        BSONObjBuilder b;
        b.appendNumber("n", stats.nReturned);
        b.appendNumber("nscanned", stats.totalKeysExamined);
        b.appendNumber("nscannedObjects", stats.totalDocsExamined);
        b.appendNumber("timems", t.millis());
        b.append("planSummary", Explain::getPlanSummary(exec.get()));
        result.append("stats", b.obj());
    }

    return true;
}
// Runs geoNear over a 2dsphere index: finds the single indexed geo field,
// builds a near query from the command, then streams results ordered by
// distance through the near cursor, applying the non-geo matcher manually
// and accumulating distance statistics.
static bool run2DSphereGeoNear(NamespaceDetails* nsDetails, int idxNo, BSONObj& cmdObj,
                               const GeoNearArguments &parsedArgs, string& errmsg,
                               BSONObjBuilder& result) {
    auto_ptr<IndexDescriptor> descriptor(CatalogHack::getDescriptor(nsDetails, idxNo));
    auto_ptr<S2AccessMethod> sam(new S2AccessMethod(descriptor.get()));
    const S2IndexingParams& params = sam->getParams();
    auto_ptr<S2NearIndexCursor> nic(new S2NearIndexCursor(descriptor.get(), params));

    // Collect the 2dsphere-typed fields from the key pattern.
    vector<string> geoFieldNames;
    BSONObjIterator i(descriptor->keyPattern());
    while (i.more()) {
        BSONElement e = i.next();
        if (e.type() == String && IndexNames::GEO_2DSPHERE == e.valuestr()) {
            geoFieldNames.push_back(e.fieldName());
        }
    }

    // NOTE(hk): If we add a new argument to geoNear, we could have a
    // 2dsphere index with multiple indexed geo fields, and the geoNear
    // could pick the one to run over.  Right now, we just require one.
    uassert(16552, "geoNear requires exactly one indexed geo field", 1 == geoFieldNames.size());
    NearQuery nearQuery(geoFieldNames[0]);
    uassert(16679, "Invalid geometry given as arguments to geoNear: " + cmdObj.toString(),
            nearQuery.parseFromGeoNear(cmdObj, params.radius));
    uassert(16683, "geoNear on 2dsphere index requires spherical", parsedArgs.isSpherical);

    // NOTE(hk): For a speedup, we could look through the query to see if
    // we've geo-indexed any of the fields in it.
    vector<GeoQuery> regions;

    nic->seek(parsedArgs.query, nearQuery, regions);

    // We do pass in the query above, but it's just so we can possibly use it in our index
    // scan.  We have to do our own matching.
    auto_ptr<Matcher> matcher(new Matcher(parsedArgs.query));

    double totalDistance = 0;
    BSONObjBuilder resultBuilder(result.subarrayStart("results"));
    double farthestDist = 0;

    int results;
    for (results = 0; results < parsedArgs.numWanted && !nic->isEOF(); ++results) {
        BSONObj currObj = nic->getValue().obj();
        if (!matcher->matches(currObj)) {
            // Rejected by the filter: don't count it against numWanted.
            --results;
            nic->next();
            continue;
        }

        double dist = nic->currentDistance();
        // If we got the distance in radians, output it in radians too.
        if (nearQuery.fromRadians) { dist /= params.radius; }
        dist *= parsedArgs.distanceMultiplier;
        totalDistance += dist;
        if (dist > farthestDist) { farthestDist = dist; }

        BSONObjBuilder oneResultBuilder(
            resultBuilder.subobjStart(BSONObjBuilder::numStr(results)));
        oneResultBuilder.append("dis", dist);
        if (parsedArgs.includeLocs) {
            BSONElementSet geoFieldElements;
            currObj.getFieldsDotted(geoFieldNames[0], geoFieldElements, false);
            // NOTE(review): with multiple geo values, each append overwrites
            // "loc" conceptually -- the last object value wins.
            for (BSONElementSet::iterator oi = geoFieldElements.begin();
                 oi != geoFieldElements.end(); ++oi) {
                if (oi->isABSONObj()) {
                    oneResultBuilder.appendAs(*oi, "loc");
                }
            }
        }

        oneResultBuilder.append("obj", currObj);
        oneResultBuilder.done();
        nic->next();
    }

    resultBuilder.done();

    BSONObjBuilder stats(result.subobjStart("stats"));
    stats.appendNumber("nscanned", nic->nscanned());
    // NOTE(review): when no result matches, results == 0 and avgDistance is
    // 0/0 (NaN) -- confirm whether callers tolerate that.
    stats.append("avgDistance", totalDistance / results);
    stats.append("maxDistance", farthestDist);
    stats.append("time", cc().curop()->elapsedMillis());
    stats.done();

    return true;
}
// Fill _results with all of the results in the annulus defined by _innerRadius and
// _outerRadius.  If no results are found, grow the annulus and repeat until success (or
// until the edge of the world).
void S2NearIndexCursor::fillResults() {
    verify(_results.empty());
    // Degenerate shell (inner >= outer) or shell entirely past the distance
    // cap: nothing can be found, bail out without scanning.
    if (_innerRadius >= _outerRadius) { return; }
    if (_innerRadius > _maxDistance) { return; }

    // We iterate until 1. our search radius is too big or 2. we find results.
    do {
        // Some of these arguments are opaque, look at the definitions of the involved classes.
        FieldRangeSet frs(_descriptor->parentNS().c_str(), makeFRSObject(), false, false);
        shared_ptr<FieldRangeVector> frv(new FieldRangeVector(frs, _specForFRV, 1));
        scoped_ptr<BtreeCursor> cursor(BtreeCursor::make(nsdetails(_descriptor->parentNS()),
                                                         _descriptor->getOnDisk(), frv, 0, 1));

        // The cursor may return the same obj more than once for a given
        // FRS, so we make sure to only consider it once in any given annulus.
        //
        // We don't want this outside of the 'do' loop because the covering
        // for an annulus may return an object whose distance to the query
        // point is actually contained in a subsequent annulus.  If we
        // didn't consider every object in a given annulus we might miss
        // the point.
        //
        // We don't use a global 'seen' because we get that by requiring
        // the distance from the query point to the indexed geo to be
        // within our 'current' annulus, and I want to dodge all yield
        // issues if possible.
        unordered_set<DiskLoc, DiskLoc::Hasher> seen;

        LOG(1) << "looking at annulus from " << _innerRadius << " to " << _outerRadius << endl;
        LOG(1) << "Total # returned: " << _stats._numReturned << endl;
        // Do the actual search through this annulus.
        for (; cursor->ok(); cursor->advance()) {
            // Don't bother to look at anything we've returned.
            if (_returned.end() != _returned.find(cursor->currLoc())) {
                ++_stats._returnSkip;
                continue;
            }

            ++_stats._nscanned;
            if (seen.end() != seen.find(cursor->currLoc())) {
                ++_stats._btreeDups;
                continue;
            }

            // Get distance interval from our query point to the cell.
            // If it doesn't overlap with our current shell, toss.
            // The geo key sits at position _nearFieldIndex in the compound
            // index key, so skip the preceding elements.
            BSONObj currKey(cursor->currKey());
            BSONObjIterator it(currKey);
            BSONElement geoKey;
            for (int i = 0; i <= _nearFieldIndex; ++i) {
                geoKey = it.next();
            }

            S2Cell keyCell = S2Cell(S2CellId::FromString(geoKey.String()));
            if (!_annulus.MayIntersect(keyCell)) {
                ++_stats._keyGeoSkip;
                continue;
            }

            // We have to add this document to seen *AFTER* the key intersection test.
            // A geometry may have several keys, one of which may be in our search shell and one
            // of which may be outside of it.  We don't want to ignore a document just because
            // one of its covers isn't inside this annulus.
            seen.insert(cursor->currLoc());

            // At this point forward, we will not examine the document again in this annulus.

            const BSONObj& indexedObj = cursor->currLoc().obj();

            // Match against indexed geo fields.
            ++_stats._geoMatchTested;
            size_t geoFieldsMatched = 0;
            // See if the object actually overlaps w/the geo query fields.
            for (size_t i = 0; i < _indexedGeoFields.size(); ++i) {
                BSONElementSet geoFieldElements;
                indexedObj.getFieldsDotted(_indexedGeoFields[i].getField(), geoFieldElements,
                                           false);
                if (geoFieldElements.empty()) { continue; }

                bool match = false;

                // A field matches if ANY of its (possibly multiple) geometry
                // values satisfies the corresponding predicate.
                for (BSONElementSet::iterator oi = geoFieldElements.begin();
                     !match && (oi != geoFieldElements.end()); ++oi) {
                    if (!oi->isABSONObj()) { continue; }
                    const BSONObj &geoObj = oi->Obj();
                    GeometryContainer geoContainer;
                    uassert(16762, "ill-formed geometry: " + geoObj.toString(),
                            geoContainer.parseFrom(geoObj));
                    match = _indexedGeoFields[i].satisfiesPredicate(geoContainer);
                }

                if (match) { ++geoFieldsMatched; }
            }

            // Every geo predicate in the query must be satisfied.
            if (geoFieldsMatched != _indexedGeoFields.size()) { continue; }

            // Get all the fields with that name from the document.
            BSONElementSet geoFieldElements;
            indexedObj.getFieldsDotted(_nearQuery.field, geoFieldElements, false);
            if (geoFieldElements.empty()) { continue; }

            ++_stats._inAnnulusTested;
            double minDistance = 1e20;
            // Look at each field in the document and take the min. distance.
            for (BSONElementSet::iterator oi = geoFieldElements.begin();
                 oi != geoFieldElements.end(); ++oi) {
                if (!oi->isABSONObj()) { continue; }
                double dist = distanceTo(oi->Obj());
                minDistance = min(dist, minDistance);
            }

            // We could be in an annulus, yield, add new points closer to
            // query point than the last point we returned, then unyield.
            // This would return points out of order.
            if (minDistance < _returnedDistance) { continue; }

            // If the min. distance satisfies our distance criteria
            if (minDistance >= _innerRadius && minDistance < _outerRadius) {
                // The result is valid.  We have to de-dup ourselves here.
                if (_returned.end() == _returned.find(cursor->currLoc())) {
                    _results.push(Result(cursor->currLoc(), cursor->currKey(),
                                         minDistance));
                }
            }
        }

        // Adapt the annulus growth rate to the hit count: widen faster when
        // sparse, narrow the increment when a shell is overly dense.
        if (_results.empty()) {
            LOG(1) << "results empty!\n";
            _radiusIncrement *= 2;
            nextAnnulus();
        } else if (_results.size() < 300) {
            _radiusIncrement *= 2;
        } else if (_results.size() > 600) {
            _radiusIncrement /= 2;
        }
    } while (_results.empty()
             && _innerRadius < _maxDistance
             && _innerRadius < _outerRadius);
    LOG(1) << "Filled shell with " << _results.size() << " results" << endl;
}
// distinct command: collects the set of unique values of 'key' across
// documents matching the optional "query" argument, returning them in a
// "values" array plus a "stats" subobject (n/nscanned/nscannedObjects/
// timems/cursor).  Returns true; uasserts if the accumulated values would
// exceed the 16MB BSON cap.
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg,
         BSONObjBuilder& result, bool fromRepl ) {
    Timer t;
    string ns = dbname + '.' + cmdObj.firstElement().valuestr();

    string key = cmdObj["key"].valuestrsafe();
    BSONObj keyPattern = BSON( key << 1 );

    BSONObj query = getQuery( cmdObj );

    // Reserve headroom below the max user BSON size so the final response
    // document still fits.
    int bufSize = BSONObjMaxUserSize - 4096;
    BufBuilder bb( bufSize );
    char * start = bb.buf();

    BSONArrayBuilder arr( bb );
    BSONElementSet values;

    long long nscanned = 0; // locations looked at
    long long nscannedObjects = 0; // full objects looked at
    long long n = 0; // matches
    MatchDetails md;

    Collection *cl = getCollection( ns );

    // Missing collection: report an empty result set, not an error.
    if ( ! cl ) {
        result.appendArray( "values" , BSONObj() );
        result.append( "stats" , BSON( "n" << 0 <<
                                       "nscanned" << 0 <<
                                       "nscannedObjects" << 0 ) );
        return true;
    }

    shared_ptr<Cursor> cursor;
    if ( ! query.isEmpty() ) {
        cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );
    }
    else {

        // query is empty, so lets see if we can find an index
        // with the key so we don't have to hit the raw data
        for (int i = 0; i < cl->nIndexes(); i++) {
            IndexDetails &idx = cl->idx(i);
            // Multikey indexes are skipped here — presumably because their
            // keys don't map 1:1 to documents; TODO confirm rationale.
            if (cl->isMultikey(i)) {
                continue;
            }

            if ( idx.inKeyPattern( key ) ) {
                cursor = getBestGuessCursor( ns.c_str() , BSONObj() , idx.keyPattern() );
                if( cursor.get() ) break;
            }
        }

        if ( ! cursor.get() ) {
            cursor = getOptimizedCursor(ns.c_str() , query , BSONObj() );
        }
    }

    verify( cursor );
    string cursorName = cursor->toString();

    // ClientCursor protects the cursor across potential interrupts/yields.
    auto_ptr<ClientCursor> cc (new ClientCursor(QueryOption_NoCursorTimeout, cursor, ns));

    for ( ; cursor->ok(); cursor->advance() ) {
        nscanned++;
        bool loadedRecord = false;

        if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currPK() ) ) {
            n++;

            BSONObj holder;
            BSONElementSet temp;
            // getFieldsDotted's return indicates whether the fields came from
            // the index alone; negation marks that the full record was loaded.
            loadedRecord = ! cc->getFieldsDotted( key , temp, holder );

            for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                BSONElement e = *i;
                if ( values.count( e ) )
                    continue;

                int now = bb.len();

                uassert(10044, "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                arr.append( e );
                // 'x' aliases the element just appended inside bb's buffer;
                // this is only valid because bb never reallocates (checked by
                // the verify(start == bb.buf()) below).
                BSONElement x( start + now );

                values.insert( x );
            }
        }

        if ( loadedRecord || md.hasLoadedRecord() )
            nscannedObjects++;

        RARELY killCurrentOp.checkForInterrupt();
    }

    // The aliased BSONElements in 'values' require the buffer to have stayed
    // put; a realloc here would mean dangling pointers.
    verify( start == bb.buf() );

    result.appendArray( "values" , arr.done() );

    {
        BSONObjBuilder b;
        b.appendNumber( "n" , n );
        b.appendNumber( "nscanned" , nscanned );
        b.appendNumber( "nscannedObjects" , nscannedObjects );
        b.appendNumber( "timems" , t.millis() );
        b.append( "cursor" , cursorName );
        result.append( "stats" , b.obj() );
    }

    return true;
}
// Generates the set of index keys for document 'obj' under a 2dsphere index
// described by 'keyPattern'/'params'.  Keys are the Cartesian product of the
// per-field key sets (geo fields produce S2 cell keys, other fields literal
// keys).  If 'multikeyPaths' is non-null it is filled with, for each indexed
// field, the path components that make the index multikey.
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys,
                                      MultikeyPaths* multikeyPaths) {
    BSONObjSet keysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    if (multikeyPaths) {
        invariant(multikeyPaths->empty());
        multikeyPaths->resize(keyPattern.nFields());
    }
    size_t posInIdx = 0;

    // We output keys in the same order as the fields we index.
    for (const auto keyElem : keyPattern) {
        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        const bool expandArrayOnTrailingField = false;
        std::set<size_t>* arrayComponents = multikeyPaths ? &(*multikeyPaths)[posInIdx] : nullptr;
        dps::extractAllElementsAlongPath(
            obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents);

        // Trailing array values aren't being expanded, so we still need to determine whether the
        // last component of the indexed path 'keyElem.fieldName()' causes the index to be multikey.
        // We say that it does if
        // (a) the last component of the indexed path ever refers to an array value (regardless of
        //     the number of array elements)
        // (b) the last component of the indexed path ever refers to GeoJSON data that requires
        //     multiple cells for its covering.
        bool lastPathComponentCausesIndexToBeMultikey;
        BSONObjSet keysForThisField = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        if (IndexNames::GEO_2DSPHERE == keyElem.valuestr()) {
            if (params.indexVersion >= S2_INDEX_VERSION_2) {
                // For >= V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field.  So we look for these cases and
                // throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    } else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // >= V2 2dsphere indices require that at least one geo field to be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            lastPathComponentCausesIndexToBeMultikey =
                getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            lastPathComponentCausesIndexToBeMultikey =
                getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        if (multikeyPaths && lastPathComponentCausesIndexToBeMultikey) {
            // Record the zero-based index of the trailing path component as
            // multikey for this field.
            const size_t pathLengthOfThisField = FieldRef{keyElem.fieldNameStringData()}.numParts();
            invariant(pathLengthOfThisField > 0);
            (*multikeyPaths)[posInIdx].insert(pathLengthOfThisField - 1);
        }

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            ++posInIdx;
            continue;
        }

        BSONObjSet updatedKeysToAdd = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end();
                 ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
        ++posInIdx;
    }

    // Make sure that if we're >= V2 there's at least one geo field present in the doc.
    if (params.indexVersion >= S2_INDEX_VERSION_2) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "Insert of geo object generated a high number of keys."
                  << " num keys: " << keysToAdd.size() << " obj inserted: " << redact(obj);
    }

    *keys = keysToAdd;
}
// Pulls one unit of work from the child stage.  When the child advances, the
// document's minimum distance to the query point is computed over all values
// of the indexed geo field; documents falling inside the current annulus are
// buffered in _results (with optional distance/point metadata).  Returns the
// child's state, NEED_TIME when buffering continues, or FAILURE on malformed
// geometry.
PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
    PlanStage::StageState state = _child->work(out);

    // All done reading from _child.
    if (PlanStage::IS_EOF == state) {
        _child.reset();

        // Adjust the annulus size depending on how many results we got.
        if (_results.empty()) {
            _radiusIncrement *= 2;
        } else if (_results.size() < 300) {
            _radiusIncrement *= 2;
        } else if (_results.size() > 600) {
            _radiusIncrement /= 2;
        }

        // Make a new ixscan next time.
        return PlanStage::NEED_TIME;
    }

    // Nothing to do unless we advance.
    if (PlanStage::ADVANCED != state) { return state; }

    // TODO Speed improvements:
    //
    // 0. Modify fetch to preserve key data and test for intersection w/annulus.
    //
    // 1. keep track of what we've seen in this scan and possibly ignore it.
    //
    // 2. keep track of results we've returned before and ignore them.

    WorkingSetMember* member = _ws->get(*out);
    // Must have an object in order to get geometry out of it.
    verify(member->hasObj());

    // Get all the fields with that name from the document.
    BSONElementSet geom;
    member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
    // No geo data in this doc: ignore it and keep working.
    if (geom.empty()) {return PlanStage::NEED_TIME; }

    // Some value that any distance we can calculate will be less than.
    double minDistance = numeric_limits<double>::max();
    BSONObj minDistanceObj;
    for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
        if (!git->isABSONObj()) {
            // Non-object geometry from the child is an internal error.
            mongoutils::str::stream ss;
            ss << "s2near stage read invalid geometry element " << *git << " from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            return PlanStage::FAILURE;
        }
        BSONObj obj = git->Obj();

        double distToObj;
        if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
            // Track the closest geometry and remember which one it was.
            if (distToObj < minDistance) {
                minDistance = distToObj;
                minDistanceObj = obj;
            }
        }
        else {
            // Best-effort: an unparseable geometry is logged, not fatal.
            warning() << "unknown geometry: " << obj.toString();
        }
    }

    // If the distance to the doc satisfies our distance criteria, add it to our buffered
    // results.
    if (minDistance >= _innerRadius &&
        (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
        _results.push(Result(*out, minDistance));
        if (_params.addDistMeta) {
            member->addComputed(new GeoDistanceComputedData(minDistance));
        }
        if (_params.addPointMeta) {
            member->addComputed(new GeoNearPointComputedData(minDistanceObj));
        }
        if (member->hasLoc()) {
            // Remember loc -> WSID so the buffered result can be fixed up if
            // the underlying document is invalidated.
            _invalidationMap[member->loc] = *out;
        }
    }

    return PlanStage::NEED_TIME;
}
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result) { const string ns = parseNs(dbname, cmdObj); const NamespaceString nss(ns); const ExtensionsCallbackReal extensionsCallback(txn, &nss); auto parsedDistinct = ParsedDistinct::parse(txn, nss, cmdObj, extensionsCallback, false); if (!parsedDistinct.isOK()) { return appendCommandStatus(result, parsedDistinct.getStatus()); } if (!parsedDistinct.getValue().getQuery()->getQueryRequest().getCollation().isEmpty() && serverGlobalParams.featureCompatibility.version.load() == ServerGlobalParams::FeatureCompatibility::Version::k32) { return appendCommandStatus( result, Status(ErrorCodes::InvalidOptions, "The featureCompatibilityVersion must be 3.4 to use collation. See " "http://dochub.mongodb.org/core/3.4-feature-compatibility.")); } AutoGetCollectionOrViewForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); if (ctx.getView()) { ctx.releaseLocksForView(); auto viewAggregation = parsedDistinct.getValue().asAggregationCommand(); if (!viewAggregation.isOK()) { return appendCommandStatus(result, viewAggregation.getStatus()); } BSONObjBuilder aggResult; (void)Command::findCommand("aggregate") ->run(txn, dbname, viewAggregation.getValue(), options, errmsg, aggResult); if (ResolvedView::isResolvedViewErrorResponse(aggResult.asTempObj())) { result.appendElements(aggResult.obj()); return false; } ViewResponseFormatter formatter(aggResult.obj()); Status formatStatus = formatter.appendAsDistinctResponse(&result); if (!formatStatus.isOK()) { return appendCommandStatus(result, formatStatus); } return true; } auto executor = getExecutorDistinct( txn, collection, ns, &parsedDistinct.getValue(), PlanExecutor::YIELD_AUTO); if (!executor.isOK()) { return appendCommandStatus(result, executor.getStatus()); } { stdx::lock_guard<Client>(*txn->getClient()); CurOp::get(txn)->setPlanSummary_inlock( Explain::getPlanSummary(executor.getValue().get())); } string key = 
cmdObj[ParsedDistinct::kKeyField].valuestrsafe(); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb(bufSize); char* start = bb.buf(); BSONArrayBuilder arr(bb); BSONElementSet values(executor.getValue()->getCanonicalQuery()->getCollator()); BSONObj obj; PlanExecutor::ExecState state; while (PlanExecutor::ADVANCED == (state = executor.getValue()->getNext(&obj, NULL))) { // Distinct expands arrays. // // If our query is covered, each value of the key should be in the index key and // available to us without this. If a collection scan is providing the data, we may // have to expand an array. BSONElementSet elts; dps::extractAllElementsAlongPath(obj, key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } int currentBufPos = bb.len(); uassert(17217, "distinct too big, 16mb cap", (currentBufPos + elt.size() + 1024) < bufSize); arr.append(elt); BSONElement x(start + currentBufPos); values.insert(x); } } // Return an error if execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { log() << "Plan executor error during distinct command: " << redact(PlanExecutor::statestr(state)) << ", stats: " << redact(Explain::getWinningPlanStats(executor.getValue().get())); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during distinct command: " << WorkingSetCommon::toStatusString(obj))); } auto curOp = CurOp::get(txn); // Get summary information about the plan. 
PlanSummaryStats stats; Explain::getSummaryStats(*executor.getValue(), &stats); if (collection) { collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed); } curOp->debug().setPlanSummaryMetrics(stats); if (curOp->shouldDBProfile()) { BSONObjBuilder execStatsBob; Explain::getWinningPlanStats(executor.getValue().get(), &execStatsBob); curOp->debug().execStats = execStatsBob.obj(); } verify(start == bb.buf()); result.appendArray("values", arr.done()); return true; }
// distinct command (Runner-based): collects unique values of 'key' over
// documents matching the query, returning them in "values" plus a "stats"
// subobject populated from the runner's explain plan.  uasserts on
// canonicalization/runner failure or if values exceed the 16MB cap.
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg,
         BSONObjBuilder& result, bool fromRepl ) {
    Timer t;
    string ns = dbname + '.' + cmdObj.firstElement().valuestr();

    string key = cmdObj["key"].valuestrsafe();
    BSONObj keyPattern = BSON( key << 1 );

    BSONObj query = getQuery( cmdObj );

    // Reserve headroom below the max user BSON size so the response fits.
    int bufSize = BSONObjMaxUserSize - 4096;
    BufBuilder bb( bufSize );
    char * start = bb.buf();

    BSONArrayBuilder arr( bb );
    BSONElementSet values;

    long long nscanned = 0; // locations looked at
    long long nscannedObjects = 0; // full objects looked at
    long long n = 0; // matches

    NamespaceDetails * d = nsdetails( ns );
    string cursorName;

    // Missing collection: report an empty result set, not an error.
    if (!d) {
        result.appendArray( "values" , BSONObj() );
        result.append("stats", BSON("n" << 0 <<
                                    "nscanned" << 0 <<
                                    "nscannedObjects" << 0));
        return true;
    }

    CanonicalQuery* cq;
    // XXX: project out just the field we're distinct-ing.  May be covered...
    if (!CanonicalQuery::canonicalize(ns, query, &cq).isOK()) {
        uasserted(17215, "Can't canonicalize query " + query.toString());
        return 0;  // unreachable: uasserted throws
    }

    Runner* rawRunner;
    if (!getRunner(cq, &rawRunner).isOK()) {
        uasserted(17216, "Can't get runner for query " + query.toString());
        return 0;  // unreachable: uasserted throws
    }

    auto_ptr<Runner> runner(rawRunner);

    // Register the runner so it survives yields; the safety object
    // deregisters it even if an exception escapes below.
    auto_ptr<DeregisterEvenIfUnderlyingCodeThrows> safety;
    ClientCursor::registerRunner(runner.get());
    runner->setYieldPolicy(Runner::YIELD_AUTO);
    safety.reset(new DeregisterEvenIfUnderlyingCodeThrows(runner.get()));

    BSONObj obj;
    Runner::RunnerState state;
    while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
        // Distinct expands arrays: a single document may contribute several
        // elements for the key path.
        BSONElementSet elts;
        obj.getFieldsDotted(key, elts);

        for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) {
            BSONElement elt = *it;
            if (values.count(elt)) {
                continue;
            }
            int currentBufPos = bb.len();

            uassert(17217, "distinct too big, 16mb cap",
                    (currentBufPos + elt.size() + 1024) < bufSize);

            arr.append(elt);
            // 'x' aliases the element just appended inside bb's buffer; valid
            // only because bb never reallocates (verified below).
            BSONElement x(start + currentBufPos);
            values.insert(x);
        }
    }

    // Pull the stats out of the runner's explain plan; best-effort — on
    // failure the zero-initialized counters are reported.
    TypeExplain* bareExplain;
    Status res = runner->getExplainPlan(&bareExplain);
    if (res.isOK()) {
        auto_ptr<TypeExplain> explain(bareExplain);
        if (explain->isCursorSet()) {
            cursorName = explain->getCursor();
        }
        n = explain->getN();
        nscanned = explain->getNScanned();
        nscannedObjects = explain->getNScannedObjects();
    }

    // The aliased BSONElements in 'values' require that the buffer never
    // moved.
    verify( start == bb.buf() );

    result.appendArray( "values" , arr.done() );

    {
        BSONObjBuilder b;
        b.appendNumber( "n" , n );
        b.appendNumber( "nscanned" , nscanned );
        b.appendNumber( "nscannedObjects" , nscannedObjects );
        b.appendNumber( "timems" , t.millis() );
        b.append( "cursor" , cursorName );
        result.append( "stats" , b.obj() );
    }

    return true;
}