/**
 * This is called by db/ops/query.cpp.  This is the entry point for answering a query.
 */
string newRunQuery(Message& m, QueryMessage& q, CurOp& curop, Message& result) {
    // This is a read lock.
    Client::ReadContext ctx(q.ns, dbpath);

    // Parse, canonicalize, plan, transcribe, and get a runner.
    Runner* rawRunner;
    CanonicalQuery* cq;
    Status status = getRunner(q, &rawRunner, &cq);
    if (!status.isOK()) {
        uasserted(17007, "Couldn't process query " + q.query.toString()
                         + " why: " + status.reason());
    }
    verify(NULL != rawRunner);
    auto_ptr<Runner> runner(rawRunner);

    log() << "Running query on new system: " << cq->toString();

    // We freak out later if this changes before we're done with the query.
    const ChunkVersion shardingVersionAtStart = shardingState.getVersion(q.ns);

    // We use this a lot below.
    const LiteParsedQuery& pq = cq->getParsed();

    // TODO: Remove when impl'd
    if (pq.hasOption(QueryOption_OplogReplay)) {
        warning() << "haven't implemented findingstartcursor yet\n";
    }

    // Handle query option $maxTimeMS (not used with commands).
    curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
    killCurrentOp.checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

    // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
    replVerifyReadsOk(&pq);

    // If this exists, the collection is sharded.
    // If it doesn't exist, we can assume we're not sharded.
    // If we're sharded, we might encounter data that is not consistent with our sharding state.
    // We must ignore this data.
    CollectionMetadataPtr collMetadata;
    if (!shardingState.needCollectionMetadata(pq.ns())) {
        collMetadata = CollectionMetadataPtr();
    }
    else {
        collMetadata = shardingState.getCollectionMetadata(pq.ns());
    }

    // Run the query.
    // bb is used to hold query results.
    // This buffer should contain either requested documents per query or
    // explain information, but not both.
    BufBuilder bb(32768);
    bb.skip(sizeof(QueryResult));

    // How many results have we obtained from the runner?
    int numResults = 0;

    // If we're replaying the oplog, we save the last time that we read.
    OpTime slaveReadTill;

    // Do we save the Runner in a ClientCursor for getMore calls later?
    bool saveClientCursor = false;

    // We turn on auto-yielding for the runner here.  The runner registers itself with the
    // active runners list in ClientCursor.
    ClientCursor::registerRunner(runner.get());
    runner->setYieldPolicy(Runner::YIELD_AUTO);
    auto_ptr<DeregisterEvenIfUnderlyingCodeThrows> safety(
        new DeregisterEvenIfUnderlyingCodeThrows(runner.get()));

    BSONObj obj;
    Runner::RunnerState state;

    // Set this outside the loop.  We will need to use it both within the loop and when
    // deciding to fill in explain information.
    const bool isExplain = pq.isExplain();

    while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
        // If we're sharded, make sure that we don't return any data that hasn't been
        // migrated off of our shard yet.
        if (collMetadata) {
            // This information can change if we yield and as such we must make sure to
            // re-fetch it if we yield.
            KeyPattern kp(collMetadata->getKeyPattern());
            // This performs excessive BSONObj creation but that's OK for now.
            if (!collMetadata->keyBelongsToMe(kp.extractSingleKey(obj))) {
                continue;
            }
        }

        // Add result to output buffer.  This is unnecessary if explain info is requested.
        if (!isExplain) {
            bb.appendBuf((void*)obj.objdata(), obj.objsize());
        }

        // Count the result.
        ++numResults;

        // Possibly note slave's position in the oplog.
        if (pq.hasOption(QueryOption_OplogReplay)) {
            BSONElement e = obj["ts"];
            if (Date == e.type() || Timestamp == e.type()) {
                slaveReadTill = e._opTime();
            }
        }

        // TODO: only one type of 2d search doesn't support this.  We need a way to pull it
        // out of CanonicalQuery. :(
        const bool supportsGetMore = true;
        if (isExplain) {
            if (enoughForExplain(pq, numResults)) {
                break;
            }
        }
        else if (!supportsGetMore && (enough(pq, numResults)
                                      || bb.len() >= MaxBytesToReturnToClientAtOnce)) {
            break;
        }
        else if (enoughForFirstBatch(pq, numResults, bb.len())) {
            // If only one result requested assume it's a findOne() and don't save the cursor.
            if (pq.wantMore() && 1 != pq.getNumToReturn()) {
                saveClientCursor = true;
            }
            break;
        }
    }

    // If we cache the runner later, we want to deregister it as it receives notifications
    // anyway by virtue of being cached.
    //
    // If we don't cache the runner later, we are deleting it, so it must be deregistered.
    //
    // So, no matter what, deregister the runner.
    safety.reset();

    // Caller expects exceptions thrown in certain cases:
    // * in-memory sort using too much RAM.
    if (Runner::RUNNER_ERROR == state) {
        uasserted(17144, "Runner error, memory limit for sort probably exceeded");
    }

    // Why save a dead runner?
    if (Runner::RUNNER_DEAD == state) {
        saveClientCursor = false;
    }
    else if (pq.hasOption(QueryOption_CursorTailable) && (1 != pq.getNumToReturn())) {
        // If pq.hasOption(tailable) the only plan the planner will output is a collscan
        // with tailable set.
        saveClientCursor = true;
    }

    // TODO(greg): This will go away soon.
    if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
        // If the version changed during the query, we might be missing some data, and it's
        // safe to send this as mongos can resend at this point.
        throw SendStaleConfigException(pq.ns(), "version changed during initial query",
                                       shardingVersionAtStart,
                                       shardingState.getVersion(pq.ns()));
    }

    long long ccId = 0;
    if (saveClientCursor) {
        // We won't use the runner until it's getMore'd.
        runner->saveState();

        // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
        // inserted into a global map by its ctor.
        ClientCursor* cc = new ClientCursor(runner.get(), cq->getParsed().getOptions(),
                                            cq->getParsed().getFilter());
        ccId = cc->cursorid();
        log() << "caching runner with cursorid " << ccId
              << " after returning " << numResults << " results" << endl;

        // ClientCursor takes ownership of runner.  Release to make sure it's not deleted.
        runner.release();

        // TODO document
        if (pq.hasOption(QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
            cc->slaveReadTill(slaveReadTill);
        }

        // TODO document
        if (pq.hasOption(QueryOption_Exhaust)) {
            curop.debug().exhaust = true;
        }

        // Set attributes for getMore.
        cc->setCollMetadata(collMetadata);
        cc->setPos(numResults);

        // If the query had a time limit, remaining time is "rolled over" to the cursor (for
        // use by future getmore ops).
        cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());
    }

    // Append explain information to query results.
    if (isExplain) {
        BSONObjBuilder bob;
        bob.append("n", numResults);
        BSONObj obj = bob.done();
        bb.appendBuf((void*)obj.objdata(), obj.objsize());

        // The explain output is actually a result.
        numResults = 1;
    }

    // Add the results from the query into the output buffer.
    result.appendData(bb.buf(), bb.len());
    bb.decouple();

    // Fill out the output buffer's header.
    QueryResult* qr = static_cast<QueryResult*>(result.header());
    qr->cursorId = ccId;
    curop.debug().cursorid = (0 == ccId ? -1 : ccId);
    qr->setResultFlagsToOk();
    qr->setOperation(opReply);
    qr->startingFrom = 0;
    qr->nReturned = numResults;

    // TODO: nscanned is bogus.
    // curop.debug().nscanned = ( cursor ? cursor->nscanned() : 0LL );
    curop.debug().ntoskip = pq.getSkip();
    curop.debug().nreturned = numResults;

    // curop.debug().exhaust is set above.
    return curop.debug().exhaust ? pq.ns() : "";
}
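// The batch-limit helpers used above (enough, enoughForExplain, enoughForFirstBatch) are
// defined elsewhere in this module.  What follows is a minimal sketch of what such a helper
// looks like, assuming an ntoreturn-based limit plus a size cap on the first batch; the
// name, constants, and exact cutoffs below are assumptions for illustration, not the real
// definitions.
static bool enoughForFirstBatchSketch(const LiteParsedQuery& pq, int numDocs, int bytesBuffered) {
    if (0 == pq.getNumToReturn()) {
        // No explicit limit: stop the first batch once we have a reasonable number of
        // documents or the buffer is getting large (both thresholds assumed here).
        return bytesBuffered > 1024 * 1024 || numDocs >= 101;
    }
    // With an explicit limit, stop at the limit or at the per-batch byte cap.
    return numDocs >= pq.getNumToReturn() || bytesBuffered > MaxBytesToReturnToClientAtOnce;
}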
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg,
         BSONObjBuilder& result, bool fromRepl ) {
    Timer t;
    string ns = dbname + '.' + cmdObj.firstElement().valuestr();

    string key = cmdObj["key"].valuestrsafe();
    BSONObj keyPattern = BSON( key << 1 );

    BSONObj query = getQuery( cmdObj );

    int bufSize = BSONObjMaxUserSize - 4096;
    BufBuilder bb( bufSize );
    char * start = bb.buf();

    BSONArrayBuilder arr( bb );
    BSONElementSet values;

    long long nscanned = 0;        // locations looked at
    long long nscannedObjects = 0; // full objects looked at
    long long n = 0;               // matches
    MatchDetails md;

    NamespaceDetails * d = nsdetails( ns.c_str() );

    if ( ! d ) {
        result.appendArray( "values" , BSONObj() );
        result.append( "stats" , BSON( "n" << 0 <<
                                       "nscanned" << 0 <<
                                       "nscannedObjects" << 0 ) );
        return true;
    }

    shared_ptr<Cursor> cursor;
    if ( ! query.isEmpty() ) {
        cursor = NamespaceDetailsTransient::getCursor( ns.c_str() , query , BSONObj() );
    }
    else {
        // query is empty, so lets see if we can find an index
        // with the key so we don't have to hit the raw data
        NamespaceDetails::IndexIterator ii = d->ii();
        while ( ii.more() ) {
            IndexDetails& idx = ii.next();

            if ( d->isMultikey( ii.pos() - 1 ) )
                continue;

            if ( idx.inKeyPattern( key ) ) {
                cursor = NamespaceDetailsTransient::bestGuessCursor( ns.c_str() ,
                                                                     BSONObj() ,
                                                                     idx.keyPattern() );
                if ( cursor.get() )
                    break;
            }
        }

        if ( ! cursor.get() )
            cursor = NamespaceDetailsTransient::getCursor( ns.c_str() , query , BSONObj() );
    }

    verify( cursor );
    string cursorName = cursor->toString();

    auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , cursor , ns ) );

    while ( cursor->ok() ) {
        nscanned++;
        bool loadedRecord = false;

        if ( cursor->currentMatches( &md ) && !cursor->getsetdup( cursor->currLoc() ) ) {
            n++;

            BSONObj holder;
            BSONElementSet temp;
            loadedRecord = ! cc->getFieldsDotted( key , temp, holder );

            for ( BSONElementSet::iterator i=temp.begin(); i!=temp.end(); ++i ) {
                BSONElement e = *i;
                if ( values.count( e ) )
                    continue;

                int now = bb.len();

                uassert(10044, "distinct too big, 16mb cap", ( now + e.size() + 1024 ) < bufSize );

                arr.append( e );
                BSONElement x( start + now );

                values.insert( x );
            }
        }

        if ( loadedRecord || md.hasLoadedRecord() )
            nscannedObjects++;

        cursor->advance();

        if (!cc->yieldSometimes( ClientCursor::MaybeCovered )) {
            cc.release();
            break;
        }

        RARELY killCurrentOp.checkForInterrupt();
    }

    verify( start == bb.buf() );

    result.appendArray( "values" , arr.done() );

    {
        BSONObjBuilder b;
        b.appendNumber( "n" , n );
        b.appendNumber( "nscanned" , nscanned );
        b.appendNumber( "nscannedObjects" , nscannedObjects );
        b.appendNumber( "timems" , t.millis() );
        b.append( "cursor" , cursorName );
        result.append( "stats" , b.obj() );
    }

    return true;
}
// Gets the string representation of a BSON object that can be correctly written to a CSV file
string csvString (const BSONElement& object) {
    const char* binData; // Only used with BinData type

    switch (object.type()) {
    case MinKey:
        return "$MinKey";
    case MaxKey:
        return "$MaxKey";
    case NumberInt:
    case NumberDouble:
    case NumberLong:
    case Bool:
        return object.toString(false);
    case String:
    case Symbol:
        return csvEscape(object.toString(false), true);
    case Object:
        return csvEscape(object.jsonString(Strict, false));
    case Array:
        return csvEscape(object.jsonString(Strict, false));
    case BinData:
        int len;
        binData = object.binDataClean(len);
        return toHex(binData, len);
    case jstOID:
        return "ObjectID(" + object.OID().toString() + ")"; // OIDs are always 24 bytes
    case Date:
        return timeToISOString(object.Date() / 1000);
    case Timestamp:
        return csvEscape(object.jsonString(Strict, false));
    case RegEx:
        return csvEscape("/" + string(object.regex()) + "/" + string(object.regexFlags()));
    case Code:
        return csvEscape(object.toString(false));
    case CodeWScope:
        if (string(object.codeWScopeScopeData()) == "") {
            return csvEscape(object.toString(false));
        }
        else {
            return csvEscape(object.jsonString(Strict, false));
        }
    case EOO:
    case Undefined:
    case DBRef:
    case jstNULL:
        cerr << "Invalid BSON object type for CSV output: " << object.type() << endl;
        return "";
    }
    // Can never get here
    verify(false);
    return "";
}
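// csvEscape is used above but defined elsewhere.  A minimal sketch, assuming RFC 4180-style
// quoting (wrap the value in double quotes and double any embedded quote); the name, the
// 'forceQuote' parameter, and its default are assumptions for illustration, not the tool's
// actual implementation.
string csvEscapeSketch(const string& str, bool forceQuote = false) {
    bool needsQuoting = forceQuote || str.find_first_of(",\"\r\n") != string::npos;
    if (!needsQuoting) {
        return str;
    }
    string out = "\"";
    for (size_t i = 0; i < str.size(); ++i) {
        if (str[i] == '"') {
            out += '"'; // a literal quote is escaped by doubling it
        }
        out += str[i];
    }
    out += '"';
    return out;
}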
bool debug( const BSONObj& o , int depth=0) {
    string prefix = "";
    for ( int i=0; i<depth; i++ ) {
        prefix += "\t\t\t";
    }

    int read = 4;

    try {
        cout << prefix << "--- new object ---\n";
        cout << prefix << "\t size : " << o.objsize() << "\n";
        BSONObjIterator i(o);
        while ( i.more() ) {
            BSONElement e = i.next();
            cout << prefix << "\t\t " << e.fieldName() << "\n"
                 << prefix << "\t\t\t type:" << setw(3) << e.type() << " size: " << e.size()
                 << endl;
            if ( ( read + e.size() ) > o.objsize() ) {
                cout << prefix << " SIZE DOES NOT WORK" << endl;
                return false;
            }
            read += e.size();
            try {
                e.validate();
                if ( e.isABSONObj() ) {
                    if ( ! debug( e.Obj() , depth + 1 ) ) {
                        //return false;
                        cout << prefix << "\t\t\t BAD BAD BAD" << endl;

                        if ( e.size() < 1000 ) {
                            cout << "---\n" << e.Obj().hexDump() << "\n---" << endl;
                        }
                    }
                }
                else if ( e.type() == String && ! isValidUTF8( e.valuestr() ) ) {
                    cout << prefix << "\t\t\t" << "bad utf8 String!" << endl;
                }
                else if ( logLevel > 0 ) {
                    cout << prefix << "\t\t\t" << e << endl;
                }
            }
            catch ( std::exception& e ) {
                cout << prefix << "\t\t\t bad value: " << e.what() << endl;
            }
        }
    }
    catch ( std::exception& e ) {
        cout << prefix << "\tbad\t" << e.what() << endl;
        cout << "----\n" << o.hexDump() << "\n---" << endl;
    }
    return true;
}
/**
 * Actually applies a reduce to a list of tuples (key, value).
 * After the call, tuples will hold a single tuple {"0": key, "1": value}.
 */
void JSReducer::_reduce( const BSONList& tuples , BSONObj& key , int& endSizeEstimate ) {
    uassert( 10074 , "need values" , tuples.size() );

    int sizeEstimate = ( tuples.size() * tuples.begin()->getField( "value" ).size() ) + 128;

    // need to build the reduce args: ( key, [values] )
    BSONObjBuilder reduceArgs( sizeEstimate );
    boost::scoped_ptr<BSONArrayBuilder> valueBuilder;
    int sizeSoFar = 0;
    unsigned n = 0;
    for ( ; n<tuples.size(); n++ ) {
        BSONObjIterator j(tuples[n]);
        BSONElement keyE = j.next();
        if ( n == 0 ) {
            reduceArgs.append( keyE );
            key = keyE.wrap();
            sizeSoFar = 5 + keyE.size();
            valueBuilder.reset(new BSONArrayBuilder( reduceArgs.subarrayStart( "tuples" ) ));
        }

        BSONElement ee = j.next();

        uassert( 13070 , "value too large to reduce" , ee.size() < ( BSONObjMaxUserSize / 2 ) );

        if ( sizeSoFar + ee.size() > BSONObjMaxUserSize ) {
            assert( n > 1 ); // if not, inf. loop
            break;
        }

        valueBuilder->append( ee );
        sizeSoFar += ee.size();
    }
    assert(valueBuilder);
    valueBuilder->done();
    BSONObj args = reduceArgs.obj();

    Scope * s = _func.scope();

    s->invokeSafe( _func.func() , &args, 0 );
    ++numReduces;

    if ( s->type( "return" ) == Array ) {
        uasserted( 10075 , "reduce -> multiple not supported yet");
        return;
    }

    endSizeEstimate = key.objsize() + ( args.objsize() / tuples.size() );

    if ( n == tuples.size() )
        return;

    // The input list was too large: add the rest of the elements to new tuples and reduce again.
    // Note: it would be better to use a loop instead of recursion to avoid stack overflow.
    BSONList x;
    for ( ; n < tuples.size(); n++ ) {
        x.push_back( tuples[n] );
    }
    BSONObjBuilder temp( endSizeEstimate );
    temp.append( key.firstElement() );
    s->append( temp , "1" , "return" );
    x.push_back( temp.obj() );
    _reduce( x , key , endSizeEstimate );
}
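// For reference, the argument object assembled above keeps the key under its original field
// name ("0" in the tuple format) and gathers all values into a "tuples" subarray.  A small
// illustration with hypothetical sample values:
//
//   input tuples: { "0" : "k", "1" : 1 }, { "0" : "k", "1" : 2 }
//   reduce args:  { "0" : "k", "tuples" : [ 1, 2 ] }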
// static
void ExpressionKeysPrivate::get2DKeys(const BSONObj &obj,
                                      const TwoDIndexingParams& params,
                                      BSONObjSet* keys,
                                      std::vector<BSONObj>* locs) {
    BSONElementMSet bSet;

    // Get all the nested location fields, but don't return individual elements from
    // the last array, if it exists.
    obj.getFieldsDotted(params.geo.c_str(), bSet, false);

    if (bSet.empty())
        return;

    for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) {
        BSONElement geo = *setI;

        if (geo.eoo() || !geo.isABSONObj())
            continue;

        //
        // Grammar for location lookup:
        // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc
        // loc  ::= { <k1> : #, <k2> : # }|[#, #]|{}
        //
        // Empty locations are ignored, preserving single-location semantics
        //

        BSONObj embed = geo.embeddedObject();
        if (embed.isEmpty())
            continue;

        // Differentiate between location arrays and locations
        // by seeing if the first element value is a number
        bool singleElement = embed.firstElement().isNumber();

        BSONObjIterator oi(embed);

        while (oi.more()) {
            BSONObj locObj;

            if (singleElement) {
                locObj = embed;
            } else {
                BSONElement locElement = oi.next();

                uassert(16804, mongoutils::str::stream() <<
                        "location object expected, location array not in correct format",
                        locElement.isABSONObj());

                locObj = locElement.embeddedObject();
                if (locObj.isEmpty())
                    continue;
            }

            BSONObjBuilder b(64);

            // Remember the actual location object if needed
            if (locs)
                locs->push_back(locObj);

            // Stop if we don't need to get anything but location objects
            if (!keys) {
                if (singleElement) break;
                else continue;
            }

            params.geoHashConverter->hash(locObj, &obj).appendHashMin(&b, "");

            // Go through all the other index keys
            for (vector<pair<string, int> >::const_iterator i = params.other.begin();
                 i != params.other.end(); ++i) {
                // Get *all* fields for the index key
                BSONElementSet eSet;
                obj.getFieldsDotted(i->first, eSet);

                if (eSet.size() == 0)
                    b.appendNull("");
                else if (eSet.size() == 1)
                    b.appendAs(*(eSet.begin()), "");
                else {
                    // If we have more than one key, store as an array of the objects
                    BSONArrayBuilder aBuilder;

                    for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end(); ++ei) {
                        aBuilder.append(*ei);
                    }

                    b.append("", aBuilder.arr());
                }
            }

            keys->insert(b.obj());

            if (singleElement) break;
        }
    }
}
void handleRESTQuery( string ns , string action , BSONObj & params , int & responseCode , stringstream & out ) {
    Timer t;

    int skip = _getOption( params["skip"] , 0 );
    int num = _getOption( params["limit"] , _getOption( params["count" ] , 1000 ) ); // count is old, limit is new

    int one = 0;
    if ( params["one"].type() == String && tolower( params["one"].valuestr()[0] ) == 't' ) {
        num = 1;
        one = 1;
    }

    BSONObjBuilder queryBuilder;

    BSONObjIterator i(params);
    while ( i.more() ){
        BSONElement e = i.next();
        string name = e.fieldName();
        if ( name.find( "filter_" ) != 0 )
            continue;

        // Keep the substring alive in a local string; taking c_str() of the temporary
        // returned by substr() would leave a dangling pointer.
        string field = name.substr( 7 );
        const char * val = e.valuestr();

        char * temp;

        // TODO: this is how i guess if something is a number. pretty lame right now
        double number = strtod( val , &temp );
        if ( temp != val )
            queryBuilder.append( field , number );
        else
            queryBuilder.append( field , val );
    }

    BSONObj query = queryBuilder.obj();

    auto_ptr<DBClientCursor> cursor = db.query( ns.c_str() , query, num , skip );

    if ( one ) {
        if ( cursor->more() ) {
            BSONObj obj = cursor->next();
            out << obj.jsonString() << "\n";
        }
        else {
            responseCode = 404;
        }
        return;
    }

    out << "{\n";
    out << "  \"offset\" : " << skip << ",\n";
    out << "  \"rows\": [\n";

    int howMany = 0;
    while ( cursor->more() ) {
        if ( howMany++ )
            out << " ,\n";
        BSONObj obj = cursor->next();
        out << "    " << obj.jsonString();
    }
    out << "\n  ],\n\n";

    out << "  \"total_rows\" : " << howMany << " ,\n";
    out << "  \"query\" : " << query.jsonString() << " ,\n";
    out << "  \"millis\" : " << t.millis() << "\n";
    out << "}\n";
}
Status RegexMatchExpression::init( const StringData& path, const BSONElement& e ) {
    if ( e.type() != RegEx )
        return Status( ErrorCodes::BadValue, "regex not a regex" );
    return init( path, e.regex(), e.regexFlags() );
}
bool ModMatchExpression::matchesSingleElement( const BSONElement& e ) const {
    if ( !e.isNumber() )
        return false;
    return e.numberLong() % _divisor == _remainder;
}
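// Note on the '%' above: C++ integer division truncates toward zero, so the remainder takes
// the sign of the dividend (e.g. -7 % 3 == -1).  A sign-normalizing variant is sketched
// below purely for illustration in case non-negative remainders were ever wanted; this is
// an assumption, not the matcher's behavior.  (std::llabs needs <cstdlib>.)
static long long positiveModSketch(long long value, long long divisor) {
    long long r = value % divisor;
    return r < 0 ? r + std::llabs(divisor) : r;
}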
bool GeoParser::parseCap(const BSONObj& obj, CapWithCRS *out) {
    if (isLegacyCenter(obj)) {
        BSONObjIterator typeIt(obj);
        BSONElement type = typeIt.next();
        BSONObjIterator objIt(type.embeddedObject());
        BSONElement center = objIt.next();
        if (!parseLegacyPoint(center.Obj(), &out->circle.center)) {
            return false;
        }
        BSONElement radius = objIt.next();
        out->circle.radius = radius.number();
        out->crs = FLAT;
    }
    else {
        BSONObjIterator typeIt(obj);
        BSONElement type = typeIt.next();
        BSONObjIterator objIt(type.embeddedObject());
        BSONObj centerObj = objIt.next().Obj();

        S2Point centerPoint;
        BSONObjIterator it(centerObj);
        BSONElement x = it.next();
        BSONElement y = it.next();
        centerPoint = coordToPoint(x.Number(), y.Number());
        BSONElement radiusElt = objIt.next();
        double radius = radiusElt.number();
        out->cap = S2Cap::FromAxisAngle(centerPoint, S1Angle::Radians(radius));
        out->crs = SPHERE;
    }
    return true;
}
StatusWith<BSONObj> fixDocumentForInsert( const BSONObj& doc ) {
    if ( doc.objsize() > BSONObjMaxUserSize )
        return StatusWith<BSONObj>( ErrorCodes::BadValue,
                                    str::stream()
                                    << "object to insert too large "
                                    << doc.objsize() );

    bool firstElementIsId = doc.firstElement().fieldNameStringData() == "_id";
    bool hasTimestampToFix = false;
    {
        BSONObjIterator i( doc );
        while ( i.more() ) {
            BSONElement e = i.next();

            if ( e.type() == Timestamp && e.timestampValue() == 0 ) {
                // we replace Timestamp(0,0) at the top level with a correct value
                // in the fast pass, we just mark that we want to swap
                hasTimestampToFix = true;
                break;
            }

            const char* fieldName = e.fieldName();

            if ( fieldName[0] == '$' ) {
                return StatusWith<BSONObj>( ErrorCodes::BadValue,
                                            str::stream()
                                            << "Document can't have $ prefixed field names: "
                                            << e.fieldName() );
            }

            // check no regexp for _id (SERVER-9502)
            // also, disallow undefined and arrays
            if ( str::equals( fieldName, "_id") ) {
                if ( e.type() == RegEx ) {
                    return StatusWith<BSONObj>( ErrorCodes::BadValue,
                                                "can't use a regex for _id" );
                }
                if ( e.type() == Undefined ) {
                    return StatusWith<BSONObj>( ErrorCodes::BadValue,
                                                "can't use an undefined for _id" );
                }
                if ( e.type() == Array ) {
                    return StatusWith<BSONObj>( ErrorCodes::BadValue,
                                                "can't use an array for _id" );
                }
                if ( e.type() == Object ) {
                    BSONObj o = e.Obj();
                    Status s = o.storageValidEmbedded();
                    if ( !s.isOK() )
                        return StatusWith<BSONObj>( s );
                }
            }
        }
    }

    if ( firstElementIsId && !hasTimestampToFix )
        return StatusWith<BSONObj>( BSONObj() );

    bool hadId = firstElementIsId;

    BSONObjIterator i( doc );

    BSONObjBuilder b( doc.objsize() + 16 );
    if ( firstElementIsId ) {
        b.append( doc.firstElement() );
        i.next();
    }
    else {
        BSONElement e = doc["_id"];
        if ( e.type() ) {
            b.append( e );
            hadId = true;
        }
        else {
            b.appendOID( "_id", NULL, true );
        }
    }

    while ( i.more() ) {
        BSONElement e = i.next();
        if ( hadId && e.fieldNameStringData() == "_id" ) {
            // no-op
        }
        else if ( e.type() == Timestamp && e.timestampValue() == 0 ) {
            mutex::scoped_lock lk(OpTime::m);
            b.append( e.fieldName(), OpTime::now(lk) );
        }
        else {
            b.append( e );
        }
    }

    return StatusWith<BSONObj>( b.obj() );
}
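// For reference, the rewrite above behaves like this on a document with no _id and a zero
// timestamp (values hypothetical):
//
//   in:  { a: 1, ts: Timestamp(0, 0) }
//   out: { _id: ObjectId("..."), a: 1, ts: <current OpTime> }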
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result,
         bool fromRepl) {
    string ns = dbname + "." + cmdObj.firstElement().valuestr();

    NamespaceDetails *nsd = nsdetails(ns);
    if (NULL == nsd) {
        errmsg = "can't find ns";
        return false;
    }

    vector<int> idxs;
    nsd->findIndexByType(GEOSEARCHNAME, idxs);
    if (idxs.size() == 0) {
        errmsg = "no geoSearch index";
        return false;
    }
    if (idxs.size() > 1) {
        errmsg = "more than 1 geosearch index";
        return false;
    }

    BSONElement nearElt = cmdObj["near"];
    BSONElement maxDistance = cmdObj["maxDistance"];
    BSONElement search = cmdObj["search"];

    uassert(13318, "near needs to be an array", nearElt.isABSONObj());
    uassert(13319, "maxDistance needs a number", maxDistance.isNumber());
    uassert(13320, "search needs to be an object", search.type() == Object);

    unsigned limit = 50;
    if (cmdObj["limit"].isNumber())
        limit = static_cast<unsigned>(cmdObj["limit"].numberInt());

    int idxNum = idxs[0];
    IndexDetails& id = nsd->idx(idxNum);

    if (CatalogHack::testIndexMigration()) {
        auto_ptr<IndexDescriptor> desc(CatalogHack::getDescriptor(nsd, idxNum));
        auto_ptr<HaystackAccessMethod> ham(new HaystackAccessMethod(desc.get()));
        ham->searchCommand(nearElt.Obj(), maxDistance.numberDouble(), search.Obj(),
                           &result, limit);
    } else {
        GeoHaystackSearchIndex *si =
            static_cast<GeoHaystackSearchIndex*>(id.getSpec().getType());
        verify(&id == si->getDetails());
        si->searchCommand(nsd, nearElt.Obj(), maxDistance.numberDouble(), search.Obj(),
                          result, limit);
    }
    return true;
}
// TODO(hk): consider moving hash/unhash/makeString out
int hash(const BSONElement& e) const {
    uassert(13322, "geo field is not a number", e.isNumber());
    return hash(e.numberDouble());
}
void searchCommand(NamespaceDetails* nsd,
                   const BSONObj& n /*near*/, double maxDistance, const BSONObj& search,
                   BSONObjBuilder& result, unsigned limit) {
    Timer t;

    LOG(1) << "SEARCH near:" << n << " maxDistance:" << maxDistance
           << " search: " << search << endl;
    int x, y;
    {
        BSONObjIterator i(n);
        x = hash(i.next());
        y = hash(i.next());
    }
    int scale = static_cast<int>(ceil(maxDistance / _bucketSize));

    GeoHaystackSearchHopper hopper(n, maxDistance, limit, _geoField);

    long long btreeMatches = 0;

    // TODO(hk): Consider starting with a (or b)=0, then going to a=+-1, then a=+-2, etc.
    // Would want a HaystackKeyIterator or similar for this, but it'd be a nice
    // encapsulation allowing us to S2-ify this trivially/abstract the key details.
    for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) {
        for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) {
            BSONObjBuilder bb;
            bb.append("", makeString(x + a, y + b));

            for (unsigned i = 0; i < _otherFields.size(); i++) {
                // See if the non-geo field we're indexing on is in the provided search term.
                BSONElement e = search.getFieldDotted(_otherFields[i]);
                if (e.eoo())
                    bb.appendNull("");
                else
                    bb.appendAs(e, "");
            }

            BSONObj key = bb.obj();

            GEOQUADDEBUG("KEY: " << key);

            // TODO(hk): this keeps a set of all DiskLoc seen in this pass so that we don't
            // consider the element twice.  Do we want to instead store a hash of the set?
            // Is this often big?
            set<DiskLoc> thisPass;

            // Lookup from key to key, inclusive.
            scoped_ptr<BtreeCursor> cursor(BtreeCursor::make(nsd, *getDetails(),
                                                             key, key, true, 1));
            while (cursor->ok() && !hopper.limitReached()) {
                pair<set<DiskLoc>::iterator, bool> p = thisPass.insert(cursor->currLoc());
                // If a new element was inserted (haven't seen the DiskLoc before), p.second
                // is true.
                if (p.second) {
                    hopper.consider(cursor->currLoc());
                    GEOQUADDEBUG("\t" << cursor->current());
                    btreeMatches++;
                }
                cursor->advance();
            }
        }
    }

    BSONArrayBuilder arr(result.subarrayStart("results"));
    int num = hopper.appendResultsTo(&arr);
    arr.done();

    {
        BSONObjBuilder b(result.subobjStart("stats"));
        b.append("time", t.millis());
        b.appendNumber("btreeMatches", btreeMatches);
        b.append("n", num);
        b.done();
    }
}
PlanStage* parseQuery(OperationContext* txn,
                      Collection* collection,
                      BSONObj obj,
                      WorkingSet* workingSet,
                      OwnedPointerVector<MatchExpression>* exprs) {
    BSONElement firstElt = obj.firstElement();
    if (!firstElt.isABSONObj()) {
        return NULL;
    }
    BSONObj paramObj = firstElt.Obj();

    MatchExpression* matcher = NULL;
    BSONObj nodeArgs;

    // Every node has these two fields.
    const string filterTag = "filter";
    const string argsTag = "args";

    BSONObjIterator it(paramObj);
    while (it.more()) {
        BSONElement e = it.next();
        if (!e.isABSONObj()) {
            return NULL;
        }
        BSONObj argObj = e.Obj();
        if (filterTag == e.fieldName()) {
            StatusWithMatchExpression statusWithMatcher =
                MatchExpressionParser::parse(argObj,
                                             ExtensionsCallbackReal(txn, &collection->ns()));
            if (!statusWithMatcher.isOK()) {
                return NULL;
            }
            std::unique_ptr<MatchExpression> me = std::move(statusWithMatcher.getValue());
            // exprs is what will wind up deleting this.
            matcher = me.release();
            verify(NULL != matcher);
            exprs->mutableVector().push_back(matcher);
        } else if (argsTag == e.fieldName()) {
            nodeArgs = argObj;
        } else {
            uasserted(16910,
                      "Unknown fieldname " + string(e.fieldName()) + " in query node " +
                          obj.toString());
            return NULL;
        }
    }

    string nodeName = firstElt.fieldName();

    if ("ixscan" == nodeName) {
        // This'll throw if it's not an obj but that's OK.
        BSONObj keyPatternObj = nodeArgs["keyPattern"].Obj();

        IndexDescriptor* desc =
            collection->getIndexCatalog()->findIndexByKeyPattern(txn, keyPatternObj);
        uassert(16890, "Can't find index: " + keyPatternObj.toString(), desc);

        IndexScanParams params;
        params.descriptor = desc;
        params.bounds.isSimpleRange = true;
        params.bounds.startKey = stripFieldNames(nodeArgs["startKey"].Obj());
        params.bounds.endKey = stripFieldNames(nodeArgs["endKey"].Obj());
        params.bounds.endKeyInclusive = nodeArgs["endKeyInclusive"].Bool();
        params.direction = nodeArgs["direction"].numberInt();

        return new IndexScan(txn, params, workingSet, matcher);
    } else if ("andHash" == nodeName) {
        uassert(16921, "Nodes argument must be provided to AND", nodeArgs["nodes"].isABSONObj());

        auto andStage = make_unique<AndHashStage>(txn, workingSet, collection);

        int nodesAdded = 0;
        BSONObjIterator it(nodeArgs["nodes"].Obj());
        while (it.more()) {
            BSONElement e = it.next();
            uassert(16922, "node of AND isn't an obj?: " + e.toString(), e.isABSONObj());

            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16923, "Can't parse sub-node of AND: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            andStage->addChild(subNode);
            ++nodesAdded;
        }

        uassert(16927, "AND requires more than one child", nodesAdded >= 2);

        return andStage.release();
    } else if ("andSorted" == nodeName) {
        uassert(16924, "Nodes argument must be provided to AND", nodeArgs["nodes"].isABSONObj());

        auto andStage = make_unique<AndSortedStage>(txn, workingSet, collection);

        int nodesAdded = 0;
        BSONObjIterator it(nodeArgs["nodes"].Obj());
        while (it.more()) {
            BSONElement e = it.next();
            uassert(16925, "node of AND isn't an obj?: " + e.toString(), e.isABSONObj());

            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16926, "Can't parse sub-node of AND: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            andStage->addChild(subNode);
            ++nodesAdded;
        }

        uassert(16928, "AND requires more than one child", nodesAdded >= 2);

        return andStage.release();
    } else if ("or" == nodeName) {
        uassert(16934, "Nodes argument must be provided to AND", nodeArgs["nodes"].isABSONObj());
        uassert(16935, "Dedup argument must be provided to OR", !nodeArgs["dedup"].eoo());
        BSONObjIterator it(nodeArgs["nodes"].Obj());
        auto orStage = make_unique<OrStage>(txn, workingSet, nodeArgs["dedup"].Bool(), matcher);
        while (it.more()) {
            BSONElement e = it.next();
            if (!e.isABSONObj()) {
                return NULL;
            }
            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16936, "Can't parse sub-node of OR: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            orStage->addChild(subNode);
        }

        return orStage.release();
    } else if ("fetch" == nodeName) {
        uassert(16929, "Node argument must be provided to fetch", nodeArgs["node"].isABSONObj());
        PlanStage* subNode =
            parseQuery(txn, collection, nodeArgs["node"].Obj(), workingSet, exprs);
        uassert(28731,
                "Can't parse sub-node of FETCH: " + nodeArgs["node"].Obj().toString(),
                NULL != subNode);
        return new FetchStage(txn, workingSet, subNode, matcher, collection);
    } else if ("limit" == nodeName) {
        uassert(16937, "Limit stage doesn't have a filter (put it on the child)",
                NULL == matcher);
        uassert(16930, "Node argument must be provided to limit", nodeArgs["node"].isABSONObj());
        uassert(16931, "Num argument must be provided to limit", nodeArgs["num"].isNumber());
        PlanStage* subNode =
            parseQuery(txn, collection, nodeArgs["node"].Obj(), workingSet, exprs);
        uassert(28732,
                "Can't parse sub-node of LIMIT: " + nodeArgs["node"].Obj().toString(),
                NULL != subNode);
        return new LimitStage(txn, nodeArgs["num"].numberInt(), workingSet, subNode);
    } else if ("skip" == nodeName) {
        uassert(16938, "Skip stage doesn't have a filter (put it on the child)",
                NULL == matcher);
        uassert(16932, "Node argument must be provided to skip", nodeArgs["node"].isABSONObj());
        uassert(16933, "Num argument must be provided to skip", nodeArgs["num"].isNumber());
        PlanStage* subNode =
            parseQuery(txn, collection, nodeArgs["node"].Obj(), workingSet, exprs);
        uassert(28733,
                "Can't parse sub-node of SKIP: " + nodeArgs["node"].Obj().toString(),
                NULL != subNode);
        return new SkipStage(txn, nodeArgs["num"].numberInt(), workingSet, subNode);
    } else if ("cscan" == nodeName) {
        CollectionScanParams params;
        params.collection = collection;

        // What direction?
        uassert(16963,
                "Direction argument must be specified and be a number",
                nodeArgs["direction"].isNumber());
        if (1 == nodeArgs["direction"].numberInt()) {
            params.direction = CollectionScanParams::FORWARD;
        } else {
            params.direction = CollectionScanParams::BACKWARD;
        }

        return new CollectionScan(txn, params, workingSet, matcher);
    }
// sort is disabled for now.
#if 0
    else if ("sort" == nodeName) {
        uassert(16969, "Node argument must be provided to sort",
                nodeArgs["node"].isABSONObj());
        uassert(16970, "Pattern argument must be provided to sort",
                nodeArgs["pattern"].isABSONObj());
        PlanStage* subNode = parseQuery(txn, db, nodeArgs["node"].Obj(), workingSet, exprs);
        SortStageParams params;
        params.pattern = nodeArgs["pattern"].Obj();
        return new SortStage(params, workingSet, subNode);
    }
#endif
    else if ("mergeSort" == nodeName) {
        uassert(16971, "Nodes argument must be provided to sort", nodeArgs["nodes"].isABSONObj());
        uassert(16972,
                "Pattern argument must be provided to sort",
                nodeArgs["pattern"].isABSONObj());

        MergeSortStageParams params;
        params.pattern = nodeArgs["pattern"].Obj();
        // Dedup is true by default.

        auto mergeStage = make_unique<MergeSortStage>(txn, params, workingSet, collection);

        BSONObjIterator it(nodeArgs["nodes"].Obj());
        while (it.more()) {
            BSONElement e = it.next();
            uassert(16973, "node of mergeSort isn't an obj?: " + e.toString(), e.isABSONObj());

            PlanStage* subNode = parseQuery(txn, collection, e.Obj(), workingSet, exprs);
            uassert(16974,
                    "Can't parse sub-node of mergeSort: " + e.Obj().toString(),
                    NULL != subNode);
            // takes ownership
            mergeStage->addChild(subNode);
        }
        return mergeStage.release();
    } else if ("text" == nodeName) {
        string search = nodeArgs["search"].String();

        vector<IndexDescriptor*> idxMatches;
        collection->getIndexCatalog()->findIndexByType(txn, "text", idxMatches);
        uassert(17194, "Expected exactly one text index", idxMatches.size() == 1);

        IndexDescriptor* index = idxMatches[0];
        FTSAccessMethod* fam =
            dynamic_cast<FTSAccessMethod*>(collection->getIndexCatalog()->getIndex(index));
        TextStageParams params(fam->getSpec());
        params.index = index;

        // TODO: Deal with non-empty filters.  This is a hack to put in covering information
        // that can only be checked for equality.  We ignore this now.
        Status s = fam->getSpec().getIndexPrefix(BSONObj(), &params.indexPrefix);
        if (!s.isOK()) {
            // errmsg = s.toString();
            return NULL;
        }

        params.spec = fam->getSpec();

        params.query.setQuery(search);
        params.query.setLanguage(fam->getSpec().defaultLanguage().str());
        params.query.setCaseSensitive(TextMatchExpressionBase::kCaseSensitiveDefault);
        params.query.setDiacriticSensitive(TextMatchExpressionBase::kDiacriticSensitiveDefault);
        if (!params.query.parse(fam->getSpec().getTextIndexVersion()).isOK()) {
            return NULL;
        }

        return new TextStage(txn, params, workingSet, matcher);
    } else if ("delete" == nodeName) {
        uassert(18636, "Delete stage doesn't have a filter (put it on the child)",
                NULL == matcher);
        uassert(18637, "node argument must be provided to delete",
                nodeArgs["node"].isABSONObj());
        uassert(18638,
                "isMulti argument must be provided to delete",
                nodeArgs["isMulti"].type() == Bool);
        PlanStage* subNode =
            parseQuery(txn, collection, nodeArgs["node"].Obj(), workingSet, exprs);
        uassert(28734,
                "Can't parse sub-node of DELETE: " + nodeArgs["node"].Obj().toString(),
                NULL != subNode);
        DeleteStageParams params;
        params.isMulti = nodeArgs["isMulti"].Bool();
        return new DeleteStage(txn, params, workingSet, collection, subNode);
    } else {
        return NULL;
    }
}
bool ExistsMatchExpression::matchesSingleElement( const BSONElement& e ) const {
    return !e.eoo();
}
// PD_TRACE_DECLARE_FUNCTION ( SDB__IXMKEYGEN__GENKEYSWITHARRELE, "_ixmKeyGenerator::_genKeyWithArrayEle" )
INT32 _genKeyWithArrayEle( BSONElement *keyEles,
                           UINT32 eleNum,
                           const BSONElement *arrElement,
                           const CHAR *arrEleName,
                           UINT32 arrElePos,
                           BSONObjSet &keys ) const {
    PD_TRACE_ENTRY ( SDB__IXMKEYGEN__GENKEYSWITHARRELE ) ;
    INT32 rc = SDB_OK ;
    BSONObj arrObj = arrElement->embeddedObject() ;

    if ( arrObj.firstElement().eoo() ) {
        keyEles[arrElePos] = *arrElement ;
        rc = _genKeyWithNormalEle( keyEles, eleNum, keys ) ;
        if ( SDB_OK != rc ) {
            goto error ;
        }
    }

    if ( '\0' == *arrEleName ) {
        BSONObjIterator itr( arrObj ) ;
        BSONElement &e = keyEles[arrElePos] ;
        while ( itr.more() ) {
            e = itr.next() ;
            rc = _genKeyWithNormalEle( keyEles, eleNum, keys ) ;
            if ( SDB_OK != rc ) {
                goto error ;
            }
        }
    }
    else {
        BSONObjIterator itr( arrObj ) ;
        while ( itr.more() ) {
            const CHAR *dottedName = arrEleName ;
            BSONElement next = itr.next() ;
            if ( Object == next.type() ) {
                BSONElement e = next.embeddedObject()
                                    .getFieldDottedOrArray( dottedName ) ;
                if ( Array == e.type() ) {
                    rc = _genKeyWithArrayEle( keyEles, eleNum, &e,
                                              dottedName, arrElePos, keys ) ;
                    if ( SDB_OK != rc ) {
                        goto error ;
                    }
                    else {
                        continue ;
                    }
                }
                else {
                    keyEles[arrElePos] = e ;
                }
            }
            else {
                keyEles[arrElePos] = BSONElement() ;
            }

            rc = _genKeyWithNormalEle( keyEles, eleNum, keys ) ;
            if ( SDB_OK != rc ) {
                goto error ;
            }
        }
    }

done:
    PD_TRACE_EXITRC( SDB__IXMKEYGEN__GENKEYSWITHARRELE, rc ) ;
    return rc ;
error:
    goto done ;
}
bool TypeMatchExpression::matchesSingleElement( const BSONElement& e ) const {
    return e.type() == _type;
}
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
                                      const BSONObj& keyPattern,
                                      const S2IndexingParams& params,
                                      BSONObjSet* keys) {
    BSONObjSet keysToAdd;

    // Does one of our documents have a geo field?
    bool haveGeoField = false;

    // We output keys in the same order as the fields we index.
    BSONObjIterator i(keyPattern);
    while (i.more()) {
        BSONElement e = i.next();

        // First, we get the keys that this field adds.  Either they're added literally from
        // the value of the field, or they're transformed if the field is geo.
        BSONElementSet fieldElements;
        // false means don't expand the last array.
        obj.getFieldsDotted(e.fieldName(), fieldElements, false);

        BSONObjSet keysForThisField;
        if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
            if (S2_INDEX_VERSION_2 == params.indexVersion) {
                // For V2,
                // geo: null,
                // geo: undefined
                // geo: []
                // should all behave like there is no geo field.  So we look for these cases
                // and throw out the field elements if we find them.
                if (1 == fieldElements.size()) {
                    BSONElement elt = *fieldElements.begin();
                    // Get the :null and :undefined cases.
                    if (elt.isNull() || Undefined == elt.type()) {
                        fieldElements.clear();
                    }
                    else if (elt.isABSONObj()) {
                        // And this is the :[] case.
                        BSONObj obj = elt.Obj();
                        if (0 == obj.nFields()) {
                            fieldElements.clear();
                        }
                    }
                }

                // V2 2dsphere indices require that at least one geo field be present in a
                // document in order to index it.
                if (fieldElements.size() > 0) {
                    haveGeoField = true;
                }
            }

            getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
        } else {
            getS2LiteralKeys(fieldElements, &keysForThisField);
        }

        // We expect there to be the missing field element present in the keys if data is
        // missing.  So, this should be non-empty.
        verify(!keysForThisField.empty());

        // We take the Cartesian product of all of the keys.  This requires that we have
        // some keys to take the Cartesian product with.  If keysToAdd.empty(), we
        // initialize it.
        if (keysToAdd.empty()) {
            keysToAdd = keysForThisField;
            continue;
        }

        BSONObjSet updatedKeysToAdd;
        for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
            for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
                 newIt != keysForThisField.end(); ++newIt) {
                BSONObjBuilder b;
                b.appendElements(*it);
                b.append(newIt->firstElement());
                updatedKeysToAdd.insert(b.obj());
            }
        }
        keysToAdd = updatedKeysToAdd;
    }

    // Make sure that if we're V2 there's at least one geo field present in the doc.
    if (S2_INDEX_VERSION_2 == params.indexVersion) {
        if (!haveGeoField) {
            return;
        }
    }

    if (keysToAdd.size() > params.maxKeysPerInsert) {
        warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
                  << ") consider creating larger buckets. obj=" << obj;
    }

    *keys = keysToAdd;
}
void operator()(DBClientCursorBatchIterator &i) {
    const string to_dbname = nsToDatabase(to_collection);
    while (i.moreInCurrentBatch()) {
        if (n % 128 == 127) {
            time_t now = time(0);
            if (now - lastLog >= 60) {
                // report progress
                if (lastLog) {
                    log() << "clone " << to_collection << ' ' << n << endl;
                }
                lastLog = now;
            }
            mayInterrupt(_mayBeInterrupted);
        }

        BSONObj js = i.nextSafe();
        ++n;

        if (isindex) {
            verify(nsToCollectionSubstring(from_collection) == "system.indexes");
            storedForLater->push_back(fixindex(js, to_dbname).getOwned());
        }
        else {
            try {
                LOCK_REASON(lockReason, "cloner: copying documents into local collection");
                Client::ReadContext ctx(to_collection, lockReason);
                if (_isCapped) {
                    Collection *cl = getCollection(to_collection);
                    verify(cl->isCapped());
                    BSONObj pk = js["$_"].Obj();
                    BSONObjBuilder rowBuilder;
                    BSONObjIterator it(js);
                    while (it.moreWithEOO()) {
                        BSONElement e = it.next();
                        if (e.eoo()) {
                            break;
                        }
                        if (!mongoutils::str::equals(e.fieldName(), "$_")) {
                            rowBuilder.append(e);
                        }
                    }
                    BSONObj row = rowBuilder.obj();
                    CappedCollection *cappedCl = cl->as<CappedCollection>();
                    bool indexBitChanged = false;
                    cappedCl->insertObjectWithPK(pk, row, Collection::NO_LOCKTREE,
                                                 &indexBitChanged);
                    // Hack copied from Collection::insertObject.  TODO: find a better way
                    // to do this.
                    if (indexBitChanged) {
                        cl->noteMultiKeyChanged();
                    }
                }
                else {
                    insertObject(to_collection, js, 0, logForRepl);
                }
            }
            catch (UserException& e) {
                error() << "error: exception cloning object in " << from_collection << ' '
                        << e.what() << " obj:" << js.toString() << '\n';
                throw;
            }

            RARELY if ( time( 0 ) - saveLast > 60 ) {
                log() << n << " objects cloned so far from collection "
                      << from_collection << endl;
                saveLast = time( 0 );
            }
        }
    }
}
long long BSONTool::processFile( const path& root ) {
    _fileName = root.string();

    unsigned long long fileLength = file_size( root );

    if ( fileLength == 0 ) {
        out() << "file " << _fileName << " empty, skipping" << endl;
        return 0;
    }

    FILE* file = fopen( _fileName.c_str() , "rb" );
    if ( ! file ) {
        log() << "error opening file: " << _fileName << endl;
        return 0;
    }

#if !defined(__sunos__) && defined(POSIX_FADV_SEQUENTIAL)
    posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL);
#endif

    log(1) << "\t file size: " << fileLength << endl;

    unsigned long long read = 0;
    unsigned long long num = 0;
    unsigned long long processed = 0;

    const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 );
    boost::scoped_array<char> buf_holder(new char[BUF_SIZE]);
    char * buf = buf_holder.get();

    ProgressMeter m( fileLength );

    while ( read < fileLength ) {
        int readlen = fread(buf, 4, 1, file);
        int size = ((int*)buf)[0];
        if ( size >= BUF_SIZE ) {
            cerr << "got an object of size: " << size << "  terminating..." << endl;
        }
        uassert( 10264 , "invalid object size" , size < BUF_SIZE );

        readlen = fread(buf+4, size-4, 1, file);

        BSONObj o( buf );
        if ( _objcheck && ! o.valid() ) {
            cerr << "INVALID OBJECT - going to try and print it out" << endl;
            cerr << "size: " << size << endl;
            BSONObjIterator i(o);
            while ( i.more() ) {
                BSONElement e = i.next();
                try {
                    e.validate();
                }
                catch ( ... ) {
                    cerr << "\t\t NEXT ONE IS INVALID" << endl;
                }
                cerr << "\t name : " << e.fieldName() << " " << e.type() << endl;
                cerr << "\t " << e << endl;
            }
        }

        if ( _matcher.get() == 0 || _matcher->matches( o ) ) {
            gotObject( o );
            processed++;
        }

        read += o.objsize();
        num++;

        m.hit( o.objsize() );
    }

    fclose( file );

    uassert( 10265 , "counts don't match" , m.done() == fileLength );
    out() << "\t " << m.hits() << " objects found" << endl;
    if ( _matcher.get() )
        out() << "\t " << processed << " objects processed" << endl;
    return processed;
}
intrusive_ptr<DocumentSource> DocumentSourceGroup::createFromBson(
    BSONElement elem, const intrusive_ptr<ExpressionContext>& pExpCtx) {
    uassert(15947, "a group's fields must be specified in an object", elem.type() == Object);

    intrusive_ptr<DocumentSourceGroup> pGroup(DocumentSourceGroup::create(pExpCtx));

    BSONObj groupObj(elem.Obj());
    BSONObjIterator groupIterator(groupObj);
    VariablesIdGenerator idGenerator;
    VariablesParseState vps(&idGenerator);
    while (groupIterator.more()) {
        BSONElement groupField(groupIterator.next());
        const char* pFieldName = groupField.fieldName();

        if (str::equals(pFieldName, "_id")) {
            uassert(15948, "a group's _id may only be specified once",
                    pGroup->_idExpressions.empty());
            pGroup->parseIdExpression(groupField, vps);
            invariant(!pGroup->_idExpressions.empty());
        } else if (str::equals(pFieldName, "$doingMerge")) {
            massert(17030, "$doingMerge should be true if present", groupField.Bool());

            pGroup->setDoingMerge(true);
        } else {
            /* Treat as a projection field with the additional ability to
               add aggregation operators. */
            uassert(16414,
                    str::stream() << "the group aggregate field name '" << pFieldName
                                  << "' cannot be used because $group's field names cannot "
                                  << "contain '.'",
                    !str::contains(pFieldName, '.'));
            uassert(15950,
                    str::stream() << "the group aggregate field name '" << pFieldName
                                  << "' cannot be an operator name",
                    pFieldName[0] != '$');
            uassert(15951,
                    str::stream() << "the group aggregate field '" << pFieldName
                                  << "' must be defined as an expression inside an object",
                    groupField.type() == Object);

            BSONObj subField(groupField.Obj());
            BSONObjIterator subIterator(subField);
            size_t subCount = 0;
            for (; subIterator.more(); ++subCount) {
                BSONElement subElement(subIterator.next());
                auto name = subElement.fieldNameStringData();
                Accumulator::Factory factory = Accumulator::getFactory(name);
                intrusive_ptr<Expression> pGroupExpr;
                BSONType elementType = subElement.type();
                if (elementType == Object) {
                    pGroupExpr = Expression::parseObject(subElement.Obj(), vps);
                } else if (elementType == Array) {
                    uasserted(15953,
                              str::stream() << "aggregating group operators are unary ("
                                            << name << ")");
                } else { /* assume it's an atomic single operand */
                    pGroupExpr = Expression::parseOperand(subElement, vps);
                }

                pGroup->addAccumulator(pFieldName, factory, pGroupExpr);
            }

            uassert(15954,
                    str::stream() << "the computed aggregate '" << pFieldName
                                  << "' must specify exactly one operator",
                    subCount == 1);
        }
    }

    uassert(15955, "a group specification must include an _id", !pGroup->_idExpressions.empty());

    pGroup->_variables.reset(new Variables(idGenerator.getIdCount()));

    return pGroup;
}
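// For reference, a group spec this parser accepts (field and collection names hypothetical):
// "_id" is parsed as an expression, and every other field must be an object containing
// exactly one accumulator operator.
//
//   { $group: { _id: "$state", total: { $sum: "$amount" } } }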
bool RunOnAllShardsCommand::run(OperationContext* txn,
                                const std::string& dbName,
                                BSONObj& cmdObj,
                                int options,
                                std::string& errmsg,
                                BSONObjBuilder& output) {
    LOG(1) << "RunOnAllShardsCommand db: " << dbName << " cmd:" << cmdObj;

    if (_implicitCreateDb) {
        uassertStatusOK(ScopedShardDatabase::getOrCreate(txn, dbName));
    }

    std::vector<ShardId> shardIds;
    getShardIds(txn, dbName, cmdObj, shardIds);

    std::list<std::shared_ptr<Future::CommandResult>> futures;
    for (const ShardId& shardId : shardIds) {
        const auto shard = grid.shardRegistry()->getShard(txn, shardId);
        if (!shard) {
            continue;
        }

        futures.push_back(Future::spawnCommand(
            shard->getConnString().toString(), dbName, cmdObj, 0, NULL, _useShardConn));
    }

    std::vector<ShardAndReply> results;
    BSONObjBuilder subobj(output.subobjStart("raw"));
    BSONObjBuilder errors;
    int commonErrCode = -1;

    std::list<std::shared_ptr<Future::CommandResult>>::iterator futuresit;
    std::vector<ShardId>::const_iterator shardIdsIt;

    BSONElement wcErrorElem;
    ShardId wcErrorShardId;
    bool hasWCError = false;

    // We iterate over the set of shard ids and their corresponding futures in parallel.
    // TODO: replace with zip iterator if we ever decide to use one from Boost or elsewhere
    for (futuresit = futures.begin(), shardIdsIt = shardIds.cbegin();
         futuresit != futures.end() && shardIdsIt != shardIds.end();
         ++futuresit, ++shardIdsIt) {
        std::shared_ptr<Future::CommandResult> res = *futuresit;

        if (res->join(txn)) {
            // success :)
            BSONObj result = res->result();
            results.emplace_back(shardIdsIt->toString(), result);
            subobj.append(res->getServer(), result);

            if (!hasWCError) {
                if ((wcErrorElem = result["writeConcernError"])) {
                    wcErrorShardId = *shardIdsIt;
                    hasWCError = true;
                }
            }
            continue;
        }

        BSONObj result = res->result();

        if (!hasWCError) {
            if ((wcErrorElem = result["writeConcernError"])) {
                wcErrorShardId = *shardIdsIt;
                hasWCError = true;
            }
        }

        if (result["errmsg"].type() || result["code"].numberInt() != 0) {
            result = specialErrorHandler(res->getServer(), dbName, cmdObj, result);

            BSONElement errmsgObj = result["errmsg"];
            if (errmsgObj.eoo() || errmsgObj.String().empty()) {
                // it was fixed!
                results.emplace_back(shardIdsIt->toString(), result);
                subobj.append(res->getServer(), result);
                continue;
            }
        }

        // Handle "errmsg".
        if (!result["errmsg"].eoo()) {
            errors.appendAs(result["errmsg"], res->getServer());
        } else {
            // Can happen if message is empty, for some reason
            errors.append(res->getServer(),
                          str::stream() << "result without error message returned : " << result);
        }

        // Handle "code".
        int errCode = result["code"].numberInt();
        if (commonErrCode == -1) {
            commonErrCode = errCode;
        } else if (commonErrCode != errCode) {
            commonErrCode = 0;
        }
        results.emplace_back(shardIdsIt->toString(), result);
        subobj.append(res->getServer(), result);
    }

    subobj.done();

    if (hasWCError) {
        appendWriteConcernErrorToCmdResponse(wcErrorShardId, wcErrorElem, output);
    }

    BSONObj errobj = errors.done();

    if (!errobj.isEmpty()) {
        errmsg = errobj.toString();

        // If every error has a code, and the code for all errors is the same, then add
        // a top-level field "code" with this value to the output object.
        if (commonErrCode > 0) {
            output.append("code", commonErrCode);
        }

        return false;
    }

    aggregateResults(results, output);
    return true;
}
bool wrappedRun(OperationContext* txn,
                const string& dbname,
                BSONObj& jsobj,
                string& errmsg,
                BSONObjBuilder& anObjBuilder) {
    BSONElement e = jsobj.firstElement();
    const string toDeleteNs = dbname + '.' + e.valuestr();
    if (!serverGlobalParams.quiet) {
        LOG(0) << "CMD: dropIndexes " << toDeleteNs << endl;
    }
    Client::Context ctx(toDeleteNs);
    Database* db = ctx.db();

    Collection* collection = db->getCollection( txn, toDeleteNs );
    if ( ! collection ) {
        errmsg = "ns not found";
        return false;
    }

    stopIndexBuilds(txn, db, jsobj);

    IndexCatalog* indexCatalog = collection->getIndexCatalog();
    anObjBuilder.appendNumber("nIndexesWas", indexCatalog->numIndexesTotal() );

    BSONElement f = jsobj.getField("index");
    if ( f.type() == String ) {
        string indexToDelete = f.valuestr();

        if ( indexToDelete == "*" ) {
            Status s = indexCatalog->dropAllIndexes(txn, false);
            if ( !s.isOK() ) {
                appendCommandStatus( anObjBuilder, s );
                return false;
            }
            anObjBuilder.append("msg", "non-_id indexes dropped for collection");
            return true;
        }

        IndexDescriptor* desc = collection->getIndexCatalog()->findIndexByName( indexToDelete );
        if ( desc == NULL ) {
            errmsg = str::stream() << "index not found with name [" << indexToDelete << "]";
            return false;
        }

        if ( desc->isIdIndex() ) {
            errmsg = "cannot drop _id index";
            return false;
        }

        Status s = indexCatalog->dropIndex(txn, desc);
        if ( !s.isOK() ) {
            appendCommandStatus( anObjBuilder, s );
            return false;
        }

        return true;
    }

    if ( f.type() == Object ) {
        IndexDescriptor* desc =
            collection->getIndexCatalog()->findIndexByKeyPattern( f.embeddedObject() );
        if ( desc == NULL ) {
            errmsg = "can't find index with key:";
            errmsg += f.embeddedObject().toString();
            return false;
        }

        if ( desc->isIdIndex() ) {
            errmsg = "cannot drop _id index";
            return false;
        }

        Status s = indexCatalog->dropIndex(txn, desc);
        if ( !s.isOK() ) {
            appendCommandStatus( anObjBuilder, s );
            return false;
        }

        return true;
    }

    errmsg = "invalid index name spec";
    return false;
}
Config::Config( const string& _dbname , const BSONObj& cmdObj ) {
    dbname = _dbname;
    ns = dbname + "." + cmdObj.firstElement().valuestr();

    verbose = cmdObj["verbose"].trueValue();
    jsMode = cmdObj["jsMode"].trueValue();

    jsMaxKeys = 500000;
    reduceTriggerRatio = 2.0;
    maxInMemSize = 5 * 1024 * 1024;

    uassert( 13602 , "outType is no longer a valid option" , cmdObj["outType"].eoo() );

    if ( cmdObj["out"].type() == String ) {
        finalShort = cmdObj["out"].String();
        outType = REPLACE;
    }
    else if ( cmdObj["out"].type() == Object ) {
        BSONObj o = cmdObj["out"].embeddedObject();

        BSONElement e = o.firstElement();
        string t = e.fieldName();

        if ( t == "normal" || t == "replace" ) {
            outType = REPLACE;
            finalShort = e.String();
        }
        else if ( t == "merge" ) {
            outType = MERGE;
            finalShort = e.String();
        }
        else if ( t == "reduce" ) {
            outType = REDUCE;
            finalShort = e.String();
        }
        else if ( t == "inline" ) {
            outType = INMEMORY;
        }
        else {
            uasserted( 13522 , str::stream() << "unknown out specifier [" << t << "]" );
        }

        if (o.hasElement("db")) {
            outDB = o["db"].String();
        }
    }
    else {
        uasserted( 13606 , "'out' has to be a string or an object" );
    }

    if ( outType != INMEMORY ) { // setup names
        tempLong = str::stream() << (outDB.empty() ? dbname : outDB) << ".tmp.mr."
                                 << cmdObj.firstElement().String() << "_" << JOB_NUMBER++;
        incLong = tempLong + "_inc";
        finalLong = str::stream() << (outDB.empty() ? dbname : outDB) << "." << finalShort;
    }

    { // scope and code
        if ( cmdObj["scope"].type() == Object )
            scopeSetup = cmdObj["scope"].embeddedObjectUserCheck();

        mapper.reset( new JSMapper( cmdObj["map"] ) );
        reducer.reset( new JSReducer( cmdObj["reduce"] ) );
        if ( cmdObj["finalize"].type() && cmdObj["finalize"].trueValue() )
            finalizer.reset( new JSFinalizer( cmdObj["finalize"] ) );

        if ( cmdObj["mapparams"].type() == Array ) {
            mapParams = cmdObj["mapparams"].embeddedObjectUserCheck();
        }
    }

    { // query options
        BSONElement q = cmdObj["query"];
        if ( q.type() == Object )
            filter = q.embeddedObjectUserCheck();
        else
            uassert( 13608 , "query has to be blank or an Object" , ! q.trueValue() );

        BSONElement s = cmdObj["sort"];
        if ( s.type() == Object )
            sort = s.embeddedObjectUserCheck();
        else
            uassert( 13609 , "sort has to be blank or an Object" , ! s.trueValue() );

        if ( cmdObj["limit"].isNumber() )
            limit = cmdObj["limit"].numberLong();
        else
            limit = 0;
    }
}
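// For reference, the "out" forms accepted above:
//
//   out: "coll"                      -> REPLACE into <dbname>.coll
//   out: { replace: "coll" }         -> REPLACE (as does { normal: "coll" })
//   out: { merge: "coll" }           -> MERGE
//   out: { reduce: "coll" }          -> REDUCE
//   out: { inline: 1 }               -> INMEMORY
//
// An optional { db: "other" } entry in the object form redirects output to another database.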
bool run(OperationContext* txn, const string& dbname , BSONObj& jsobj, int, string& errmsg,
         BSONObjBuilder& result, bool /*fromRepl*/) {
    DBDirectClient db;

    BSONElement e = jsobj.firstElement();
    string toDeleteNs = dbname + '.' + e.valuestr();

    LOG(0) << "CMD: reIndex " << toDeleteNs << endl;

    Lock::DBWrite dbXLock(txn->lockState(), dbname);
    Client::Context ctx(toDeleteNs);

    Collection* collection = ctx.db()->getCollection( txn, toDeleteNs );

    if ( !collection ) {
        errmsg = "ns not found";
        return false;
    }

    BackgroundOperation::assertNoBgOpInProgForNs( toDeleteNs );

    std::vector<BSONObj> indexesInProg = stopIndexBuilds(txn, ctx.db(), jsobj);

    list<BSONObj> all;
    auto_ptr<DBClientCursor> i = db.query( dbname + ".system.indexes" ,
                                           BSON( "ns" << toDeleteNs ) ,
                                           0 , 0 , 0 , QueryOption_SlaveOk );
    BSONObjBuilder b;
    while ( i->more() ) {
        const BSONObj spec = i->next().removeField("v").getOwned();
        const BSONObj key = spec.getObjectField("key");
        const Status keyStatus = validateKeyPattern(key);
        if (!keyStatus.isOK()) {
            errmsg = str::stream()
                << "Cannot rebuild index " << spec << ": " << keyStatus.reason()
                << " For more info see http://dochub.mongodb.org/core/index-validation";
            return false;
        }

        b.append( BSONObjBuilder::numStr( all.size() ) , spec );
        all.push_back( spec );
    }

    result.appendNumber( "nIndexesWas", collection->getIndexCatalog()->numIndexesTotal() );

    Status s = collection->getIndexCatalog()->dropAllIndexes(txn, true);
    if ( !s.isOK() ) {
        errmsg = "dropIndexes failed";
        return appendCommandStatus( result, s );
    }

    for ( list<BSONObj>::iterator i=all.begin(); i!=all.end(); i++ ) {
        BSONObj o = *i;
        LOG(1) << "reIndex ns: " << toDeleteNs << " index: " << o << endl;
        Status s = collection->getIndexCatalog()->createIndex(txn, o, false);
        if ( !s.isOK() )
            return appendCommandStatus( result, s );
    }

    result.append( "nIndexes" , (int)all.size() );
    result.appendArray( "indexes" , b.obj() );

    IndexBuilder::restoreIndexes(indexesInProg);
    return true;
}
int run() {
    string ns;
    const bool csv = hasParam( "csv" );
    const bool jsonArray = hasParam( "jsonArray" );
    ostream *outPtr = &cout;
    string outfile = getParam( "out" );
    auto_ptr<ofstream> fileStream;
    if ( hasParam( "out" ) ) {
        size_t idx = outfile.rfind( "/" );
        if ( idx != string::npos ) {
            string dir = outfile.substr( 0 , idx + 1 );
            boost::filesystem::create_directories( dir );
        }
        ofstream * s = new ofstream( outfile.c_str() , ios_base::out );
        fileStream.reset( s );
        outPtr = s;
        if ( ! s->good() ) {
            cerr << "couldn't open [" << outfile << "]" << endl;
            return -1;
        }
    }
    ostream &out = *outPtr;

    BSONObj * fieldsToReturn = 0;
    BSONObj realFieldsToReturn;

    try {
        ns = getNS();
    }
    catch (...) {
        printHelp(cerr);
        return 1;
    }

    auth();

    if ( hasParam( "fields" ) || csv ) {
        needFields();

        // we can't use just _fieldsObj since we support everything getFieldDotted does
        set<string> seen;
        BSONObjBuilder b;

        BSONObjIterator i( _fieldsObj );
        while ( i.more() ){
            BSONElement e = i.next();
            string f = str::before( e.fieldName() , '.' );
            if ( seen.insert( f ).second )
                b.append( f , 1 );
        }

        realFieldsToReturn = b.obj();
        fieldsToReturn = &realFieldsToReturn;
    }

    if ( csv && _fields.size() == 0 ) {
        cerr << "csv mode requires a field list" << endl;
        return -1;
    }

    Query q( getParam( "query" , "" ) );
    if ( q.getFilter().isEmpty() && !hasParam("dbpath") )
        q.snapshot();

    bool slaveOk = _params["slaveOk"].as<bool>();

    auto_ptr<DBClientCursor> cursor =
        conn().query( ns.c_str() , q , 0 , 0 , fieldsToReturn ,
                      ( slaveOk ? QueryOption_SlaveOk : 0 ) | QueryOption_NoCursorTimeout );

    if ( csv ) {
        for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
            if ( i != _fields.begin() )
                out << ",";
            out << *i;
        }
        out << endl;
    }

    if (jsonArray)
        out << '[';

    long long num = 0;
    while ( cursor->more() ) {
        num++;
        BSONObj obj = cursor->next();
        if ( csv ) {
            for ( vector<string>::iterator i=_fields.begin(); i != _fields.end(); i++ ) {
                if ( i != _fields.begin() )
                    out << ",";
                const BSONElement & e = obj.getFieldDotted(i->c_str());
                if ( ! e.eoo() ) {
                    out << csvString(e);
                }
            }
            out << endl;
        }
        else {
            if (jsonArray && num != 1)
                out << ',';
            out << obj.jsonString();
            if (!jsonArray)
                out << endl;
        }
    }

    if (jsonArray)
        out << ']' << endl;

    cerr << "exported " << num << " records" << endl;

    return 0;
}
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg,
         BSONObjBuilder& result) {
    BSONElement argElt = cmdObj["stageDebug"];
    if (argElt.eoo() || !argElt.isABSONObj()) {
        return false;
    }
    BSONObj argObj = argElt.Obj();

    // Pull out the collection name.
    BSONElement collElt = argObj["collection"];
    if (collElt.eoo() || (String != collElt.type())) {
        return false;
    }
    const NamespaceString nss(dbname, collElt.String());
    uassert(ErrorCodes::InvalidNamespace,
            str::stream() << nss.toString() << " is not a valid namespace",
            nss.isValid());

    // Need a context to get the actual Collection*
    // TODO A write lock is currently taken here to accommodate stages that perform writes
    //      (e.g. DeleteStage).  This should be changed to use a read lock for read-only
    //      execution trees.
    ScopedTransaction transaction(txn, MODE_IX);
    AutoGetCollection autoColl(txn, nss, MODE_IX);

    // Make sure the collection is valid.
    Collection* collection = autoColl.getCollection();
    uassert(ErrorCodes::NamespaceNotFound,
            str::stream() << "Couldn't find collection " << nss.ns(),
            collection);

    // Pull out the plan
    BSONElement planElt = argObj["plan"];
    if (planElt.eoo() || !planElt.isABSONObj()) {
        return false;
    }
    BSONObj planObj = planElt.Obj();

    // Parse the plan into these.
    OwnedPointerVector<MatchExpression> exprs;
    unique_ptr<WorkingSet> ws(new WorkingSet());

    PlanStage* userRoot = parseQuery(txn, collection, planObj, ws.get(), &exprs);
    uassert(16911, "Couldn't parse plan from " + cmdObj.toString(), NULL != userRoot);

    // Add a fetch at the top for the user so we can get obj back for sure.
    // TODO: Do we want to do this for the user?  I think so.
    unique_ptr<PlanStage> rootFetch =
        make_unique<FetchStage>(txn, ws.get(), userRoot, nullptr, collection);

    auto statusWithPlanExecutor = PlanExecutor::make(
        txn, std::move(ws), std::move(rootFetch), collection, PlanExecutor::YIELD_AUTO);
    fassert(28536, statusWithPlanExecutor.getStatus());
    std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

    BSONArrayBuilder resultBuilder(result.subarrayStart("results"));

    BSONObj obj;
    PlanExecutor::ExecState state;
    while (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
        resultBuilder.append(obj);
    }

    resultBuilder.done();

    if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
        error() << "Plan executor error during StageDebug command: "
                << PlanExecutor::statestr(state)
                << ", stats: " << Explain::getWinningPlanStats(exec.get());

        return appendCommandStatus(
            result,
            Status(ErrorCodes::OperationFailed,
                   str::stream() << "Executor error during "
                                 << "StageDebug command: "
                                 << WorkingSetCommon::toStatusString(obj)));
    }

    return true;
}
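For context, a hedged sketch of what a caller might send to this command. The exact stage names and argument shapes accepted by parseQuery are not shown in this excerpt, so treat "cscan", "args", "direction", and "filter" below as assumptions rather than a confirmed format:

// Hypothetical command document for a filtered collection scan:
// { stageDebug: { collection: "stuff",
//                 plan: { cscan: { args: { direction: 1 },
//                                  filter: { foo: 5 } } } } }
BSONObj cmd =
    BSON( "stageDebug" << BSON(
              "collection" << "stuff" <<
              "plan" << BSON(
                  "cscan" << BSON(
                      "args" << BSON( "direction" << 1 ) <<
                      "filter" << BSON( "foo" << 5 ) ) ) ) );
// The matching documents come back under the "results" array that
// run() appends above.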
bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool){
    string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe();

    MRSetup mr( dbname , cmdObj.firstElement().embeddedObjectUserCheck() , false );

    set<ServerAndQuery> servers;
    BSONObjBuilder shardCounts;
    map<string,long long> counts;

    BSONObj shards = cmdObj["shards"].embeddedObjectUserCheck();
    vector< auto_ptr<DBClientCursor> > shardCursors;

    { // parse per shard results
        BSONObjIterator i( shards );
        while ( i.more() ){
            BSONElement e = i.next();
            string shard = e.fieldName();

            BSONObj res = e.embeddedObjectUserCheck();

            uassert( 10078 , "something bad happened" ,
                     shardedOutputCollection == res["result"].valuestrsafe() );
            servers.insert( shard );
            shardCounts.appendAs( res["counts"] , shard );

            // Accumulate this shard's counts into the overall totals.
            BSONObjIterator j( res["counts"].embeddedObjectUserCheck() );
            while ( j.more() ){
                BSONElement temp = j.next();
                counts[temp.fieldName()] += temp.numberLong();
            }
        }
    }

    DBDirectClient db;

    { // reduce from each stream

        BSONObj sortKey = BSON( "_id" << 1 );

        // Merge-sorts the shards' partial output collections by _id, so documents
        // sharing a key arrive consecutively.
        ParallelSortClusteredCursor cursor( servers , dbname + "." + shardedOutputCollection ,
                                            Query().sort( sortKey ) );
        cursor.init();

        auto_ptr<Scope> s = globalScriptEngine->getPooledScope( dbname );
        s->localConnect( dbname.c_str() );

        ScriptingFunction reduceFunction = s->createFunction( mr.reduceCode.c_str() );
        ScriptingFunction finalizeFunction = 0;
        if ( mr.finalizeCode.size() )
            finalizeFunction = s->createFunction( mr.finalizeCode.c_str() );

        BSONList values;

        result.append( "result" , mr.finalShort );

        while ( cursor.more() ){
            BSONObj t = cursor.next().getOwned();

            if ( values.size() == 0 ){
                values.push_back( t );
                continue;
            }

            // Same key as the batch so far: keep accumulating.
            if ( t.woSortOrder( *(values.begin()) , sortKey ) == 0 ){
                values.push_back( t );
                continue;
            }

            // Key changed: reduce the completed batch and start a new one.
            db.insert( mr.tempLong ,
                       reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );
            values.clear();
            values.push_back( t );
        }

        // Reduce the final batch.
        if ( values.size() )
            db.insert( mr.tempLong ,
                       reduceValues( values , s.get() , reduceFunction , 1 , finalizeFunction ) );
    }

    long long finalCount = mr.renameIfNeeded( db );
    log(0) << " mapreducefinishcommand " << mr.finalLong << " " << finalCount << endl;

    // Clean up the temporary per-shard output collections.
    for ( set<ServerAndQuery>::iterator i=servers.begin(); i!=servers.end(); i++ ){
        ScopedDbConnection conn( i->_server );
        conn->dropCollection( dbname + "." + shardedOutputCollection );
        conn.done();
    }

    result.append( "shardCounts" , shardCounts.obj() );

    {
        BSONObjBuilder c;
        for ( map<string,long long>::iterator i=counts.begin(); i!=counts.end(); i++ ){
            c.append( i->first , i->second );
        }
        result.append( "counts" , c.obj() );
    }

    return true;
}
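The merge loop above depends on the cursor being sorted by _id: documents sharing a key arrive consecutively, so a single pass can batch each run of equal keys and reduce it as soon as the key changes. A self-contained sketch of that pattern with the standard library only, where integer keys stand in for _id and summing stands in for reduceValues:

#include <iostream>
#include <utility>
#include <vector>

// Stand-in for reduceValues(): collapse a batch of equal-key values.
static int reduceBatch(const std::vector<int>& batch) {
    int sum = 0;
    for (size_t i = 0; i < batch.size(); ++i) sum += batch[i];
    return sum;
}

int main() {
    // A key-sorted (key, value) stream, as ParallelSortClusteredCursor yields.
    std::vector< std::pair<int,int> > stream;
    stream.push_back(std::make_pair(1, 2));
    stream.push_back(std::make_pair(1, 3));
    stream.push_back(std::make_pair(2, 5));
    stream.push_back(std::make_pair(3, 1));

    std::vector<int> batch;
    int curKey = 0;
    for (size_t i = 0; i < stream.size(); ++i) {
        if (!batch.empty() && stream[i].first != curKey) {
            // Key changed: the run is complete, reduce and flush it.
            std::cout << curKey << " -> " << reduceBatch(batch) << std::endl;
            batch.clear();
        }
        curKey = stream[i].first;
        batch.push_back(stream[i].second);
    }
    if (!batch.empty())  // reduce the final run
        std::cout << curKey << " -> " << reduceBatch(batch) << std::endl;
    return 0;
}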
/**
 * Also called by db/ops/query.cpp.  This is the new getMore entry point.
 */
QueryResult* newGetMore(const char* ns, int ntoreturn, long long cursorid, CurOp& curop,
                        int pass, bool& exhaust, bool* isCursorAuthorized) {
    exhaust = false;
    int bufSize = 512 + sizeof(QueryResult) + MaxBytesToReturnToClientAtOnce;

    BufBuilder bb(bufSize);
    bb.skip(sizeof(QueryResult));

    // This is a read lock.  TODO: There is a cursor flag for not needing this.  Do we care?
    Client::ReadContext ctx(ns);

    // log() << "running getMore in new system, cursorid " << cursorid << endl;

    // This checks to make sure the operation is allowed on a replicated node.  Since we are not
    // passing in a query object (necessary to check SlaveOK query option), the only state where
    // reads are allowed is PRIMARY (or master in master/slave).  This function uasserts if
    // reads are not okay.
    replVerifyReadsOk();

    // A pin performs a CC lookup and if there is a CC, increments the CC's pin value so it
    // doesn't time out.  Also informs ClientCursor that there is somebody actively holding the
    // CC, so don't delete it.
    ClientCursorPin ccPin(cursorid);
    ClientCursor* cc = ccPin.c();

    // These are set in the QueryResult msg we return.
    int resultFlags = ResultFlag_AwaitCapable;

    int numResults = 0;
    int startingResult = 0;

    if (NULL == cc) {
        cursorid = 0;
        resultFlags = ResultFlag_CursorNotFound;
    }
    else {
        // Quote: check for spoofing of the ns such that it does not match the one originally
        // there for the cursor
        uassert(17011, "auth error", str::equals(ns, cc->ns().c_str()));
        *isCursorAuthorized = true;

        // TODO: fail point?

        // If the operation that spawned this cursor had a time limit set, apply leftover
        // time to this getmore.
        curop.setMaxTimeMicros(cc->getLeftoverMaxTimeMicros());
        killCurrentOp.checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

        // TODO:
        // curop.debug().query = BSONForQuery
        // curop.setQuery(curop.debug().query);

        // TODO: What is pass?
        if (0 == pass) { cc->updateSlaveLocation(curop); }

        CollectionMetadataPtr collMetadata = cc->getCollMetadata();

        // If we're replaying the oplog, we save the last time that we read.
        OpTime slaveReadTill;

        // What number result are we starting at?  Used to fill out the reply.
        startingResult = cc->pos();

        // What gives us results.
        Runner* runner = cc->getRunner();
        const int queryOptions = cc->queryOptions();

        // Get results out of the runner.
        runner->restoreState();

        BSONObj obj;
        Runner::RunnerState state;
        while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
            // If we're sharded make sure that we don't return any data that hasn't been
            // migrated off of our shard yet.
            if (collMetadata) {
                KeyPattern kp(collMetadata->getKeyPattern());
                if (!collMetadata->keyBelongsToMe(kp.extractSingleKey(obj))) { continue; }
            }

            // Add result to output buffer.
            bb.appendBuf((void*)obj.objdata(), obj.objsize());

            // Count the result.
            ++numResults;

            // Possibly note slave's position in the oplog.
            if (queryOptions & QueryOption_OplogReplay) {
                BSONElement e = obj["ts"];
                if (Date == e.type() || Timestamp == e.type()) {
                    slaveReadTill = e._opTime();
                }
            }

            if ((numResults && numResults >= ntoreturn)
                || bb.len() > MaxBytesToReturnToClientAtOnce) {
                break;
            }
        }

        if (Runner::RUNNER_EOF == state && 0 == numResults
            && (queryOptions & QueryOption_CursorTailable)
            && (queryOptions & QueryOption_AwaitData) && (pass < 1000)) {
            // If the cursor is tailable we don't kill it if it's eof.  We let it try to get
            // data some # of times first.
            return 0;
        }
        else if (Runner::RUNNER_DEAD == state || Runner::RUNNER_EOF == state) {
            ccPin.free();
            // cc is now invalid, as is the runner
            cursorid = 0;
            cc = NULL;
        }
        else {
            // Continue caching the ClientCursor.
            cc->incPos(numResults);
            runner->saveState();

            // Possibly note slave's position in the oplog.
            if ((queryOptions & QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
                cc->slaveReadTill(slaveReadTill);
            }

            exhaust = (queryOptions & QueryOption_Exhaust);

            // If the getmore had a time limit, remaining time is "rolled over" back to the
            // cursor (for use by future getmore ops).
            cc->setLeftoverMaxTimeMicros( curop.getRemainingMaxTimeMicros() );
        }
    }

    QueryResult* qr = reinterpret_cast<QueryResult*>(bb.buf());
    qr->len = bb.len();
    qr->setOperation(opReply);
    qr->_resultFlags() = resultFlags;
    qr->cursorId = cursorid;
    qr->startingFrom = startingResult;
    qr->nReturned = numResults;
    bb.decouple();

    return qr;
}
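The returned buffer is a wire-format OP_REPLY: the header fields filled in above are followed by nReturned BSON documents packed back to back. A sketch of how a caller could walk it; QueryResult::data() as the pointer to the first packed document is an assumption based on this era's dbmessage.h, not something shown in this excerpt:

// Hypothetical consumption of the newGetMore() result.
bool exhaust = false;
bool authed = false;
QueryResult* qr = newGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust, &authed);

const char* p = qr->data();             // first packed document
for (int i = 0; i < qr->nReturned; ++i) {
    BSONObj doc(p);                     // wraps the buffer, no copy
    p += doc.objsize();                 // documents are contiguous
    // ... consume doc ...
}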