/**
 * Produce a Runner for the raw (not yet canonicalized) query 'unparsedQuery'.
 *
 * Three outcomes are possible:
 *  - 'collection' is NULL: an EOFRunner is returned and '*outCanonicalQuery' is NULL.
 *  - the query is a simple _id query and an _id index exists: an IDHackRunner is returned
 *    and '*outCanonicalQuery' is NULL.
 *  - otherwise the query is canonicalized into '*outCanonicalQuery' and the work is
 *    delegated to the CanonicalQuery-based getRunner() overload.
 *
 * Returns the canonicalization error, or whatever the delegated overload returns.
 */
Status getRunner(Collection* collection,
                 const std::string& ns,
                 const BSONObj& unparsedQuery,
                 Runner** outRunner,
                 CanonicalQuery** outCanonicalQuery,
                 size_t plannerOptions) {

    // Without a collection there is nothing to scan.
    if (NULL == collection) {
        *outCanonicalQuery = NULL;
        *outRunner = new EOFRunner(NULL, ns);
        return Status::OK();
    }

    // The _id index is only looked up when the query has the simple _id shape.
    const bool idHackEligible = CanonicalQuery::isSimpleIdQuery(unparsedQuery)
                                && collection->getIndexCatalog()->findIdIndex();
    if (idHackEligible) {
        *outCanonicalQuery = NULL;
        *outRunner = new IDHackRunner(collection, unparsedQuery["_id"].wrap());
        return Status::OK();
    }

    // General path: canonicalize, then let the planner-based overload choose a runner.
    Status canonStatus = CanonicalQuery::canonicalize(collection->ns(),
                                                      unparsedQuery,
                                                      outCanonicalQuery);
    if (!canonStatus.isOK()) {
        return canonStatus;
    }
    return getRunner(collection, *outCanonicalQuery, outRunner, plannerOptions);
}
/**
 * Produce a Runner for the raw (not yet canonicalized) query 'unparsedQuery', logging
 * (at level 2) whenever one of the shortcut runners is chosen.
 *
 *  - 'collection' is NULL: returns an EOFRunner, '*outCanonicalQuery' is NULL.
 *  - simple _id query with an _id index available: returns an IDHackRunner,
 *    '*outCanonicalQuery' is NULL.
 *  - otherwise: canonicalizes the query (with a WhereCallbackReal bound to the
 *    collection's database) into '*outCanonicalQuery' and delegates to the
 *    CanonicalQuery-based getRunner() overload.
 *
 * Returns the canonicalization error, or whatever the delegated overload returns.
 */
Status getRunner(Collection* collection,
                 const std::string& ns,
                 const BSONObj& unparsedQuery,
                 Runner** outRunner,
                 CanonicalQuery** outCanonicalQuery,
                 size_t plannerOptions) {

    if (NULL == collection) {
        LOG(2) << "Collection " << ns << " does not exist."
               << " Using EOF runner: " << unparsedQuery.toString();
        *outCanonicalQuery = NULL;
        *outRunner = new EOFRunner(NULL, ns);
        return Status::OK();
    }

    // The _id index is only looked up when the query has the simple _id shape.
    const bool idHackEligible = CanonicalQuery::isSimpleIdQuery(unparsedQuery)
                                && collection->getIndexCatalog()->findIdIndex();
    if (idHackEligible) {
        LOG(2) << "Using idhack: " << unparsedQuery.toString();
        *outCanonicalQuery = NULL;
        *outRunner = new IDHackRunner(collection, unparsedQuery["_id"].wrap());
        return Status::OK();
    }

    // General path: canonicalize, then let the planner-based overload choose a runner.
    const WhereCallbackReal whereCallback(collection->ns().db());
    Status canonStatus = CanonicalQuery::canonicalize(collection->ns(),
                                                      unparsedQuery,
                                                      outCanonicalQuery,
                                                      whereCallback);
    if (!canonStatus.isOK()) {
        return canonStatus;
    }
    return getRunner(collection, *outCanonicalQuery, outRunner, plannerOptions);
}
void run(int p) { hits = 0; done = 0; port = p; ServerSocket ss(port, 10); if(ss.isListening()) { TRACE("Listening on port %d\n", port); while(!done) { Socket *sock = ss.accept(); hits++; TRACE("New connection %d\n", hits); BlockSocket *bs = new BlockSocket(sock); BlockRunner *rr = getRunner(); rr->bs = bs; rr->start(); } TRACE("Exiting ...\n"); for(int i = 0; i < (4 * 5); i++) { int n = r_running.length(); if(n == 0) break; TRACE(" waiting for %d threads ...\n", n); finish_runners(); System::msleep(250); } } }
/**
 * Return every document in namespace 'ns' that matches 'query'.
 *
 * Asserts that at least a read lock is held on 'ns'. Throws a user assertion
 * (17236 / 17237) if the query cannot be canonicalized or no runner can be obtained.
 */
vector<BSONObj> Helpers::findAll( const string& ns , const BSONObj& query ) {
    Lock::assertAtLeastReadLocked(ns);
    Client::Context ctx(ns);

    CanonicalQuery* cq;
    const NamespaceString nss(ns);
    const WhereCallbackReal whereCallback(nss.db());

    const bool canonicalized =
        CanonicalQuery::canonicalize(ns, query, &cq, whereCallback).isOK();
    uassert(17236, "Could not canonicalize " + query.toString(), canonicalized);

    Runner* rawRunner;
    const bool gotRunner =
        getRunner(ctx.db()->getCollection( ns ), cq, &rawRunner).isOK();
    uassert(17237, "Could not get runner for query " + query.toString(), gotRunner);

    vector<BSONObj> matches;
    auto_ptr<Runner> runner(rawRunner);

    // Drain the runner; anything other than RUNNER_ADVANCED ends the scan.
    BSONObj doc;
    while (runner->getNext(&doc, NULL) == Runner::RUNNER_ADVANCED) {
        matches.push_back(doc);
    }

    return matches;
}
/**
 * Find the location of one document in 'collection' that matches 'query'.
 * Set your db SavedContext first.
 *
 * Returns a default-constructed DiskLoc when 'collection' is NULL or nothing matches.
 * When 'requireIndex' is true the planner is given NO_TABLE_SCAN.
 * Fails with massert 17244 / 17245 if canonicalization or runner creation fails.
 */
DiskLoc Helpers::findOne(OperationContext* txn,
                         Collection* collection,
                         const BSONObj &query,
                         bool requireIndex) {
    if ( NULL == collection ) {
        return DiskLoc();
    }

    CanonicalQuery* cq;
    const WhereCallbackReal whereCallback(collection->ns().db());

    const bool canonicalized =
        CanonicalQuery::canonicalize(collection->ns(), query, &cq, whereCallback).isOK();
    massert(17244, "Could not canonicalize " + query.toString(), canonicalized);

    size_t options = QueryPlannerParams::DEFAULT;
    if (requireIndex) {
        options = QueryPlannerParams::NO_TABLE_SCAN;
    }

    Runner* rawRunner;
    const bool gotRunner = getRunner(collection, cq, &rawRunner, options).isOK();
    massert(17245, "Could not get runner for query " + query.toString(), gotRunner);

    auto_ptr<Runner> runner(rawRunner);

    // Only the first result (if any) is of interest.
    DiskLoc found;
    const bool advanced = (runner->getNext(NULL, &found) == Runner::RUNNER_ADVANCED);
    return advanced ? found : DiskLoc();
}
/**
 * Receive a structure and keep only the one with the lowest internal energy seen so far.
 *
 * The first structure received is always retained. For each later structure:
 *  - if it has an internal energy and the retained structure either has none or has a
 *    higher one, the retained structure is dropped and the new one takes its place;
 *  - otherwise the new structure is dropped.
 *
 * The retained structure's energy is checked for NULL before use: the first structure is
 * stored without inspecting its properties, so it may legitimately lack ENERGY_INTERNAL.
 */
void LowestFreeEnergy::in(spipe::common::StructureData & data)
{
  if(!myLowest)
  {
    // Nothing retained yet: keep whatever arrives first.
    myLowest = &data;
    return;
  }

  ssc::Structure * structure = data.getStructure();
  double * internalEnergy = structure->getProperty(structure_properties::general::ENERGY_INTERNAL);

  bool replaceLowest = false;
  if(internalEnergy)
  {
    double * lowestEnergy =
      myLowest->getStructure()->getProperty(structure_properties::general::ENERGY_INTERNAL);
    // A retained structure without an energy cannot be compared; any structure that
    // does carry an energy supersedes it.
    replaceLowest = !lowestEnergy || *internalEnergy < *lowestEnergy;
  }

  if(replaceLowest)
  {
    getRunner()->dropData(*myLowest);
    myLowest = &data;
  }
  else
    getRunner()->dropData(data);
}
/**
 * Get a runner for an already-canonicalized query, resolving the collection from the
 * current client's database.
 *
 * The runner could be a SingleSolutionRunner, a CachedQueryRunner, or a MultiPlanRunner,
 * depending on the cache/query solver/etc.
 *
 * Both the query and the current client's database must be non-NULL (verified).
 */
Status getRunner(CanonicalQuery* rawCanonicalQuery,
                 Runner** out,
                 size_t plannerOptions) {
    verify(rawCanonicalQuery);

    Database* currentDb = cc().database();
    verify(currentDb);

    // Delegate to the overload that takes the collection explicitly.
    return getRunner(currentDb->getCollection(rawCanonicalQuery->ns()),
                     rawCanonicalQuery,
                     out,
                     plannerOptions);
}
/**
 * Get a runner suitable for counting the documents of 'collection' that match 'query',
 * honoring the index hint 'hintObj'.
 *
 * 'collection' must be non-NULL (verified). Canonicalization failures are raised through
 * uassertStatusOK; the returned Status is that of the underlying getRunner() call, which
 * is invoked with QueryPlannerParams::PRIVATE_IS_COUNT.
 */
Status getRunnerCount(Collection* collection,
                      const BSONObj& query,
                      const BSONObj& hintObj,
                      Runner** out) {
    verify(collection);

    // No sort, no projection, no skip, no limit -- only the filter and the hint.
    CanonicalQuery* countQuery;
    const Status canonStatus = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                            query,
                                                            BSONObj(),
                                                            BSONObj(),
                                                            0,
                                                            0,
                                                            hintObj,
                                                            &countQuery);
    uassertStatusOK(canonStatus);

    return getRunner(collection, countQuery, out, QueryPlannerParams::PRIVATE_IS_COUNT);
}
/**
 * Find the location of one document in namespace 'ns' that matches 'query'.
 * Set your db SavedContext first.
 *
 * Returns a default-constructed DiskLoc when nothing matches.
 * When 'requireIndex' is true the planner is given NO_TABLE_SCAN.
 * Fails with massert 17244 / 17245 if canonicalization or runner creation fails.
 */
DiskLoc Helpers::findOne(const StringData& ns, const BSONObj &query, bool requireIndex) {
    CanonicalQuery* cq;
    const bool canonicalized =
        CanonicalQuery::canonicalize(ns.toString(), query, &cq).isOK();
    massert(17244, "Could not canonicalize " + query.toString(), canonicalized);

    size_t options = QueryPlannerParams::DEFAULT;
    if (requireIndex) {
        options = QueryPlannerParams::NO_TABLE_SCAN;
    }

    Runner* rawRunner;
    const bool gotRunner = getRunner(cq, &rawRunner, options).isOK();
    massert(17245, "Could not get runner for query " + query.toString(), gotRunner);

    auto_ptr<Runner> runner(rawRunner);

    // Only the first result (if any) is of interest.
    DiskLoc found;
    const bool advanced = (runner->getNext(NULL, &found) == Runner::RUNNER_ADVANCED);
    return advanced ? found : DiskLoc();
}
/**
 * Build a runner for an oplog-replay query by first locating where in the oplog the scan
 * should begin, then creating a forward collection scan from that point.
 *
 * If the start-finding stage hits EOF straight away, the regular getRunner() path is used
 * instead. Any other non-ADVANCED state yields an InternalError status.
 */
Status getOplogStartHack(CanonicalQuery* cq, Runner** runnerOut) {
    // Make an oplog start finding stage.
    WorkingSet* startWs = new WorkingSet();
    OplogStart* startStage = new OplogStart(cq->ns(), cq->root(), startWs);

    // The InternalRunner takes ownership of the working set and the stage.
    auto_ptr<InternalRunner> startFinder(new InternalRunner(cq->ns(), startStage, startWs));
    startFinder->setYieldPolicy(Runner::YIELD_AUTO);

    // The stage returns a DiskLoc of where to start.
    DiskLoc startLoc;
    const Runner::RunnerState state = startFinder->getNext(NULL, &startLoc);

    if (Runner::RUNNER_ADVANCED != state) {
        // EOF is normal: the start of the oplog is the beginning of the collection.
        if (Runner::RUNNER_EOF == state) {
            return getRunner(cq, runnerOut);
        }

        // Anything else is not normal: an error was encountered.
        return Status(ErrorCodes::InternalError,
                      "quick oplog start location had error...?");
    }

    // cout << "diskloc is " << startLoc.toString() << endl;

    // Build our collection scan, starting at the location found above.
    CollectionScanParams params;
    params.ns = cq->ns();
    params.start = startLoc;
    params.direction = CollectionScanParams::FORWARD;
    params.tailable = cq->getParsed().hasOption(QueryOption_CursorTailable);

    WorkingSet* scanWs = new WorkingSet();
    CollectionScan* scan = new CollectionScan(params, scanWs, cq->root());

    // The SingleSolutionRunner takes ownership of cq, the scan and its working set.
    *runnerOut = new SingleSolutionRunner(cq, NULL, scan, scanWs);
    return Status::OK();
}
/** * This is called by db/ops/query.cpp. This is the entry point for answering a query. */ std::string newRunQuery(CanonicalQuery* cq, CurOp& curop, Message &result) { QLOG() << "Running query on new system: " << cq->toString(); // This is a read lock. Client::ReadContext ctx(cq->ns(), storageGlobalParams.dbpath); // Parse, canonicalize, plan, transcribe, and get a runner. Runner* rawRunner = NULL; // We use this a lot below. const LiteParsedQuery& pq = cq->getParsed(); // Need to call cq->toString() now, since upon error getRunner doesn't guarantee // cq is in a consistent state. string cqStr = cq->toString(); // We'll now try to get the query runner that will execute this query for us. There // are a few cases in which we know upfront which runner we should get and, therefore, // we shortcut the selection process here. // // (a) If the query is over a collection that doesn't exist, we get a special runner // that's is so (a runner) which doesn't return results, the EOFRunner. // // (b) if the query is a replication's initial sync one, we get a SingleSolutinRunner // that uses a specifically designed stage that skips extents faster (see details in // exec/oplogstart.h) // // Otherwise we go through the selection of which runner is most suited to the // query + run-time context at hand. Status status = Status::OK(); if (ctx.ctx().db()->getCollection(cq->ns()) == NULL) { rawRunner = new EOFRunner(cq, cq->ns()); } else if (pq.hasOption(QueryOption_OplogReplay)) { status = getOplogStartHack(cq, &rawRunner); } else { // Takes ownership of cq. 
size_t options = QueryPlannerParams::DEFAULT; if (shardingState.needCollectionMetadata(pq.ns())) { options |= QueryPlannerParams::INCLUDE_SHARD_FILTER; } status = getRunner(cq, &rawRunner, options); } if (!status.isOK()) { uasserted(17007, "Couldn't get runner for query because: " + status.reason() + " query is " + cqStr); } verify(NULL != rawRunner); auto_ptr<Runner> runner(rawRunner); // We freak out later if this changes before we're done with the query. const ChunkVersion shardingVersionAtStart = shardingState.getVersion(cq->ns()); // Handle query option $maxTimeMS (not used with commands). curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000); killCurrentOp.checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point. // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set. replVerifyReadsOk(&pq); // If this exists, the collection is sharded. // If it doesn't exist, we can assume we're not sharded. // If we're sharded, we might encounter data that is not consistent with our sharding state. // We must ignore this data. CollectionMetadataPtr collMetadata; if (!shardingState.needCollectionMetadata(pq.ns())) { collMetadata = CollectionMetadataPtr(); } else { collMetadata = shardingState.getCollectionMetadata(pq.ns()); } // Run the query. // bb is used to hold query results // this buffer should contain either requested documents per query or // explain information, but not both BufBuilder bb(32768); bb.skip(sizeof(QueryResult)); // How many results have we obtained from the runner? int numResults = 0; // If we're replaying the oplog, we save the last time that we read. OpTime slaveReadTill; // Do we save the Runner in a ClientCursor for getMore calls later? bool saveClientCursor = false; // We turn on auto-yielding for the runner here. The runner registers itself with the // active runners list in ClientCursor. 
ClientCursor::registerRunner(runner.get()); runner->setYieldPolicy(Runner::YIELD_AUTO); auto_ptr<DeregisterEvenIfUnderlyingCodeThrows> safety( new DeregisterEvenIfUnderlyingCodeThrows(runner.get())); BSONObj obj; Runner::RunnerState state; // uint64_t numMisplacedDocs = 0; // set this outside loop. we will need to use this both within loop and when deciding // to fill in explain information const bool isExplain = pq.isExplain(); while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) { // Add result to output buffer. This is unnecessary if explain info is requested if (!isExplain) { bb.appendBuf((void*)obj.objdata(), obj.objsize()); } // Count the result. ++numResults; // Possibly note slave's position in the oplog. if (pq.hasOption(QueryOption_OplogReplay)) { BSONElement e = obj["ts"]; if (Date == e.type() || Timestamp == e.type()) { slaveReadTill = e._opTime(); } } // TODO: only one type of 2d search doesn't support this. We need a way to pull it out // of CanonicalQuery. :( const bool supportsGetMore = true; if (isExplain) { if (enoughForExplain(pq, numResults)) { break; } } else if (!supportsGetMore && (enough(pq, numResults) || bb.len() >= MaxBytesToReturnToClientAtOnce)) { break; } else if (enoughForFirstBatch(pq, numResults, bb.len())) { QLOG() << "Enough for first batch, wantMore=" << pq.wantMore() << " numToReturn=" << pq.getNumToReturn() << " numResults=" << numResults << endl; // If only one result requested assume it's a findOne() and don't save the cursor. if (pq.wantMore() && 1 != pq.getNumToReturn()) { QLOG() << " runner EOF=" << runner->isEOF() << endl; saveClientCursor = !runner->isEOF(); } break; } } // If we cache the runner later, we want to deregister it as it receives notifications // anyway by virtue of being cached. // // If we don't cache the runner later, we are deleting it, so it must be deregistered. // // So, no matter what, deregister the runner. 
safety.reset(); // Caller expects exceptions thrown in certain cases: // * in-memory sort using too much RAM. if (Runner::RUNNER_ERROR == state) { uasserted(17144, "Runner error, memory limit for sort probably exceeded"); } // Why save a dead runner? if (Runner::RUNNER_DEAD == state) { saveClientCursor = false; } else if (pq.hasOption(QueryOption_CursorTailable)) { // If we're tailing a capped collection, we don't bother saving the cursor if the // collection is empty. Otherwise, the semantics of the tailable cursor is that the // client will keep trying to read from it. So we'll keep it around. Collection* collection = ctx.ctx().db()->getCollection(cq->ns()); if (collection && collection->numRecords() != 0 && pq.getNumToReturn() != 1) { saveClientCursor = true; } } // TODO(greg): This will go away soon. if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // if the version changed during the query we might be missing some data and its safe to // send this as mongos can resend at this point throw SendStaleConfigException(pq.ns(), "version changed during initial query", shardingVersionAtStart, shardingState.getVersion(pq.ns())); } // Append explain information to query results by asking the runner to produce them. if (isExplain) { TypeExplain* bareExplain; Status res = runner->getExplainPlan(&bareExplain); if (!res.isOK()) { error() << "could not produce explain of query '" << pq.getFilter() << "', error: " << res.reason(); // If numResults and the data in bb don't correspond, we'll crash later when rooting // through the reply msg. BSONObj emptyObj; bb.appendBuf((void*)emptyObj.objdata(), emptyObj.objsize()); // The explain output is actually a result. numResults = 1; // TODO: we can fill out millis etc. here just fine even if the plan screwed up. } else { boost::scoped_ptr<TypeExplain> explain(bareExplain); // Fill in the missing run-time fields in explain, starting with propeties of // the process running the query. 
std::string server = mongoutils::str::stream() << getHostNameCached() << ":" << serverGlobalParams.port; explain->setServer(server); // We might have skipped some results due to chunk migration etc. so our count is // correct. explain->setN(numResults); // Clock the whole operation. explain->setMillis(curop.elapsedMillis()); BSONObj explainObj = explain->toBSON(); bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize()); // The explain output is actually a result. numResults = 1; } } long long ccId = 0; if (saveClientCursor) { // We won't use the runner until it's getMore'd. runner->saveState(); // Allocate a new ClientCursor. We don't have to worry about leaking it as it's // inserted into a global map by its ctor. ClientCursor* cc = new ClientCursor(runner.get(), cq->getParsed().getOptions(), cq->getParsed().getFilter()); ccId = cc->cursorid(); QLOG() << "caching runner with cursorid " << ccId << " after returning " << numResults << " results" << endl; // ClientCursor takes ownership of runner. Release to make sure it's not deleted. runner.release(); // TODO document if (pq.hasOption(QueryOption_OplogReplay) && !slaveReadTill.isNull()) { cc->slaveReadTill(slaveReadTill); } // TODO document if (pq.hasOption(QueryOption_Exhaust)) { curop.debug().exhaust = true; } // Set attributes for getMore. cc->setCollMetadata(collMetadata); cc->setPos(numResults); // If the query had a time limit, remaining time is "rolled over" to the cursor (for // use by future getmore ops). cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros()); } else { QLOG() << "not caching runner but returning " << numResults << " results\n"; } // Add the results from the query into the output buffer. result.appendData(bb.buf(), bb.len()); bb.decouple(); // Fill out the output buffer's header. QueryResult* qr = static_cast<QueryResult*>(result.header()); qr->cursorId = ccId; curop.debug().cursorid = (0 == ccId ? 
-1 : ccId); qr->setResultFlagsToOk(); qr->setOperation(opReply); qr->startingFrom = 0; qr->nReturned = numResults; curop.debug().ntoskip = pq.getSkip(); curop.debug().nreturned = numResults; // curop.debug().exhaust is set above. return curop.debug().exhaust ? pq.ns() : ""; }
/* * Runs the command object cmdobj on the db with name dbname and puts result in result. * @param dbname, name of db * @param cmdobj, object that contains entire command * @param options * @param errmsg, reference to error message * @param result, reference to builder for result * @param fromRepl * @return true if successful, false otherwise */ bool FTSCommand::_run(const string& dbname, BSONObj& cmdObj, int cmdOptions, const string& ns, const string& searchString, string language, // "" for not-set int limit, BSONObj& filter, BSONObj& projection, string& errmsg, BSONObjBuilder& result ) { Timer comm; // Rewrite the cmd as a normal query. BSONObjBuilder queryBob; queryBob.appendElements(filter); BSONObjBuilder textBob; textBob.append("$search", searchString); if (!language.empty()) { textBob.append("$language", language); } queryBob.append("$text", textBob.obj()); // This is the query we exec. BSONObj queryObj = queryBob.obj(); // We sort by the score. BSONObj sortSpec = BSON("$s" << BSON("$meta" << "text")); // We also project the score into the document and strip it out later during the reformatting // of the results. BSONObjBuilder projBob; projBob.appendElements(projection); projBob.appendElements(sortSpec); BSONObj projObj = projBob.obj(); CanonicalQuery* cq; if (!CanonicalQuery::canonicalize(ns, queryObj, sortSpec, projObj, 0, limit, BSONObj(), &cq).isOK()) { errmsg = "Can't parse filter / create query"; return false; } Runner* rawRunner; if (!getRunner(cq, &rawRunner, 0).isOK()) { errmsg = "can't get query runner"; return false; } auto_ptr<Runner> runner(rawRunner); BSONArrayBuilder resultBuilder(result.subarrayStart("results")); // Quoth: "leave a mb for other things" int resultSize = 1024 * 1024; int numReturned = 0; BSONObj obj; while (Runner::RUNNER_ADVANCED == runner->getNext(&obj, NULL)) { if ((resultSize + obj.objsize()) >= BSONObjMaxUserSize) { break; } // We return an array of results. Add another element. 
BSONObjBuilder oneResultBuilder(resultBuilder.subobjStart()); oneResultBuilder.append("score", obj["$s"].number()); // Strip out the score from the returned obj. BSONObjIterator resIt(obj); BSONObjBuilder resBob; while (resIt.more()) { BSONElement elt = resIt.next(); if (!mongoutils::str::equals("$s", elt.fieldName())) { resBob.append(elt); } } oneResultBuilder.append("obj", resBob.obj()); BSONObj addedArrayObj = oneResultBuilder.done(); resultSize += addedArrayObj.objsize(); numReturned++; } resultBuilder.done(); // returns some stats to the user BSONObjBuilder stats(result.subobjStart("stats")); // Fill in nscanned from the explain. TypeExplain* bareExplain; Status res = runner->getExplainPlan(&bareExplain); if (res.isOK()) { auto_ptr<TypeExplain> explain(bareExplain); stats.append("nscanned", explain->getNScanned()); stats.append("nscannedObjects", explain->getNScannedObjects()); } stats.appendNumber( "n" , numReturned ); stats.append( "timeMicros", (int)comm.micros() ); stats.done(); return true; }
int StaticRunner::launch() { //set execution thread in java if (!initialJavaHooks && getScilabMode() != SCILAB_NWNI) { initialJavaHooks = true; // Execute the initial hooks registered in Scilab.java ExecuteInitialHooks(); } int iRet = 0; // get the runner to execute std::unique_ptr<Runner> runMe(getRunner()); // set if the current comment is interruptible setInterruptibleCommand(runMe->isInterruptible()); debugger::DebuggerMagager* manager = debugger::DebuggerMagager::getInstance(); ConfigVariable::resetExecutionBreak(); int oldMode = ConfigVariable::getPromptMode(); symbol::Context* pCtx = symbol::Context::getInstance(); int scope = pCtx->getScopeLevel(); // a TCL command display nothing int iOldPromptMode = 0; if (runMe->getCommandOrigin() == TCLSCI) { iOldPromptMode = ConfigVariable::getPromptMode(); ConfigVariable::setPromptMode(-1); } try { int level = ConfigVariable::getRecursionLevel(); try { runMe->getProgram()->accept(*(runMe->getVisitor())); } catch (const ast::RecursionException& re) { // management of pause if (ConfigVariable::getPauseLevel()) { ConfigVariable::DecreasePauseLevel(); throw re; } //close opened scope during try while (pCtx->getScopeLevel() > scope) { pCtx->scope_end(); } //decrease recursion to init value and close where while (ConfigVariable::getRecursionLevel() > level) { ConfigVariable::where_end(); ConfigVariable::decreaseRecursion(); } ConfigVariable::resetWhereError(); ConfigVariable::setPromptMode(oldMode); //print msg about recursion limit and trigger an error wchar_t sz[1024]; os_swprintf(sz, 1024, _W("Recursion limit reached (%d).\n").data(), ConfigVariable::getRecursionLimit()); throw ast::InternalError(sz); } } catch (const ast::InternalError& se) { if (runMe->getCommandOrigin() == TCLSCI) { ConfigVariable::setPromptMode(iOldPromptMode); } std::wostringstream ostr; ConfigVariable::whereErrorToString(ostr); scilabErrorW(ostr.str().c_str()); scilabErrorW(se.GetErrorMessage().c_str()); ConfigVariable::resetWhereError(); iRet = 1; 
} catch (const ast::InternalAbort& ia) { if (runMe->getCommandOrigin() == TCLSCI) { ConfigVariable::setPromptMode(iOldPromptMode); } // management of pause if (ConfigVariable::getPauseLevel()) { ConfigVariable::DecreasePauseLevel(); throw ia; } // close all scope before return to console scope symbol::Context* pCtx = symbol::Context::getInstance(); while (pCtx->getScopeLevel() > scope) { pCtx->scope_end(); } // send the good signal about the end of execution sendExecDoneSignal(runMe.get()); //clean debugger step flag if debugger is not interrupted ( end of debug ) manager->resetStep(); throw ia; } if (runMe->getCommandOrigin() == TCLSCI) { ConfigVariable::setPromptMode(iOldPromptMode); } if (getScilabMode() != SCILAB_NWNI && getScilabMode() != SCILAB_API) { char *cwd = NULL; int err = 0; UpdateBrowseVar(); cwd = scigetcwd(&err); if (cwd) { FileBrowserChDir(cwd); FREE(cwd); } } // reset error state when new prompt occurs ConfigVariable::resetError(); // send the good signal about the end of execution sendExecDoneSignal(runMe.get()); //clean debugger step flag if debugger is not interrupted ( end of debug ) manager->resetStep(); return iRet; }
/**
 * Runs a "group" aggregation: every document of 'ns' matching 'query' is bucketed by a
 * key (derived through getKey() from 'keyPattern' / the 'keyFunctionCode' script) and
 * folded into its bucket by invoking the 'reduceCode' script inside a pooled scripting
 * scope. If 'finalize' is non-empty, that script is applied to every bucket afterwards.
 *
 * On return, 'result' holds:
 *   "retval" - the script-side $arr array of buckets,
 *   "count"  - the number of documents processed,
 *   "keys"   - the number of distinct keys seen.
 *
 * Failures are reported by throwing (uasserted / uassert / UserException); the function
 * otherwise returns true. 'errmsg' is not written by this function body.
 */
bool group( OperationContext* txn,
            Database* db,
            const std::string& ns,
            const BSONObj& query,
            BSONObj keyPattern,
            const std::string& keyFunctionCode,
            const std::string& reduceCode,
            const char * reduceScope,
            BSONObj initial,
            const std::string& finalize,
            string& errmsg,
            BSONObjBuilder& result ) {

    // The pooled scope is requested under a name that includes the authenticated
    // users' token.
    const string userToken = ClientBasic::getCurrent()->getAuthorizationSession()
                                                      ->getAuthenticatedUserNamesToken();
    auto_ptr<Scope> s = globalScriptEngine->getPooledScope(db->name(), "group" + userToken);

    if ( reduceScope )
        s->init( reduceScope );

    // Script-side globals: $initial (template for new buckets), $reduce (the user's
    // reducer) and $arr (the array of buckets, reset to empty).
    s->setObject( "$initial" , initial , true );

    s->exec( "$reduce = " + reduceCode , "$group reduce setup" , false , true , true , 100 );
    s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 );

    // Per-document step: create bucket $arr[n] from $key and $initial if it does not
    // exist yet, then fold 'obj' into it with $reduce.
    ScriptingFunction f = s->createFunction(
        "function(){ "
        " if ( $arr[n] == null ){ "
        " next = {}; "
        " Object.extend( next , $key ); "
        " Object.extend( next , $initial , true ); "
        " $arr[n] = next; "
        " next = null; "
        " } "
        " $reduce( obj , $arr[n] ); "
        "}" );

    // Optional key function; stays 0 when no key function code was supplied.
    ScriptingFunction keyFunction = 0;
    if ( keyFunctionCode.size() ) {
        keyFunction = s->createFunction( keyFunctionCode.c_str() );
    }

    // keysize / keynum is handed to getKey() for every document; keysize accumulates
    // the sizes of the keys produced and keynum counts documents (starting at 1).
    double keysize = keyPattern.objsize() * 3;
    double keynum = 1;

    Collection* collection = db->getCollection( txn, ns );
    const WhereCallbackReal whereCallback(txn, StringData(db->name()));

    // Maps each distinct key to its 1-based bucket number.
    map<BSONObj,int,BSONObjCmp> map;
    list<BSONObj> blah; // NOTE(review): not referenced anywhere else in this function

    // A missing collection simply yields no documents.
    if (collection) {
        CanonicalQuery* cq;
        if (!CanonicalQuery::canonicalize(ns, query, &cq, whereCallback).isOK()) {
            uasserted(17212, "Can't canonicalize query " + query.toString());
            return 0;
        }

        Runner* rawRunner;
        if (!getRunner(txn,collection, cq, &rawRunner).isOK()) {
            uasserted(17213, "Can't get runner for query " + query.toString());
            return 0;
        }

        auto_ptr<Runner> runner(rawRunner);
        const ScopedRunnerRegistration safety(runner.get());

        BSONObj obj;
        Runner::RunnerState state;
        while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
            BSONObj key = getKey(obj , keyPattern , keyFunction , keysize / keynum,
                                 s.get() );
            keysize += key.objsize();
            keynum++;

            // operator[] inserts a zero for an unseen key, so n == 0 means "new key";
            // it is then given the next 1-based bucket number.
            int& n = map[key];
            if ( n == 0 ) {
                n = map.size();
                s->setObject( "$key" , key , true );
                uassert(17203, "group() can't handle more than 20000 unique keys",
                        n <= 20000 );
            }

            // Expose the document and its zero-based bucket index to the script.
            s->setObject( "obj" , obj , true );
            s->setNumber( "n" , n - 1 );
            // A non-zero result from invoke() is treated as failure.
            if ( s->invoke( f , 0, 0 , 0 , true ) ) {
                throw UserException(17214,
                                    (string)"reduce invoke failed: " + s->getError());
            }
        }
    }

    // Optional finalize pass: each bucket is replaced by $finalize's return value unless
    // that value is undefined.
    if (!finalize.empty()) {
        s->exec( "$finalize = " + finalize , "$group finalize define" ,
                 false , true , true , 100 );
        ScriptingFunction g = s->createFunction(
            "function(){ "
            " for(var i=0; i < $arr.length; i++){ "
            " var ret = $finalize($arr[i]); "
            " if (ret !== undefined) "
            " $arr[i] = ret; "
            " } "
            "}" );
        s->invoke( g , 0, 0 , 0 , true );
    }

    result.appendArray( "retval" , s->getObject( "$arr" ) );
    // keynum started at 1 and was incremented once per document.
    result.append( "count" , keynum - 1 );
    result.append( "keys" , (int)(map.size()) );

    // Reset $arr and collect garbage before the scope object goes away.
    s->exec( "$arr = [];" , "$group reduce setup 2" , false , true , true , 100 );
    s->gc();

    return true;
}
/**
 * Prepare the cursor-backed source that feeds 'pPipeline'.
 *
 * Injects a MongodImplementation into every stage that needs one. Unless the pipeline
 * already begins with a valid initial source (in which case an empty pointer is
 * returned), builds a Runner from the pipeline's leading $match and -- when the planner
 * can satisfy it without a blocking sort -- its leading $sort, wraps that Runner in a
 * DocumentSourceCursor, and installs it as the pipeline's initial source.
 *
 * Requires at least a read lock on the namespace. Returns the Runner that was created.
 */
boost::shared_ptr<Runner> PipelineD::prepareCursorSource(
        Collection* collection,
        const intrusive_ptr<Pipeline>& pPipeline,
        const intrusive_ptr<ExpressionContext>& pExpCtx) {

    // get the full "namespace" name
    const string& fullName = pExpCtx->ns.ns();
    pExpCtx->opCtx->lockState()->assertAtLeastReadLocked(fullName);

    // We will be modifying the source vector as we go
    Pipeline::SourceContainer& sources = pPipeline->sources;

    // Inject a MongodImplementation to sources that need them.
    for (size_t idx = 0; idx < sources.size(); idx++) {
        DocumentSourceNeedsMongod* mongodUser =
            dynamic_cast<DocumentSourceNeedsMongod*>(sources[idx].get());
        if (!mongodUser) {
            continue;
        }
        mongodUser->injectMongodInterface(
            boost::make_shared<MongodImplementation>(pExpCtx));
    }

    if (!sources.empty() && sources.front()->isValidInitialSource()) {
        if (dynamic_cast<DocumentSourceMergeCursors*>(sources.front().get())) {
            // Enable the hooks for setting up authentication on the subsequent internal
            // connections we are going to create. This would normally have been done
            // when SetShardVersion was called, but since SetShardVersion is never called
            // on secondaries, this is needed.
            ShardedConnectionInfo::addHook();
        }
        return boost::shared_ptr<Runner>(); // don't need a cursor
    }

    // Look for an initial match. This works whether we got an initial query or not.
    // If not, it results in a "{}" query, which will be what we want in that case.
    const BSONObj queryObj = pPipeline->getInitialQuery();
    if (!queryObj.isEmpty()) {
        // The match gets built into the Runner we'll create, so remove it from the
        // pipeline.
        sources.pop_front();
    }

    // Find the set of fields in the source documents depended on by this pipeline.
    const DepsTracker deps = pPipeline->getDependencies(queryObj);

    // Passing query an empty projection since it is faster to use ParsedDeps::extractFields().
    // This will need to change to support covering indexes (SERVER-12015). There is an
    // exception for textScore since that can only be retrieved by a query projection.
    const BSONObj projectionForQuery = deps.needTextScore ? deps.toProjection() : BSONObj();

    /*
      Look for an initial sort; we'll try to add this to the Runner we create. If we're
      successful in doing that (further down), we'll remove the $sort from the pipeline,
      because the documents will already come sorted in the specified order as a result
      of the index scan.
    */
    intrusive_ptr<DocumentSourceSort> sortStage;
    BSONObj sortObj;
    if (!sources.empty()) {
        sortStage = dynamic_cast<DocumentSourceSort*>(sources.front().get());
        if (sortStage) {
            // build the sort key
            sortObj = sortStage->serializeSortKey(/*explain*/false).toBson();
        }
    }

    // Create the Runner.
    //
    // A Runner that includes both the match and the sort cannot be created when the two
    // are incompatible wrt the available indexes. So: try with both first; if that
    // fails (or there is no sort at all), create the Runner without the sort.
    //
    // If the sort is incorporated into the Runner, it is removed from the head of the
    // pipeline.
    //
    // LATER - we should be able to find this out before we create the
    // cursor. Either way, we can then apply other optimizations there
    // are tickets for, such as SERVER-4507.
    const size_t runnerOptions = QueryPlannerParams::DEFAULT
                               | QueryPlannerParams::INCLUDE_SHARD_FILTER
                               | QueryPlannerParams::NO_BLOCKING_SORT
                               ;
    boost::shared_ptr<Runner> runner;
    bool sortInRunner = false;

    const WhereCallbackReal whereCallback(pExpCtx->ns.db());

    if (sortStage) {
        CanonicalQuery* sortedQuery;
        Status status =
            CanonicalQuery::canonicalize(pExpCtx->ns,
                                         queryObj,
                                         sortObj,
                                         projectionForQuery,
                                         &sortedQuery,
                                         whereCallback);

        Runner* rawRunner;
        if (status.isOK()
            && getRunner(collection, sortedQuery, &rawRunner, runnerOptions).isOK()) {
            // success: The Runner will handle sorting for us using an index.
            runner.reset(rawRunner);
            sortInRunner = true;

            sources.pop_front();
            if (sortStage->getLimitSrc()) {
                // need to reinsert coalesced $limit after removing $sort
                sources.push_front(sortStage->getLimitSrc());
            }
        }
    }

    if (!runner.get()) {
        // Either there was no sort, or the sorted attempt failed: plan without a sort.
        const BSONObj noSort;
        CanonicalQuery* unsortedQuery;
        uassertStatusOK(
            CanonicalQuery::canonicalize(pExpCtx->ns,
                                         queryObj,
                                         noSort,
                                         projectionForQuery,
                                         &unsortedQuery,
                                         whereCallback));

        Runner* rawRunner;
        uassertStatusOK(getRunner(collection, unsortedQuery, &rawRunner, runnerOptions));
        runner.reset(rawRunner);
    }

    // DocumentSourceCursor expects a yielding Runner that has had its state saved.
    runner->saveState();

    // Put the Runner into a DocumentSourceCursor and add it to the front of the pipeline.
    intrusive_ptr<DocumentSourceCursor> cursorSource =
        DocumentSourceCursor::create(fullName, runner, pExpCtx);

    // Note the query, sort, and projection for explain.
    cursorSource->setQuery(queryObj);
    if (sortInRunner) {
        cursorSource->setSort(sortObj);
    }

    cursorSource->setProjection(deps.toProjection(), deps.toParsedDeps());

    // Let the cursor source absorb as many leading stages as it can coalesce.
    while (!sources.empty() && cursorSource->coalesce(sources.front())) {
        sources.pop_front();
    }

    pPipeline->addInitialSource(cursorSource);

    return runner;
}
long long runCount( const char *ns, const BSONObj &cmd, string &err, int &errCode ) { // Lock 'ns'. Client::Context cx(ns); NamespaceDetails *d = nsdetails(ns); if (NULL == d) { err = "ns missing"; return -1; } BSONObj query = cmd.getObjectField("query"); long long count = 0; long long skip = cmd["skip"].numberLong(); long long limit = cmd["limit"].numberLong(); if (limit < 0) { limit = -limit; } // count of all objects if (query.isEmpty()) { return applySkipLimit(d->numRecords(), cmd); } CanonicalQuery* cq; // We pass -limit because a positive limit means 'batch size' but negative limit is a // hard limit. if (!CanonicalQuery::canonicalize(ns, query, skip, -limit, &cq).isOK()) { uasserted(17220, "could not canonicalize query " + query.toString()); return -2; } Runner* rawRunner; if (!getRunner(cq, &rawRunner).isOK()) { uasserted(17221, "could not get runner " + query.toString()); return -2; } auto_ptr<Runner> runner(rawRunner); try { const ScopedRunnerRegistration safety(runner.get()); runner->setYieldPolicy(Runner::YIELD_AUTO); Runner::RunnerState state; while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, NULL))) { ++count; } // Emulate old behavior and return the count even if the runner was killed. This // happens when the underlying collection is dropped. return count; } catch (const DBException &e) { err = e.toString(); errCode = e.getCode(); } catch (const std::exception &e) { err = e.what(); errCode = 0; } // Historically we have returned zero in many count assertion cases - see SERVER-2291. log() << "Count with ns: " << ns << " and query: " << query << " failed with exception: " << err << " code: " << errCode << endl; return -2; }
/** Returns the runner that getRunner() yields for argument 0. */
inline RelayRunner * getForeRunner() {
    RelayRunner * const foreRunner = getRunner(0);
    return foreRunner;
}
long long DeleteExecutor::execute() { uassertStatusOK(prepare()); uassert(17417, mongoutils::str::stream() << "DeleteExecutor::prepare() failed to parse query " << _request->getQuery(), _isQueryParsed); const bool logop = _request->shouldCallLogOp(); const NamespaceString& ns(_request->getNamespaceString()); if (!_request->isGod()) { if (ns.isSystem()) { uassert(12050, "cannot delete from system namespace", legalClientSystemNS(ns.ns(), true)); } if (ns.ns().find('$') != string::npos) { log() << "cannot delete from collection with reserved $ in name: " << ns << endl; uasserted( 10100, "cannot delete from collection with reserved $ in name" ); } } massert(17418, mongoutils::str::stream() << "dbname = " << currentClient.get()->database()->name() << "; ns = " << ns.ns(), currentClient.get()->database()->name() == nsToDatabaseSubstring(ns.ns())); Collection* collection = currentClient.get()->database()->getCollection(ns.ns()); if (NULL == collection) { return 0; } uassert(10101, str::stream() << "cannot remove from a capped collection: " << ns.ns(), !collection->isCapped()); uassert(ErrorCodes::NotMaster, str::stream() << "Not primary while removing from " << ns.ns(), !logop || isMasterNs(ns.ns().c_str())); long long nDeleted = 0; const bool canYield = !_request->isGod() && ( _canonicalQuery.get() ? 
!QueryPlannerCommon::hasNode(_canonicalQuery->root(), MatchExpression::ATOMIC) : LiteParsedQuery::isQueryIsolated(_request->getQuery())); Runner* rawRunner; if (_canonicalQuery.get()) { uassertStatusOK(getRunner(collection, _canonicalQuery.release(), &rawRunner)); } else { CanonicalQuery* ignored; uassertStatusOK(getRunner(collection, ns.ns(), _request->getQuery(), &rawRunner, &ignored)); } auto_ptr<Runner> runner(rawRunner); auto_ptr<ScopedRunnerRegistration> safety; if (canYield) { safety.reset(new ScopedRunnerRegistration(runner.get())); runner->setYieldPolicy(Runner::YIELD_AUTO); } DiskLoc rloc; Runner::RunnerState state; CurOp* curOp = cc().curop(); int oldYieldCount = curOp->numYields(); while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &rloc))) { if (oldYieldCount != curOp->numYields()) { uassert(ErrorCodes::NotMaster, str::stream() << "No longer primary while removing from " << ns.ns(), !logop || isMasterNs(ns.ns().c_str())); oldYieldCount = curOp->numYields(); } BSONObj toDelete; // TODO: do we want to buffer docs and delete them in a group rather than // saving/restoring state repeatedly? runner->saveState(); collection->deleteDocument(rloc, false, false, logop ? &toDelete : NULL ); runner->restoreState(); nDeleted++; if (logop) { if ( toDelete.isEmpty() ) { problem() << "deleted object without id, not logging" << endl; } else { bool replJustOne = true; logOp("d", ns.ns().c_str(), toDelete, 0, &replJustOne); } } if (!_request->isMulti()) { break; } if (!_request->isGod()) { getDur().commitIfNeeded(); } if (debug && _request->isGod() && nDeleted == 100) { log() << "warning high number of deletes with god=true " << " which could use significant memory b/c we don't commit journal"; } } return nDeleted; }
/**
 * Builds a runner for a distinct over 'field' of the documents in 'collection' matching
 * 'query', and stores it in '*out'.
 *
 * When an index whose first key field is 'field' exists, this tries to produce a plan that
 * can be turned into a distinct index scan; otherwise, or when that fails, it falls back
 * to getRunner() on the canonicalized query. Returns the canonicalization error if the
 * query cannot be canonicalized.
 */
Status getRunnerDistinct(Collection* collection,
                         const BSONObj& query,
                         const string& field,
                         Runner** out) {
    // This should'a been checked by the distinct command.
    verify(collection);

    // TODO: check for idhack here?

    // When can we do a fast distinct hack?
    // 1. There is a plan with just one leaf and that leaf is an ixscan.
    // 2. The ixscan indexes the field we're interested in.
    // 2a: We are correct if the index contains the field but for now we look for prefix.
    // 3. The query is covered/no fetch.
    //
    // We go through normal planning (with limited parameters) to see if we can produce
    // a soln with the above properties.
    QueryPlannerParams plannerParams;
    plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;

    // Collect only the indexes led by 'field'. The distinct hack can work if any field is
    // in the index but it's not always clear if it's a win unless it's the first field.
    for (IndexCatalog::IndexIterator indexIt =
             collection->getIndexCatalog()->getIndexIterator(false);
         indexIt.more(); ) {
        const IndexDescriptor* descriptor = indexIt.next();
        if (descriptor->keyPattern().firstElement().fieldName() == field) {
            plannerParams.indices.push_back(IndexEntry(descriptor->keyPattern(),
                                                       descriptor->isMultikey(),
                                                       descriptor->isSparse(),
                                                       descriptor->indexName(),
                                                       descriptor->infoObj()));
        }
    }

    // We only care about the field that we're projecting over.  Have to drop the _id field
    // explicitly because those are .find() semantics.
    //
    // Applying a projection allows the planner to try to give us covered plans.
    BSONObj projection = ("_id" == field) ? BSON("_id" << 1)
                                          : BSON("_id" << 0 << field << 1);

    // Apply a projection of the key.  Empty BSONObj() is for the sort.
    CanonicalQuery* cq;
    Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                 query,
                                                 BSONObj(),
                                                 projection,
                                                 &cq);
    if (!status.isOK()) {
        return status;
    }

    // No index has the field we're looking for.  Punt to normal planning.
    if (plannerParams.indices.empty()) {
        // Takes ownership of cq.
        return getRunner(cq, out);
    }

    // If we're here, we have an index prefixed by the field we're distinct-ing over.

    // If there's no query, we can just distinct-scan one of the indices.
    if (query.isEmpty()) {
        DistinctNode* distinctNode = new DistinctNode();
        distinctNode->indexKeyPattern = plannerParams.indices[0].keyPattern;
        distinctNode->direction = 1;
        IndexBoundsBuilder::allValuesBounds(distinctNode->indexKeyPattern,
                                            &distinctNode->bounds);
        distinctNode->fieldNo = 0;

        QueryPlannerParams params;

        // Takes ownership of 'distinctNode'.
        QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(*cq, params, distinctNode);
        verify(soln);

        WorkingSet* ws;
        PlanStage* root;
        verify(StageBuilder::build(*soln, &root, &ws));
        *out = new SingleSolutionRunner(collection, cq, soln, root, ws);
        return Status::OK();
    }

    // See if we can answer the query in a fast-distinct compatible fashion.
    vector<QuerySolution*> solutions;
    status = QueryPlanner::plan(*cq, plannerParams, &solutions);
    if (!status.isOK()) {
        return getRunner(cq, out);
    }

    // XXX: why do we need to do this?  planner should prob do this internally
    cq->root()->resetTag();

    // Look, in order, for the first solution whose ixscan can be turned into a
    // distinct ixscan.
    const size_t numSolutions = solutions.size();
    size_t chosen = numSolutions;
    for (size_t idx = 0; idx < numSolutions; ++idx) {
        if (turnIxscanIntoDistinctIxscan(solutions[idx], field)) {
            chosen = idx;
            break;
        }
    }

    if (chosen == numSolutions) {
        // The planner made a soln with the restricted index set but we couldn't translate
        // any of them into a distinct-compatible soln.  So, delete the solutions and just
        // go through normal planning.
        for (size_t idx = 0; idx < numSolutions; ++idx) {
            delete solutions[idx];
        }
        return getRunner(cq, out);
    }

    // Great, we can use solutions[chosen].  Clean up the other QuerySolution(s).
    for (size_t idx = 0; idx < numSolutions; ++idx) {
        if (idx != chosen) {
            delete solutions[idx];
        }
    }

    // Build and return the SSR over solutions[chosen].
    WorkingSet* ws;
    PlanStage* root;
    verify(StageBuilder::build(*solutions[chosen], &root, &ws));
    *out = new SingleSolutionRunner(collection, cq, solutions[chosen], root, ws);
    return Status::OK();
}
/**
 * Answers the query (or command) carried by 'q' and writes the reply into 'result'.
 *
 * If the namespace is a command namespace, the command is run and a single-document reply
 * is produced. Otherwise the query is canonicalized, a runner is obtained, and results are
 * appended to the reply buffer until the first batch is full (or, for explain, until
 * enough results have been seen). If more results may follow, the runner is handed to a
 * new ClientCursor whose id is returned in the reply header.
 *
 * 'm' is not referenced in this function body. 'curop' receives debug/profiling details.
 *
 * Returns pq.ns() when curop.debug().exhaust is set, otherwise the empty string. Command
 * requests always return the empty string. Raises user assertions for an empty or invalid
 * namespace, a bad numberToReturn on a command, a failed command, a query that cannot be
 * canonicalized, failure to obtain a runner, and a runner error; throws
 * SendStaleConfigException if the sharding version is not write-compatible with the one
 * observed before the query ran.
 */
std::string newRunQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
    // Validate the namespace.
    const char *ns = q.ns;
    uassert(16332, "can't have an empty ns", ns[0]);

    const NamespaceString nsString(ns);
    uassert(16256, str::stream() << "Invalid ns [" << ns << "]", nsString.isValid());

    // Set curop information.
    curop.debug().ns = ns;
    curop.debug().ntoreturn = q.ntoreturn;
    curop.debug().query = q.query;
    curop.setQuery(q.query);

    // If the query is really a command, run it.
    if (nsString.isCommand()) {
        int nToReturn = q.ntoreturn;
        uassert(16979, str::stream() << "bad numberToReturn (" << nToReturn
                                     << ") for $cmd type ns - can only be 1 or -1",
                nToReturn == 1 || nToReturn == -1);

        curop.markCommand();

        // Reserve room for the reply header; the command output is appended after it.
        BufBuilder bb;
        bb.skip(sizeof(QueryResult));

        BSONObjBuilder cmdResBuf;
        if (!runCommands(ns, q.query, curop, bb, cmdResBuf, false, q.queryOptions)) {
            uasserted(13530, "bad or malformed command request?");
        }

        curop.debug().iscommand = true;
        // TODO: Does this get overwritten/do we really need to set this twice?
        curop.debug().query = q.query;

        // Fill in the reply header in place and hand the buffer to 'result'.
        QueryResult* qr = reinterpret_cast<QueryResult*>(bb.buf());
        bb.decouple();
        qr->setResultFlagsToOk();
        qr->len = bb.len();
        curop.debug().responseLength = bb.len();
        qr->setOperation(opReply);
        qr->cursorId = 0;
        qr->startingFrom = 0;
        qr->nReturned = 1;
        result.setData(qr, true);
        return "";
    }

    // This is a read lock.  We require this because if we're parsing a $where, the
    // where-specific parsing code assumes we have a lock and creates execution machinery that
    // requires it.
    Client::ReadContext ctx(q.ns);
    Collection* collection = ctx.ctx().db()->getCollection( ns );

    // Parse the qm into a CanonicalQuery.
    CanonicalQuery* cq;
    Status canonStatus = CanonicalQuery::canonicalize(q, &cq);
    if (!canonStatus.isOK()) {
        uasserted(17287, str::stream() << "Can't canonicalize query: " << canonStatus.toString());
    }
    verify(cq);

    QLOG() << "Running query:\n" << cq->toString();
    LOG(2) << "Running query: " << cq->toStringShort();

    // Parse, canonicalize, plan, transcribe, and get a runner.
    Runner* rawRunner = NULL;

    // We use this a lot below.
    const LiteParsedQuery& pq = cq->getParsed();

    // We'll now try to get the query runner that will execute this query for us. There
    // are a few cases in which we know upfront which runner we should get and, therefore,
    // we shortcut the selection process here.
    //
    // (a) If the query is over a collection that doesn't exist, we get a special runner
    // that doesn't return results, the EOFRunner.
    //
    // (b) If the query is a replication's initial sync one, we get a SingleSolutionRunner
    // that uses a specifically designed stage that skips extents faster (see details in
    // exec/oplogstart.h)
    //
    // Otherwise we go through the selection of which runner is most suited to the
    // query + run-time context at hand.
    Status status = Status::OK();
    if (collection == NULL) {
        rawRunner = new EOFRunner(cq, cq->ns());
    }
    else if (pq.hasOption(QueryOption_OplogReplay)) {
        status = getOplogStartHack(collection, cq, &rawRunner);
    }
    else {
        // Takes ownership of cq.
        size_t options = QueryPlannerParams::DEFAULT;
        if (shardingState.needCollectionMetadata(pq.ns())) {
            options |= QueryPlannerParams::INCLUDE_SHARD_FILTER;
        }
        status = getRunner(cq, &rawRunner, options);
    }

    if (!status.isOK()) {
        // NOTE: Do not access cq as getRunner has deleted it.
        uasserted(17007, "Unable to execute query: " + status.reason());
    }

    verify(NULL != rawRunner);
    auto_ptr<Runner> runner(rawRunner);

    // We freak out later if this changes before we're done with the query.
    const ChunkVersion shardingVersionAtStart = shardingState.getVersion(cq->ns());

    // Handle query option $maxTimeMS (not used with commands).
    curop.setMaxTimeMicros(static_cast<unsigned long long>(pq.getMaxTimeMS()) * 1000);
    killCurrentOp.checkForInterrupt(); // May trigger maxTimeAlwaysTimeOut fail point.

    // uassert if we are not on a primary, and not a secondary with SlaveOk query parameter set.
    replVerifyReadsOk(&pq);

    // If this exists, the collection is sharded.
    // If it doesn't exist, we can assume we're not sharded.
    // If we're sharded, we might encounter data that is not consistent with our sharding state.
    // We must ignore this data.
    CollectionMetadataPtr collMetadata;
    if (!shardingState.needCollectionMetadata(pq.ns())) {
        collMetadata = CollectionMetadataPtr();
    }
    else {
        collMetadata = shardingState.getCollectionMetadata(pq.ns());
    }

    // Run the query.
    // bb is used to hold query results
    // this buffer should contain either requested documents per query or
    // explain information, but not both
    BufBuilder bb(32768);
    bb.skip(sizeof(QueryResult));

    // How many results have we obtained from the runner?
    int numResults = 0;

    // If we're replaying the oplog, we save the last time that we read.
    OpTime slaveReadTill;

    // Do we save the Runner in a ClientCursor for getMore calls later?
    bool saveClientCursor = false;

    // We turn on auto-yielding for the runner here.  The runner registers itself with the
    // active runners list in ClientCursor.
    auto_ptr<ScopedRunnerRegistration> safety(new ScopedRunnerRegistration(runner.get()));
    runner->setYieldPolicy(Runner::YIELD_AUTO);

    BSONObj obj;
    Runner::RunnerState state;
    // uint64_t numMisplacedDocs = 0;

    // set this outside loop. we will need to use this both within loop and when deciding
    // to fill in explain information
    const bool isExplain = pq.isExplain();

    // Have we retrieved info about which plan the runner will
    // use to execute the query yet?
    bool gotPlanInfo = false;
    PlanInfo* rawInfo;
    boost::scoped_ptr<PlanInfo> planInfo;

    while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
        // Add result to output buffer. This is unnecessary if explain info is requested
        if (!isExplain) {
            bb.appendBuf((void*)obj.objdata(), obj.objsize());
        }

        // Count the result.
        ++numResults;

        // In the case of the multi plan runner, we may not be able to
        // successfully retrieve plan info until after the query starts
        // to run. This is because the multi plan runner doesn't know what
        // plan it will end up using until it runs candidates and selects
        // the best.
        //
        // TODO: Do we ever want to output what the MPR is comparing?
        if (!gotPlanInfo) {
            Status infoStatus = runner->getInfo(NULL, &rawInfo);
            if (infoStatus.isOK()) {
                gotPlanInfo = true;
                planInfo.reset(rawInfo);
                // planSummary is really a ThreadSafeString which copies the data from
                // the provided pointer.
                curop.debug().planSummary = planInfo->planSummary.c_str();
            }
        }

        // Possibly note slave's position in the oplog.
        if (pq.hasOption(QueryOption_OplogReplay)) {
            BSONElement e = obj["ts"];
            if (Date == e.type() || Timestamp == e.type()) {
                slaveReadTill = e._opTime();
            }
        }

        // TODO: only one type of 2d search doesn't support this.  We need a way to pull it out
        // of CanonicalQuery. :(
        const bool supportsGetMore = true;
        if (isExplain) {
            if (enoughForExplain(pq, numResults)) {
                break;
            }
        }
        else if (!supportsGetMore && (enough(pq, numResults)
                                      || bb.len() >= MaxBytesToReturnToClientAtOnce)) {
            // Not taken while supportsGetMore is the constant true above.
            break;
        }
        else if (enoughForFirstBatch(pq, numResults, bb.len())) {
            QLOG() << "Enough for first batch, wantMore=" << pq.wantMore()
                   << " numToReturn=" << pq.getNumToReturn()
                   << " numResults=" << numResults
                   << endl;
            // If only one result requested assume it's a findOne() and don't save the cursor.
            if (pq.wantMore() && 1 != pq.getNumToReturn()) {
                QLOG() << " runner EOF=" << runner->isEOF() << endl;
                saveClientCursor = !runner->isEOF();
            }
            break;
        }
    }

    // Try to get information about the plan which the runner
    // will use to execute the query, if we don't have it already.
    if (!gotPlanInfo) {
        Status infoStatus = runner->getInfo(NULL, &rawInfo);
        if (infoStatus.isOK()) {
            gotPlanInfo = true;
            planInfo.reset(rawInfo);
            // planSummary is really a ThreadSafeString which copies the data from
            // the provided pointer.
            curop.debug().planSummary = planInfo->planSummary.c_str();
        }
    }

    // If we cache the runner later, we want to deregister it as it receives notifications
    // anyway by virtue of being cached.
    //
    // If we don't cache the runner later, we are deleting it, so it must be deregistered.
    //
    // So, no matter what, deregister the runner.
    safety.reset();

    // Caller expects exceptions thrown in certain cases.
    if (Runner::RUNNER_ERROR == state) {
        // Log the runner's stats, when available, before raising the error.
        TypeExplain* bareExplain;
        Status res = runner->getInfo(&bareExplain, NULL);
        if (res.isOK()) {
            boost::scoped_ptr<TypeExplain> errorExplain(bareExplain);
            error() << "Runner error, stats:\n"
                    << errorExplain->stats.jsonString(Strict, true);
        }
        uasserted(17144, "Runner error: " + WorkingSetCommon::toStatusString(obj));
    }

    // Why save a dead runner?
    if (Runner::RUNNER_DEAD == state) {
        saveClientCursor = false;
    }
    else if (pq.hasOption(QueryOption_CursorTailable)) {
        // If we're tailing a capped collection, we don't bother saving the cursor if the
        // collection is empty. Otherwise, the semantics of the tailable cursor is that the
        // client will keep trying to read from it. So we'll keep it around.
        // NOTE: this local 'collection' shadows the one declared earlier in this function.
        Collection* collection = ctx.ctx().db()->getCollection(cq->ns());
        if (collection && collection->numRecords() != 0 && pq.getNumToReturn() != 1) {
            saveClientCursor = true;
        }
    }

    // TODO(greg): This will go away soon.
    if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
        // if the version changed during the query we might be missing some data and it's safe
        // to send this as mongos can resend at this point
        throw SendStaleConfigException(pq.ns(), "version changed during initial query",
                                       shardingVersionAtStart,
                                       shardingState.getVersion(pq.ns()));
    }

    // Used to fill in explain and to determine if the query is slow enough to be logged.
    int elapsedMillis = curop.elapsedMillis();

    // Get explain information if:
    // 1) it is needed by an explain query;
    // 2) profiling is enabled; or
    // 3) profiling is disabled but we still need explain details to log a "slow" query.
    // Producing explain information is expensive and should be done only if we are certain
    // the information will be used.
    boost::scoped_ptr<TypeExplain> explain(NULL);
    if (isExplain ||
        ctx.ctx().db()->getProfilingLevel() > 0 ||
        elapsedMillis > serverGlobalParams.slowMS) {
        // Ask the runner to produce explain information.
        TypeExplain* bareExplain;
        Status res = runner->getInfo(&bareExplain, NULL);
        if (res.isOK()) {
            explain.reset(bareExplain);
        }
        else if (isExplain) {
            error() << "could not produce explain of query '" << pq.getFilter()
                    << "', error: " << res.reason();
            // If numResults and the data in bb don't correspond, we'll crash later when rooting
            // through the reply msg.
            BSONObj emptyObj;
            bb.appendBuf((void*)emptyObj.objdata(), emptyObj.objsize());
            // The explain output is actually a result.
            numResults = 1;
            // TODO: we can fill out millis etc. here just fine even if the plan screwed up.
        }
    }

    // Fill in the missing run-time fields in explain, starting with properties of
    // the process running the query.
    if (isExplain && NULL != explain.get()) {
        std::string server = mongoutils::str::stream()
            << getHostNameCached() << ":" << serverGlobalParams.port;
        explain->setServer(server);

        // We might have skipped some results due to chunk migration etc. so our count is
        // correct.
        explain->setN(numResults);

        // Clock the whole operation.
        explain->setMillis(elapsedMillis);

        BSONObj explainObj = explain->toBSON();
        bb.appendBuf((void*)explainObj.objdata(), explainObj.objsize());

        // The explain output is actually a result.
        numResults = 1;
    }

    long long ccId = 0;
    if (saveClientCursor) {
        // We won't use the runner until it's getMore'd.
        runner->saveState();

        // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
        // inserted into a global map by its ctor.
        ClientCursor* cc = new ClientCursor(collection, runner.get(),
                                            cq->getParsed().getOptions(),
                                            cq->getParsed().getFilter());
        ccId = cc->cursorid();

        QLOG() << "caching runner with cursorid " << ccId
               << " after returning " << numResults << " results" << endl;

        // ClientCursor takes ownership of runner.  Release to make sure it's not deleted.
        runner.release();

        // TODO document
        if (pq.hasOption(QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
            cc->slaveReadTill(slaveReadTill);
        }

        // TODO document
        if (pq.hasOption(QueryOption_Exhaust)) {
            curop.debug().exhaust = true;
        }

        // Set attributes for getMore.
        cc->setCollMetadata(collMetadata);
        cc->setPos(numResults);

        // If the query had a time limit, remaining time is "rolled over" to the cursor (for
        // use by future getmore ops).
        cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());
    }
    else {
        QLOG() << "Not caching runner but returning " << numResults << " results.\n";
    }

    // Add the results from the query into the output buffer.
    result.appendData(bb.buf(), bb.len());
    bb.decouple();

    // Fill out the output buffer's header.
    QueryResult* qr = static_cast<QueryResult*>(result.header());
    qr->cursorId = ccId;
    curop.debug().cursorid = (0 == ccId ? -1 : ccId);
    qr->setResultFlagsToOk();
    qr->setOperation(opReply);
    qr->startingFrom = 0;
    qr->nReturned = numResults;

    // Set debug information for consumption by the profiler.
    curop.debug().ntoskip = pq.getSkip();
    curop.debug().nreturned = numResults;
    if (NULL != explain.get()) {
        if (explain->isScanAndOrderSet()) {
            curop.debug().scanAndOrder = explain->getScanAndOrder();
        }
        else {
            curop.debug().scanAndOrder = false;
        }

        if (explain->isNScannedSet()) {
            curop.debug().nscanned = explain->getNScanned();
        }

        if (explain->isNScannedObjectsSet()) {
            curop.debug().nscannedObjects = explain->getNScannedObjects();
        }

        if (explain->isIDHackSet()) {
            curop.debug().idhack = explain->getIDHack();
        }

        if (!explain->stats.isEmpty()) {
            // execStats is a CachedBSONObj because it lives in the race-prone
            // curop.
            curop.debug().execStats.set(explain->stats);

            // Replace exec stats with plan summary if stats cannot fit into CachedBSONObj.
            if (curop.debug().execStats.tooBig() && !curop.debug().planSummary.empty()) {
                BSONObjBuilder bob;
                bob.append("summary", curop.debug().planSummary.toString());
                curop.debug().execStats.set(bob.done());
            }
        }
    }

    // curop.debug().exhaust is set above.
    return curop.debug().exhaust ? pq.ns() : "";
}
/**
 * This is called by db/ops/query.cpp.  This is the entry point for answering a query.
 *
 * Obtains a runner for 'q', appends results to the reply buffer until the first batch is
 * full (or, for explain, until enough results have been seen), and writes the reply into
 * 'result'. When the first batch is full and the client wants more, the runner is handed
 * to a new ClientCursor whose id is returned in the reply header.
 *
 * 'm' is not referenced in this function body.
 *
 * Returns pq.ns() when curop.debug().exhaust is set, otherwise the empty string. Raises a
 * user assertion when no runner can be obtained and throws SendStaleConfigException if the
 * sharding version is not write-compatible with the one observed before the query ran.
 */
string newRunQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
    log() << "Running query on new system: " << q.query.toString() << endl;

    // This is a read lock.
    Client::ReadContext ctx(q.ns, dbpath);

    // Parse, canonicalize, plan, transcribe, and get a runner.
    Runner* rawRunner;
    CanonicalQuery* cq;
    Status status = getRunner(q, &rawRunner, &cq);
    if (!status.isOK()) {
        uasserted(17007, "Couldn't process query " + q.query.toString()
                         + " why: " + status.reason());
    }
    verify(NULL != rawRunner);
    auto_ptr<Runner> runner(rawRunner);

    // We freak out later if this changes before we're done with the query.
    const ChunkVersion shardingVersionAtStart = shardingState.getVersion(q.ns);

    // We use this a lot below.
    const LiteParsedQuery& pq = cq->getParsed();

    // TODO: Document why we do this.
    // TODO: do this when we can pass in our own parsed query
    //replVerifyReadsOk(&pq);

    // If this exists, the collection is sharded.
    // If it doesn't exist, we can assume we're not sharded.
    // If we're sharded, we might encounter data that is not consistent with our sharding state.
    // We must ignore this data.
    CollectionMetadataPtr collMetadata;
    if (!shardingState.needCollectionMetadata(pq.ns())) {
        collMetadata = CollectionMetadataPtr();
    }
    else {
        collMetadata = shardingState.getCollectionMetadata(pq.ns());
    }

    // Run the query.  Room for the reply header is reserved at the front of the buffer.
    BufBuilder bb(32768);
    bb.skip(sizeof(QueryResult));

    // How many results have we obtained from the runner?
    int numResults = 0;

    // If we're replaying the oplog, we save the last time that we read.
    OpTime slaveReadTill;

    // Do we save the Runner in a ClientCursor for getMore calls later?
    bool saveClientCursor = false;

    // We turn on auto-yielding for the runner here, so we must register it with the active
    // runners list in ClientCursor.
    ClientCursor::registerRunner(runner.get());
    runner->setYieldPolicy(Runner::YIELD_AUTO);

    BSONObj obj;
    Runner::RunnerState state;
    while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) {
        // If we're sharded make sure that we don't return any data that hasn't been migrated
        // off of our shard yet.
        if (collMetadata) {
            // This information can change if we yield and as such we must make sure to re-fetch
            // it if we yield.
            KeyPattern kp(collMetadata->getKeyPattern());
            // This performs excessive BSONObj creation but that's OK for now.
            if (!collMetadata->keyBelongsToMe(kp.extractSingleKey(obj))) { continue; }
        }

        // Add result to output buffer.
        bb.appendBuf((void*)obj.objdata(), obj.objsize());

        // Count the result.
        ++numResults;

        // Possibly note slave's position in the oplog.
        if (pq.hasOption(QueryOption_OplogReplay)) {
            BSONElement e = obj["ts"];
            if (Date == e.type() || Timestamp == e.type()) {
                slaveReadTill = e._opTime();
            }
        }

        // TODO: only one type of 2d search doesn't support this.  We need a way to pull it out
        // of CanonicalQuery. :(
        const bool supportsGetMore = true;
        const bool isExplain = pq.isExplain();
        if (isExplain && enoughForExplain(pq, numResults)) {
            break;
        }
        else if (!supportsGetMore && (enough(pq, numResults)
                                      || bb.len() >= MaxBytesToReturnToClientAtOnce)) {
            // Not taken while supportsGetMore is the constant true above.
            break;
        }
        else if (enoughForFirstBatch(pq, numResults, bb.len())) {
            // If only one result requested assume it's a findOne() and don't save the cursor.
            if (pq.wantMore() && 1 != pq.getNumToReturn()) {
                saveClientCursor = true;
            }
            break;
        }
    }

    // If we cache the runner later, we want to deregister it as it receives notifications
    // anyway by virtue of being cached.
    //
    // If we don't cache the runner later, we are deleting it, so it must be deregistered.
    //
    // So, no matter what, deregister the runner.
    ClientCursor::deregisterRunner(runner.get());

    // Why save a dead runner?
    if (Runner::RUNNER_DEAD == state) {
        saveClientCursor = false;
    }

    // TODO: Stage creation can set tailable depending on what's in the parsed query.  We have
    // the full parsed query available during planning...set it there.
    //
    // TODO: If we're tailable we want to save the client cursor.  Make sure we do this later.
    //if (pq.hasOption(QueryOption_CursorTailable) && pq.getNumToReturn() != 1) { ... }

    // TODO(greg): This will go away soon.
    if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
        // if the version changed during the query we might be missing some data and it's safe
        // to send this as mongos can resend at this point
        throw SendStaleConfigException(pq.ns(), "version changed during initial query",
                                       shardingVersionAtStart,
                                       shardingState.getVersion(pq.ns()));
    }

    long long ccId = 0;
    if (saveClientCursor) {
        // We won't use the runner until it's getMore'd.
        runner->saveState();

        // Allocate a new ClientCursor.  We don't have to worry about leaking it as it's
        // inserted into a global map by its ctor.
        ClientCursor* cc = new ClientCursor(runner.get(), cq->getParsed().getOptions(),
                                            cq->getParsed().getFilter());
        ccId = cc->cursorid();

        log() << "caching runner with cursorid " << ccId << endl;

        // ClientCursor takes ownership of runner.  Release to make sure it's not deleted.
        runner.release();

        // TODO document
        if (pq.hasOption(QueryOption_OplogReplay) && !slaveReadTill.isNull()) {
            cc->slaveReadTill(slaveReadTill);
        }

        // TODO document
        if (pq.hasOption(QueryOption_Exhaust)) {
            curop.debug().exhaust = true;
        }

        // Set attributes for getMore.
        cc->setCollMetadata(collMetadata);
        cc->setPos(numResults);

        // If the query had a time limit, remaining time is "rolled over" to the cursor (for
        // use by future getmore ops).
        cc->setLeftoverMaxTimeMicros(curop.getRemainingMaxTimeMicros());
    }

    // Add the results from the query into the output buffer.
    result.appendData(bb.buf(), bb.len());
    bb.decouple();

    // Fill out the output buffer's header.
    QueryResult* qr = static_cast<QueryResult*>(result.header());
    qr->cursorId = ccId;
    curop.debug().cursorid = (0 == ccId ? -1 : ccId);
    qr->setResultFlagsToOk();
    qr->setOperation(opReply);
    qr->startingFrom = 0;
    qr->nReturned = numResults;

    // TODO: nscanned is bogus.
    // curop.debug().nscanned = ( cursor ? cursor->nscanned() : 0LL );
    curop.debug().ntoskip = pq.getSkip();
    curop.debug().nreturned = numResults;

    // curop.debug().exhaust is set above.
    return curop.debug().exhaust ? pq.ns() : "";
}
/**
 * Builds a runner for an oplog-replay query that starts its collection scan at the
 * location found by an OplogStart stage, and stores it in '*runnerOut'.
 *
 * The query must have a top-level $gt or $gte predicate over "ts" (either as the root or
 * as a direct child of a root AND). If the start-finding stage hits EOF, the query is
 * handed to getRunner() instead.
 *
 * Takes ownership of 'cq' in all cases: on success it is passed on to the returned runner
 * (or to getRunner()); on every error return it is deleted here, so the caller must not
 * touch or free it afterwards.
 *
 * Returns InternalError for a NULL collection or a failed start lookup, and
 * OplogOperationUnsupported when no suitable "ts" predicate exists.
 */
Status getOplogStartHack(Collection* collection, CanonicalQuery* cq, Runner** runnerOut) {
    // Guard cq so the error returns below do not leak it. Declared first so that it is
    // destroyed after the start-finding runner, whose stage points into cq's match tree.
    auto_ptr<CanonicalQuery> cqGuard(cq);

    if ( collection == NULL )
        return Status(ErrorCodes::InternalError,
                      "getOplogStartHack called with a NULL collection" );

    // A query can only do oplog start finding if it has a top-level $gt or $gte predicate over
    // the "ts" field (the operation's timestamp). Find that predicate and pass it to
    // the OplogStart stage.
    MatchExpression* tsExpr = NULL;
    if (MatchExpression::AND == cq->root()->matchType()) {
        // The query has an AND at the top-level. See if any of the children
        // of the AND are $gt or $gte predicates over 'ts'.
        for (size_t i = 0; i < cq->root()->numChildren(); ++i) {
            MatchExpression* me = cq->root()->getChild(i);
            if (isOplogTsPred(me)) {
                tsExpr = me;
                break;
            }
        }
    }
    else if (isOplogTsPred(cq->root())) {
        // The root of the tree is a $gt or $gte predicate over 'ts'.
        tsExpr = cq->root();
    }

    if (NULL == tsExpr) {
        return Status(ErrorCodes::OplogOperationUnsupported,
                      "OplogReplay query does not contain top-level "
                      "$gt or $gte over the 'ts' field.");
    }

    // Make an oplog start finding stage.
    WorkingSet* oplogws = new WorkingSet();
    OplogStart* stage = new OplogStart(cq->ns(), tsExpr, oplogws);

    // Takes ownership of ws and stage.
    auto_ptr<InternalRunner> runner(new InternalRunner(collection, stage, oplogws));
    runner->setYieldPolicy(Runner::YIELD_AUTO);

    // The stage returns a DiskLoc of where to start.
    DiskLoc startLoc;
    Runner::RunnerState state = runner->getNext(NULL, &startLoc);

    // This is normal.  The start of the oplog is the beginning of the collection.
    if (Runner::RUNNER_EOF == state) {
        // getRunner() takes over cq from here.
        return getRunner(cqGuard.release(), runnerOut);
    }

    // This is not normal.  An error was encountered.
    if (Runner::RUNNER_ADVANCED != state) {
        return Status(ErrorCodes::InternalError,
                      "quick oplog start location had error...?");
    }

    // cout << "diskloc is " << startLoc.toString() << endl;

    // Build our collection scan...
    CollectionScanParams params;
    params.ns = cq->ns();
    params.start = startLoc;
    params.direction = CollectionScanParams::FORWARD;
    params.tailable = cq->getParsed().hasOption(QueryOption_CursorTailable);

    WorkingSet* ws = new WorkingSet();
    CollectionScan* cs = new CollectionScan(params, ws, cq->root());

    // Takes ownership of cq, cs, ws.
    *runnerOut = new SingleSolutionRunner(collection, cqGuard.release(), NULL, cs, ws);
    return Status::OK();
}
long long DeleteExecutor::execute(OperationContext* txn, Database* db) { uassertStatusOK(prepare()); uassert(17417, mongoutils::str::stream() << "DeleteExecutor::prepare() failed to parse query " << _request->getQuery(), _isQueryParsed); const bool logop = _request->shouldCallLogOp(); const NamespaceString& ns(_request->getNamespaceString()); if (!_request->isGod()) { if (ns.isSystem()) { uassert(12050, "cannot delete from system namespace", legalClientSystemNS(ns.ns(), true)); } if (ns.ns().find('$') != string::npos) { log() << "cannot delete from collection with reserved $ in name: " << ns << endl; uasserted( 10100, "cannot delete from collection with reserved $ in name" ); } } Collection* collection = db->getCollection(txn, ns.ns()); if (NULL == collection) { return 0; } uassert(10101, str::stream() << "cannot remove from a capped collection: " << ns.ns(), !collection->isCapped()); uassert(ErrorCodes::NotMaster, str::stream() << "Not primary while removing from " << ns.ns(), !logop || repl::isMasterNs(ns.ns().c_str())); long long nDeleted = 0; Runner* rawRunner; if (_canonicalQuery.get()) { uassertStatusOK(getRunner(collection, _canonicalQuery.release(), &rawRunner)); } else { CanonicalQuery* ignored; uassertStatusOK(getRunner(collection, ns.ns(), _request->getQuery(), &rawRunner, &ignored)); } auto_ptr<Runner> runner(rawRunner); ScopedRunnerRegistration safety(runner.get()); DiskLoc rloc; Runner::RunnerState state; CurOp* curOp = txn->getCurOp(); int oldYieldCount = curOp->numYields(); while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &rloc))) { if (oldYieldCount != curOp->numYields()) { uassert(ErrorCodes::NotMaster, str::stream() << "No longer primary while removing from " << ns.ns(), !logop || repl::isMasterNs(ns.ns().c_str())); oldYieldCount = curOp->numYields(); } BSONObj toDelete; // TODO: do we want to buffer docs and delete them in a group rather than // saving/restoring state repeatedly? 
runner->saveState(); collection->deleteDocument(txn, rloc, false, false, logop ? &toDelete : NULL ); runner->restoreState(txn); nDeleted++; if (logop) { if ( toDelete.isEmpty() ) { log() << "Deleted object without id in collection " << collection->ns() << ", not logging."; } else { bool replJustOne = true; repl::logOp(txn, "d", ns.ns().c_str(), toDelete, 0, &replJustOne); } } if (!_request->isMulti()) { break; } if (!_request->isGod()) { txn->recoveryUnit()->commitIfNeeded(); } if (debug && _request->isGod() && nDeleted == 100) { log() << "warning high number of deletes with god=true " << " which could use significant memory b/c we don't commit journal"; } } return nDeleted; }
bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string key = cmdObj["key"].valuestrsafe(); BSONObj keyPattern = BSON( key << 1 ); BSONObj query = getQuery( cmdObj ); int bufSize = BSONObjMaxUserSize - 4096; BufBuilder bb( bufSize ); char * start = bb.buf(); BSONArrayBuilder arr( bb ); BSONElementSet values; long long nscanned = 0; // locations looked at long long nscannedObjects = 0; // full objects looked at long long n = 0; // matches NamespaceDetails * d = nsdetails( ns ); string cursorName; if (!d) { result.appendArray( "values" , BSONObj() ); result.append("stats", BSON("n" << 0 << "nscanned" << 0 << "nscannedObjects" << 0)); return true; } CanonicalQuery* cq; // XXX: project out just the field we're distinct-ing. May be covered... if (!CanonicalQuery::canonicalize(ns, query, &cq).isOK()) { uasserted(17215, "Can't canonicalize query " + query.toString()); return 0; } Runner* rawRunner; if (!getRunner(cq, &rawRunner).isOK()) { uasserted(17216, "Can't get runner for query " + query.toString()); return 0; } auto_ptr<Runner> runner(rawRunner); auto_ptr<DeregisterEvenIfUnderlyingCodeThrows> safety; ClientCursor::registerRunner(runner.get()); runner->setYieldPolicy(Runner::YIELD_AUTO); safety.reset(new DeregisterEvenIfUnderlyingCodeThrows(runner.get())); BSONObj obj; Runner::RunnerState state; while (Runner::RUNNER_ADVANCED == (state = runner->getNext(&obj, NULL))) { BSONElementSet elts; obj.getFieldsDotted(key, elts); for (BSONElementSet::iterator it = elts.begin(); it != elts.end(); ++it) { BSONElement elt = *it; if (values.count(elt)) { continue; } int currentBufPos = bb.len(); uassert(17217, "distinct too big, 16mb cap", (currentBufPos + elt.size() + 1024) < bufSize); arr.append(elt); BSONElement x(start + currentBufPos); values.insert(x); } } TypeExplain* bareExplain; Status res = runner->getExplainPlan(&bareExplain); if 
(res.isOK()) { auto_ptr<TypeExplain> explain(bareExplain); if (explain->isCursorSet()) { cursorName = explain->getCursor(); } n = explain->getN(); nscanned = explain->getNScanned(); nscannedObjects = explain->getNScannedObjects(); } verify( start == bb.buf() ); result.appendArray( "values" , arr.done() ); { BSONObjBuilder b; b.appendNumber( "n" , n ); b.appendNumber( "nscanned" , nscanned ); b.appendNumber( "nscannedObjects" , nscannedObjects ); b.appendNumber( "timems" , t.millis() ); b.append( "cursor" , cursorName ); result.append( "stats" , b.obj() ); } return true; }
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { const string ns = dbname + "." + cmdObj.firstElement().valuestr(); if (!cmdObj["start"].eoo()) { errmsg = "using deprecated 'start' argument to geoNear"; return false; } Client::ReadContext ctx(txn, ns); Database* db = ctx.ctx().db(); if ( !db ) { errmsg = "can't find ns"; return false; } Collection* collection = db->getCollection( txn, ns ); if ( !collection ) { errmsg = "can't find ns"; return false; } IndexCatalog* indexCatalog = collection->getIndexCatalog(); // cout << "raw cmd " << cmdObj.toString() << endl; // We seek to populate this. string nearFieldName; bool using2DIndex = false; if (!getFieldName(collection, indexCatalog, &nearFieldName, &errmsg, &using2DIndex)) { return false; } uassert(17304, "'near' field must be point", !cmdObj["near"].eoo() && cmdObj["near"].isABSONObj() && GeoParser::isPoint(cmdObj["near"].Obj())); bool isSpherical = cmdObj["spherical"].trueValue(); if (!using2DIndex) { uassert(17301, "2dsphere index must have spherical: true", isSpherical); } // Build the $near expression for the query. BSONObjBuilder nearBob; if (isSpherical) { nearBob.append("$nearSphere", cmdObj["near"].Obj()); } else { nearBob.append("$near", cmdObj["near"].Obj()); } if (!cmdObj["maxDistance"].eoo()) { uassert(17299, "maxDistance must be a number",cmdObj["maxDistance"].isNumber()); nearBob.append("$maxDistance", cmdObj["maxDistance"].number()); } if (!cmdObj["minDistance"].eoo()) { uassert(17298, "minDistance doesn't work on 2d index", !using2DIndex); uassert(17300, "minDistance must be a number",cmdObj["minDistance"].isNumber()); nearBob.append("$minDistance", cmdObj["minDistance"].number()); } if (!cmdObj["uniqueDocs"].eoo()) { warning() << ns << ": ignoring deprecated uniqueDocs option in geoNear command"; } // And, build the full query expression. 
BSONObjBuilder queryBob; queryBob.append(nearFieldName, nearBob.obj()); if (!cmdObj["query"].eoo() && cmdObj["query"].isABSONObj()) { queryBob.appendElements(cmdObj["query"].Obj()); } BSONObj rewritten = queryBob.obj(); // cout << "rewritten query: " << rewritten.toString() << endl; int numWanted = 100; const char* limitName = !cmdObj["num"].eoo() ? "num" : "limit"; BSONElement eNumWanted = cmdObj[limitName]; if (!eNumWanted.eoo()) { uassert(17303, "limit must be number", eNumWanted.isNumber()); numWanted = eNumWanted.numberInt(); uassert(17302, "limit must be >=0", numWanted >= 0); } bool includeLocs = false; if (!cmdObj["includeLocs"].eoo()) { includeLocs = cmdObj["includeLocs"].trueValue(); } double distanceMultiplier = 1.0; BSONElement eDistanceMultiplier = cmdObj["distanceMultiplier"]; if (!eDistanceMultiplier.eoo()) { uassert(17296, "distanceMultiplier must be a number", eDistanceMultiplier.isNumber()); distanceMultiplier = eDistanceMultiplier.number(); uassert(17297, "distanceMultiplier must be non-negative", distanceMultiplier >= 0); } BSONObj projObj = BSON("$pt" << BSON("$meta" << LiteParsedQuery::metaGeoNearPoint) << "$dis" << BSON("$meta" << LiteParsedQuery::metaGeoNearDistance)); CanonicalQuery* cq; const NamespaceString nss(dbname); const WhereCallbackReal whereCallback(nss.db()); if (!CanonicalQuery::canonicalize(ns, rewritten, BSONObj(), projObj, 0, numWanted, BSONObj(), &cq, whereCallback).isOK()) { errmsg = "Can't parse filter / create query"; return false; } Runner* rawRunner; if (!getRunner(collection, cq, &rawRunner, 0).isOK()) { errmsg = "can't get query runner"; return false; } auto_ptr<Runner> runner(rawRunner); const ScopedRunnerRegistration safety(runner.get()); double totalDistance = 0; BSONObjBuilder resultBuilder(result.subarrayStart("results")); double farthestDist = 0; BSONObj currObj; int results = 0; while ((results < numWanted) && Runner::RUNNER_ADVANCED == runner->getNext(&currObj, NULL)) { // Come up with the correct distance. 
double dist = currObj["$dis"].number() * distanceMultiplier; totalDistance += dist; if (dist > farthestDist) { farthestDist = dist; } // Strip out '$dis' and '$pt' from the result obj. The rest gets added as 'obj' // in the command result. BSONObjIterator resIt(currObj); BSONObjBuilder resBob; while (resIt.more()) { BSONElement elt = resIt.next(); if (!mongoutils::str::equals("$pt", elt.fieldName()) && !mongoutils::str::equals("$dis", elt.fieldName())) { resBob.append(elt); } } BSONObj resObj = resBob.obj(); // Don't make a too-big result object. if (resultBuilder.len() + resObj.objsize()> BSONObjMaxUserSize) { warning() << "Too many geoNear results for query " << rewritten.toString() << ", truncating output."; break; } // Add the next result to the result builder. BSONObjBuilder oneResultBuilder( resultBuilder.subobjStart(BSONObjBuilder::numStr(results))); oneResultBuilder.append("dis", dist); if (includeLocs) { oneResultBuilder.appendAs(currObj["$pt"], "loc"); } oneResultBuilder.append("obj", resObj); oneResultBuilder.done(); ++results; } resultBuilder.done(); // Fill out the stats subobj. BSONObjBuilder stats(result.subobjStart("stats")); // Fill in nscanned from the explain. TypeExplain* bareExplain; Status res = runner->getInfo(&bareExplain, NULL); if (res.isOK()) { auto_ptr<TypeExplain> explain(bareExplain); stats.append("nscanned", explain->getNScanned()); stats.append("objectsLoaded", explain->getNScannedObjects()); } stats.append("avgDistance", totalDistance / results); stats.append("maxDistance", farthestDist); stats.append("time", txn->getCurOp()->elapsedMillis()); stats.done(); return true; }
/**
 * Produces in '*out' a runner suitable for computing the distinct values of 'field'
 * over the documents of 'collection' that match 'query'.
 *
 * When an index whose first key is 'field' exists, this tries to obtain a plan that can
 * be answered with a distinct index scan ("fast distinct"). Otherwise, or if no such
 * plan can be produced, it falls back to ordinary planning via getRunner() without a
 * projection.
 *
 * 'collection' must be non-NULL. Returns a non-OK Status if the query cannot be
 * canonicalized, or whatever getRunner() reports on the fallback paths.
 */
Status getRunnerDistinct(Collection* collection,
                         const BSONObj& query,
                         const string& field,
                         Runner** out) {
    // This should'a been checked by the distinct command.
    verify(collection);

    // TODO: check for idhack here?

    // When can we do a fast distinct hack?
    // 1. There is a plan with just one leaf and that leaf is an ixscan.
    // 2. The ixscan indexes the field we're interested in.
    // 2a: We are correct if the index contains the field but for now we look for prefix.
    // 3. The query is covered/no fetch.
    //
    // We go through normal planning (with limited parameters) to see if we can produce
    // a soln with the above properties.

    QueryPlannerParams plannerParams;
    plannerParams.options = QueryPlannerParams::NO_TABLE_SCAN;

    // Offer the planner only those indices whose leading key is the distinct field.
    // The distinct hack can work if any field is in the index but it's not always clear
    // if it's a win unless it's the first field.
    IndexCatalog::IndexIterator indexIt =
        collection->getIndexCatalog()->getIndexIterator(false);
    while (indexIt.more()) {
        const IndexDescriptor* desc = indexIt.next();
        if (desc->keyPattern().firstElement().fieldName() == field) {
            plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
                                                       desc->getAccessMethodName(),
                                                       desc->isMultikey(),
                                                       desc->isSparse(),
                                                       desc->indexName(),
                                                       desc->infoObj()));
        }
    }

    // If there are no suitable indices for the distinct hack bail out now into regular
    // planning with no projection.
    if (plannerParams.indices.empty()) {
        CanonicalQuery* plainCq;
        Status plainStatus = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                          query,
                                                          BSONObj(),
                                                          BSONObj(),
                                                          &plainCq);
        if (!plainStatus.isOK()) {
            return plainStatus;
        }

        // Takes ownership of plainCq.
        return getRunner(collection, plainCq, out);
    }

    //
    // If we're here, we have an index prefixed by the field we're distinct-ing over.
    //

    // Applying a projection allows the planner to try to give us covered plans that we
    // can turn into the projection hack.  getDistinctProjection deals with .find()
    // projection semantics (ie _id:1 being implied by default).
    BSONObj projection = getDistinctProjection(field);

    // Apply a projection of the key.  Empty BSONObj() is for the sort.
    CanonicalQuery* cq;
    Status status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                                 query,
                                                 BSONObj(),
                                                 projection,
                                                 &cq);
    if (!status.isOK()) {
        return status;
    }

    // If there's no query, we can just distinct-scan one of the indices.
    // Not every index in plannerParams.indices may be suitable. Refer to
    // getDistinctNodeIndex().
    size_t distinctNodeIndex = 0;
    if (query.isEmpty() &&
        getDistinctNodeIndex(plannerParams.indices, field, &distinctNodeIndex)) {

        DistinctNode* distinctNode = new DistinctNode();
        distinctNode->indexKeyPattern = plannerParams.indices[distinctNodeIndex].keyPattern;
        distinctNode->direction = 1;
        IndexBoundsBuilder::allValuesBounds(distinctNode->indexKeyPattern,
                                            &distinctNode->bounds);
        distinctNode->fieldNo = 0;

        QueryPlannerParams defaultParams;

        // Takes ownership of 'distinctNode'.
        QuerySolution* soln =
            QueryPlannerAnalysis::analyzeDataAccess(*cq, defaultParams, distinctNode);
        verify(soln);

        LOG(2) << "Using fast distinct: " << cq->toStringShort()
               << ", planSummary: " << getPlanSummary(*soln);

        WorkingSet* ws;
        PlanStage* root;
        verify(StageBuilder::build(collection, *soln, &root, &ws));
        *out = new SingleSolutionRunner(collection, cq, soln, root, ws);
        return Status::OK();
    }

    // See if we can answer the query in a fast-distinct compatible fashion.
    vector<QuerySolution*> solutions;
    status = QueryPlanner::plan(*cq, plannerParams, &solutions);
    if (!status.isOK()) {
        return getRunner(collection, cq, out);
    }

    // We look for a solution that has an ixscan we can turn into a distinctixscan.
    // 'chosen' stays at solutions.size() when no solution could be converted.
    const size_t numSolutions = solutions.size();
    size_t chosen = numSolutions;
    for (size_t i = 0; i < numSolutions; ++i) {
        if (turnIxscanIntoDistinctIxscan(solutions[i], field)) {
            chosen = i;
            break;
        }
    }

    if (chosen != numSolutions) {
        // Great, we can use solutions[chosen].  Clean up the other QuerySolution(s).
        for (size_t j = 0; j < numSolutions; ++j) {
            if (j != chosen) {
                delete solutions[j];
            }
        }

        LOG(2) << "Using fast distinct: " << cq->toStringShort()
               << ", planSummary: " << getPlanSummary(*solutions[chosen]);

        // Build and return the SSR over solutions[chosen].
        WorkingSet* ws;
        PlanStage* root;
        verify(StageBuilder::build(collection, *solutions[chosen], &root, &ws));
        *out = new SingleSolutionRunner(collection, cq, solutions[chosen], root, ws);
        return Status::OK();
    }

    // If we're here, the planner made a soln with the restricted index set but we
    // couldn't translate any of them into a distinct-compatible soln.  So, delete the
    // solutions and just go through normal planning.
    for (size_t i = 0; i < numSolutions; ++i) {
        delete solutions[i];
    }

    // We drop the projection from the 'cq'.  Unfortunately this is not trivial.
    delete cq;
    status = CanonicalQuery::canonicalize(collection->ns().ns(),
                                          query,
                                          BSONObj(),
                                          BSONObj(),
                                          &cq);
    if (!status.isOK()) {
        return status;
    }

    // Takes ownership of cq.
    return getRunner(collection, cq, out);
}
/**
 * Applies the update described by 'request' to the matching documents of the request's
 * namespace in 'db', or performs an insert when nothing matched and the request is an
 * upsert.
 *
 * 'driver' supplies the parsed modifications and the scratch document; 'opDebug'
 * receives the scan/match/modify counters. 'cq', when non-NULL, is the already
 * canonicalized query; it is placed in an auto_ptr on entry and released to getRunner().
 *
 * Returns an UpdateResult describing what was matched, modified, or upserted. Failures
 * are reported by throwing through uassert/uasserted/fassert.
 */
UpdateResult update(
        OperationContext* txn,
        Database* db,
        const UpdateRequest& request,
        OpDebug* opDebug,
        UpdateDriver* driver,
        CanonicalQuery* cq) {

    LOG(3) << "processing update : " << request;

    std::auto_ptr<CanonicalQuery> cqHolder(cq);
    const NamespaceString& nsString = request.getNamespaceString();
    UpdateLifecycle* lifecycle = request.getLifecycle();

    Collection* collection = db->getCollection(txn, nsString.ns());

    validateUpdate(nsString.ns().c_str(), request.getUpdates(), request.getQuery());

    // TODO: This seems a bit circuitous.
    opDebug->updateobj = request.getUpdates();

    if (lifecycle) {
        lifecycle->setCollection(collection);
        driver->refreshIndexKeys(lifecycle->getIndexKeys());
    }

    // Obtain a runner: from the canonical query when we were given one, otherwise from
    // the raw query (which may hand a canonical query back to us through 'cq').
    Runner* rawRunner;
    Status status = Status::OK();
    if (cq) {
        status = getRunner(collection, cqHolder.release(), &rawRunner);
    }
    else {
        status = getRunner(collection, nsString.ns(), request.getQuery(), &rawRunner, &cq);
    }
    uassert(17243,
            "could not get runner " + request.getQuery().toString() + "; " + causedBy(status),
            status.isOK());

    // Create the runner and setup all deps.
    auto_ptr<Runner> runner(rawRunner);

    // Register Runner with ClientCursor
    const ScopedRunnerRegistration safety(runner.get());

    //
    // We'll start assuming we have one or more documents for this update. (Otherwise,
    // we'll fall-back to insert case (if upsert is true).)
    //

    // We are an update until we fall into the insert case below.
    driver->setContext(ModifierInterface::ExecInfo::UPDATE_CONTEXT);

    int numMatched = 0;

    // If the update was in-place, we may see it again.  This only matters if we're doing
    // a multi-update; if we're not doing a multi-update we stop after one update and we
    // won't see any more docs.
    //
    // For example: If we're scanning an index {x:1} and performing {$inc:{x:5}}, we'll
    // keep moving the document forward and it will continue to reappear in our index
    // scan.  Unless the index is multikey, the underlying query machinery won't de-dup.
    //
    // If the update wasn't in-place we may see it again.  Our query may return the new
    // document and we wouldn't want to update that.
    //
    // So, no matter what, we keep track of where the doc wound up.
    typedef unordered_set<DiskLoc, DiskLoc::Hasher> DiskLocSet;
    const scoped_ptr<DiskLocSet> updatedLocs(request.isMulti() ? new DiskLocSet : NULL);

    // Reset these counters on each call. We might re-enter this function to retry this
    // update if we throw a page fault exception below, and we rely on these counters
    // reflecting only the actions taken locally. In particular, we must have the no-op
    // counter reset so that we can meaningfully compare it with numMatched above.
    opDebug->nscanned = 0;
    opDebug->nscannedObjects = 0;
    opDebug->nModified = 0;

    // Get the cached document from the update driver.
    mutablebson::Document& doc = driver->getDocument();
    mutablebson::DamageVector damages;

    // Used during iteration of docs
    BSONObj oldObj;

    uassert(ErrorCodes::NotMaster,
            mongoutils::str::stream() << "Not primary while updating " << nsString.ns(),
            !request.shouldCallLogOp()
            || repl::getGlobalReplicationCoordinator()->canAcceptWritesForDatabase(
                    nsString.db()));

    for (;;) {
        // Get next doc, and location
        DiskLoc loc;
        const Runner::RunnerState state = runner->getNext(&oldObj, &loc);

        if (Runner::RUNNER_EOF == state) {
            // We have reached the logical end of the loop, so do yielding recovery
            break;
        }

        if (Runner::RUNNER_ADVANCED != state) {
            uassertStatusOK(Status(ErrorCodes::InternalError,
                                   str::stream() << " Update query failed -- "
                                                 << Runner::statestr(state)));
        }

        // We fill this with the new locs of moved doc so we don't double-update.
        if (updatedLocs && updatedLocs->count(loc) > 0) {
            continue;
        }

        // We count how many documents we scanned even though we may skip those that are
        // deemed duplicated. The final 'numMatched' and 'nscanned' numbers may differ for
        // that reason.
        // TODO: Do we want to pull this out of the underlying query plan?
        opDebug->nscanned++;

        // Found a matching document
        opDebug->nscannedObjects++;
        numMatched++;

        // Ask the driver to apply the mods. It may be that the driver can apply those "in
        // place", that is, some values of the old document just get adjusted without any
        // change to the binary layout on the bson layer. It may be that a whole new
        // document is needed to accommodate the new bson layout of the resulting document.
        doc.reset(oldObj, mutablebson::Document::kInPlaceEnabled);
        BSONObj logObj;

        FieldRefSet updatedFields;

        Status applyStatus = Status::OK();
        if (driver->needMatchDetails()) {
            // If there was a matched field, obtain it.
            MatchDetails matchDetails;
            matchDetails.requestElemMatchKey();

            dassert(cq);
            verify(cq->root()->matchesBSON(oldObj, &matchDetails));

            string matchedField;
            if (matchDetails.hasElemMatchKey()) {
                matchedField = matchDetails.elemMatchKey();
            }

            // TODO: Right now, each mod checks in 'prepare' that if it needs positional
            // data, that a non-empty StringData() was provided. In principle, we could do
            // that check here in an else clause to the above conditional and remove the
            // checks from the mods.

            applyStatus = driver->update(matchedField, &doc, &logObj, &updatedFields);
        }
        else {
            // If we don't need match details, avoid doing the rematch
            applyStatus = driver->update(StringData(), &doc, &logObj, &updatedFields);
        }

        if (!applyStatus.isOK()) {
            uasserted(16837, applyStatus.reason());
        }

        // Ensure _id exists and is first
        uassertStatusOK(ensureIdAndFirst(doc));

        // If the driver applied the mods in place, we can ask the mutable for what
        // changed. We call those changes "damages". :) We use the damages to inform the
        // journal what was changed, and then apply them to the original document
        // ourselves. If, however, the driver applied the mods out of place, we ask it to
        // generate a new, modified document for us. In that case, the file manager will
        // take care of the journaling details for us.
        //
        // This code flow is admittedly odd. But, right now, journaling is baked in the file
        // manager. And if we aren't using the file manager, we have to do journaling
        // ourselves.
        bool docWasModified = false;
        BSONObj newObj;
        const char* source = NULL;
        const bool inPlace = doc.getInPlaceUpdates(&damages, &source);

        // If something changed in the document, verify that no immutable fields were
        // changed and data is valid for storage.
        if ((!inPlace || !damages.empty())
            && !request.isFromReplication()
            && !request.isFromMigration()) {

            const std::vector<FieldRef*>* immutableFields = NULL;
            if (lifecycle) {
                immutableFields = lifecycle->getImmutableFields();
            }

            uassertStatusOK(validate(oldObj,
                                     updatedFields,
                                     doc,
                                     immutableFields,
                                     driver->modOptions()) );
        }

        // Save state before making changes
        runner->saveState();

        if (inPlace && !driver->modsAffectIndices()) {
            // If a set of modifiers were all no-ops, we are still 'in place', but there is
            // no work to do, in which case we want to consider the object unchanged.
            if (!damages.empty() ) {
                collection->updateDocumentWithDamages( txn, loc, source, damages );
                docWasModified = true;
                opDebug->fastmod = true;
            }

            newObj = oldObj;
        }
        else {
            // The updates were not in place. Apply them through the file manager.

            // XXX: With experimental document-level locking, we do not hold the sufficient
            // locks, so this would cause corruption.
            fassert(18516, !useExperimentalDocLocking);

            newObj = doc.getObject();
            uassert(17419,
                    str::stream() << "Resulting document after update is larger than "
                                  << BSONObjMaxUserSize,
                    newObj.objsize() <= BSONObjMaxUserSize);

            StatusWith<DiskLoc> res = collection->updateDocument(txn,
                                                                 loc,
                                                                 newObj,
                                                                 true,
                                                                 opDebug);
            uassertStatusOK(res.getStatus());
            DiskLoc newLoc = res.getValue();
            docWasModified = true;

            // If the document moved, we might see it again in a collection scan (maybe it's
            // a document after our current document).
            //
            // If the document is indexed and the mod changes an indexed value, we might see
            // it again.  For an example, see the comment above near declaration of
            // updatedLocs.
            if (updatedLocs && (newLoc != loc || driver->modsAffectIndices())) {
                updatedLocs->insert(newLoc);
            }
        }

        // Restore state after modification
        uassert(17278,
                "Update could not restore runner state after updating a document.",
                runner->restoreState(txn));

        // Call logOp if requested.
        if (request.shouldCallLogOp() && !logObj.isEmpty()) {
            BSONObj idQuery = driver->makeOplogEntryQuery(newObj, request.isMulti());
            repl::logOp(txn, "u", nsString.ns().c_str(), logObj , &idQuery,
                        NULL, request.isFromMigration());
        }

        // Only record doc modifications if they wrote (exclude no-ops)
        if (docWasModified) {
            opDebug->nModified++;
        }

        if (!request.isMulti()) {
            break;
        }

        // Opportunity for journaling to write during the update.
        txn->recoveryUnit()->commitIfNeeded();
    }

    // We are done unless nothing matched and the request asks for an upsert.
    // ('numMatched' only ever counts up from zero, so "not positive" means zero.)
    if (numMatched > 0 || !request.isUpsert()) {
        opDebug->nMatched = numMatched;
        return UpdateResult(numMatched > 0 /* updated existing object(s) */,
                            !driver->isDocReplacement() /* $mod or obj replacement */,
                            opDebug->nModified /* number of modified docs, no no-ops */,
                            numMatched /* # of docs matched/updated, even no-ops */,
                            BSONObj());
    }

    //
    // We haven't found any existing document so an insert is done
    // (upsert is true).
    //
    opDebug->upsert = true;

    // Since this is an insert (no docs found and upsert:true), we will be logging it
    // as an insert in the oplog. We don't need the driver's help to build the
    // oplog record, then. We also set the context of the update driver to the
    // INSERT_CONTEXT. Some mods may only work in that context (e.g. $setOnInsert).
    driver->setLogOp(false);
    driver->setContext(ModifierInterface::ExecInfo::INSERT_CONTEXT);

    // Reset the document we will be writing to
    doc.reset();

    // This remains the empty object in the case of an object replacement, but in the case
    // of an upsert where we are creating a base object from the query and applying mods,
    // we capture the query as the original so that we can detect immutable field
    // mutations.
    BSONObj original = BSONObj();

    // Calling createFromQuery will populate the 'doc' with fields from the query which
    // creates the base of the update for the inserted doc (because upsert was true)
    if (cq) {
        uassertStatusOK(driver->populateDocumentWithQueryFields(cq, doc));

        // Validate the base doc, as taken from the query -- no fields means validate all.
        FieldRefSet noFields;
        uassertStatusOK(validate(BSONObj(), noFields, doc, NULL, driver->modOptions()));

        if (driver->isDocReplacement()) {
            original = request.getQuery();
        }
        else {
            opDebug->fastmodinsert = true;
            // We need all the fields from the query to compare against for validation
            // below.
            original = doc.getObject();
        }
    }
    else {
        fassert(17354, CanonicalQuery::isSimpleIdQuery(request.getQuery()));
        BSONElement idElt = request.getQuery()["_id"];
        original = idElt.wrap();
        fassert(17352, doc.root().appendElement(idElt));
    }

    // Apply the update modifications and then log the update as an insert manually.
    FieldRefSet insertedFields;
    status = driver->update(StringData(), &doc, NULL, &insertedFields);
    if (!status.isOK()) {
        uasserted(16836, status.reason());
    }

    // Ensure _id exists and is first
    uassertStatusOK(ensureIdAndFirst(doc));

    // Validate that the object replacement or modifiers resulted in a document
    // that contains all the immutable keys and can be stored.
    if (!request.isFromReplication() && !request.isFromMigration()) {
        const std::vector<FieldRef*>* immutableFields = NULL;
        if (lifecycle) {
            immutableFields = lifecycle->getImmutableFields();
        }

        // This will only validate the modified fields if not a replacement.
        uassertStatusOK(validate(original,
                                 insertedFields,
                                 doc,
                                 immutableFields,
                                 driver->modOptions()) );
    }

    // Only create the collection if the doc will be inserted.
    if (!collection) {
        collection = db->getCollection(txn, request.getNamespaceString().ns());
        if (!collection) {
            collection = db->createCollection(txn, request.getNamespaceString().ns());
        }
    }

    // Insert the doc
    BSONObj upsertedObj = doc.getObject();
    uassert(17420,
            str::stream() << "Document to upsert is larger than " << BSONObjMaxUserSize,
            upsertedObj.objsize() <= BSONObjMaxUserSize);

    StatusWith<DiskLoc> newLoc = collection->insertDocument(txn,
                                                            upsertedObj,
                                                            !request.isGod() /*enforceQuota*/);
    uassertStatusOK(newLoc.getStatus());
    if (request.shouldCallLogOp()) {
        repl::logOp(txn, "i", nsString.ns().c_str(), upsertedObj,
                    NULL, NULL, request.isFromMigration());
    }

    opDebug->nMatched = 1;
    return UpdateResult(false /* updated a non existing document */,
                        !driver->isDocReplacement() /* $mod or obj replacement? */,
                        1 /* docs written*/,
                        1 /* count of updated documents */,
                        upsertedObj /* object that was upserted */ );
}
/**
 * Hand the baton (i.e., the call) over to the runner responsible for the
 * thread that issued it.
 */
void
RelayRace::passBaton(trace::Call *call) {
    // Debug trace; compiled in but permanently switched off.
    if (0) {
        std::cerr << "switching to thread " << call->thread_id << "\n";
    }

    getRunner(call->thread_id)->receiveBaton(call);
}
/* ns: namespace, e.g. <database>.<collection> pattern: the "where" clause / criteria justOne: stop after 1 match god: allow access to system namespaces, and don't yield */ long long deleteObjects(const StringData& ns, BSONObj pattern, bool justOne, bool logop, bool god) { if (!god) { if (ns.find( ".system.") != string::npos) { // note a delete from system.indexes would corrupt the db if done here, as there are // pointers into those objects in NamespaceDetails. uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns, true ) ); } if (ns.find('$') != string::npos) { log() << "cannot delete from collection with reserved $ in name: " << ns << endl; uasserted( 10100, "cannot delete from collection with reserved $ in name" ); } } Collection* collection = currentClient.get()->database()->getCollection(ns); if (NULL == collection) { return 0; } uassert(10101, str::stream() << "can't remove from a capped collection: " << ns, !collection->isCapped()); string nsForLogOp = ns.toString(); // XXX-ERH long long nDeleted = 0; CanonicalQuery* cq; if (!CanonicalQuery::canonicalize(ns.toString(), pattern, &cq).isOK()) { uasserted(17218, "Can't canonicalize query " + pattern.toString()); return 0; } bool canYield = !god && !QueryPlannerCommon::hasNode(cq->root(), MatchExpression::ATOMIC); Runner* rawRunner; if (!getRunner(cq, &rawRunner).isOK()) { uasserted(17219, "Can't get runner for query " + pattern.toString()); return 0; } auto_ptr<Runner> runner(rawRunner); auto_ptr<ScopedRunnerRegistration> safety; if (canYield) { safety.reset(new ScopedRunnerRegistration(runner.get())); runner->setYieldPolicy(Runner::YIELD_AUTO); } DiskLoc rloc; Runner::RunnerState state; while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &rloc))) { BSONObj toDelete; // XXX: do we want to buffer docs and delete them in a group rather than // saving/restoring state repeatedly? runner->saveState(); collection->deleteDocument(rloc, false, false, logop ? 
&toDelete : NULL ); runner->restoreState(); nDeleted++; if (logop) { if ( toDelete.isEmpty() ) { problem() << "deleted object without id, not logging" << endl; } else { bool replJustOne = true; logOp("d", nsForLogOp.c_str(), toDelete, 0, &replJustOne); } } if (justOne) { break; } if (!god) { getDur().commitIfNeeded(); } if (debug && god && nDeleted == 100) { log() << "warning high number of deletes with god=true " << " which could use significant memory b/c we don't commit journal"; } } return nDeleted; }