/**
 * Uses 'cursor' and 'request' to fill out 'nextBatch' with the batch of result documents to
 * be returned by this getMore.
 *
 * Returns the number of documents in the batch in *numResults, which must be initialized to
 * zero by the caller. Returns the final ExecState returned by the cursor in *state. The
 * caller does not need to initialize *state: it is set to PlanExecutor::ADVANCED on entry, so
 * it holds a defined value even when an awaitData getMore is interrupted (max time expiry)
 * before the executor has returned any state at all.
 *
 * Returns an OK status if the batch was successfully generated, and a non-OK status if the
 * PlanExecutor encounters a failure (FAILURE or DEAD state). Any UserException other than an
 * ExceededTimeLimit on an awaitData cursor is propagated to the caller.
 */
Status generateBatch(ClientCursor* cursor,
                     const GetMoreRequest& request,
                     BSONArrayBuilder* nextBatch,
                     PlanExecutor::ExecState* state,
                     int* numResults) {
    PlanExecutor* exec = cursor->getExecutor();
    const bool isAwaitData = isCursorAwaitData(cursor);

    // Give *state a defined value before the executor is consulted. If getNext() throws on
    // its very first call and the exception is swallowed below, the assignment in the loop
    // condition never completes; without this, the FAILURE/DEAD checks at the bottom would
    // read whatever the caller happened to leave in *state. ADVANCED matches the value *state
    // already has whenever the exception is swallowed after one or more successful results.
    *state = PlanExecutor::ADVANCED;

    // If an awaitData getMore is killed during this process due to our max time expiring at
    // an interrupt point, we just continue as normal and return rather than reporting a
    // timeout to the user.
    BSONObj obj;
    try {
        while (PlanExecutor::ADVANCED == (*state = exec->getNext(&obj, nullptr))) {
            // If adding this object will cause us to exceed the BSON size limit, then we
            // stash it for later. A single oversized document is still returned when the
            // batch is otherwise empty (*numResults == 0), so the cursor always makes
            // progress.
            if (nextBatch->len() + obj.objsize() > BSONObjMaxUserSize && *numResults > 0) {
                exec->enqueue(obj);
                break;
            }

            // Add result to output buffer.
            nextBatch->append(obj);
            (*numResults)++;

            if (enoughForGetMore(request.batchSize.value_or(0), *numResults, nextBatch->len())) {
                break;
            }
        }
    } catch (const UserException& except) {
        // We ignore exceptions from interrupt points due to max time expiry for awaitData
        // cursors. Everything else is rethrown unchanged.
        if (!isAwaitData || except.getCode() != ErrorCodes::ExceededTimeLimit) {
            throw;
        }
    }

    if (PlanExecutor::FAILURE == *state) {
        // Log the executor's stats tree for diagnosis; the client only receives the error
        // description extracted from 'obj'.
        const std::unique_ptr<PlanStageStats> stats(exec->getStats());
        error() << "GetMore executor error, stats: " << Explain::statsToBSON(*stats);
        return Status(ErrorCodes::OperationFailed,
                      str::stream() << "GetMore executor error: "
                                    << WorkingSetCommon::toStatusString(obj));
    } else if (PlanExecutor::DEAD == *state) {
        return Status(ErrorCodes::OperationFailed,
                      str::stream() << "Plan executor killed during getMore command, "
                                    << "ns: " << request.nss.ns());
    }

    return Status::OK();
}
/** * Runs a query using the following steps: * 1) Parsing. * 2) Acquire locks. * 3) Plan query, obtaining an executor that can run it. * 4) Setup a cursor for the query, which may be used on subsequent getMores. * 5) Generate the first batch. * 6) Save state for getMore. * 7) Generate response to send to the client. * * TODO: Rather than using the sharding version available in thread-local storage (i.e. the * call to ShardingState::needCollectionMetadata() below), shard version information * should be passed as part of the command parameter. */ bool run(OperationContext* txn, const std::string& dbname, BSONObj& cmdObj, int options, std::string& errmsg, BSONObjBuilder& result) override { const std::string fullns = parseNs(dbname, cmdObj); const NamespaceString nss(fullns); if (!nss.isValid()) { return appendCommandStatus(result, {ErrorCodes::InvalidNamespace, str::stream() << "Invalid collection name: " << nss.ns()}); } // Although it is a command, a find command gets counted as a query. globalOpCounters.gotQuery(); if (txn->getClient()->isInDirectClient()) { return appendCommandStatus( result, Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()")); } // 1a) Parse the command BSON to a LiteParsedQuery. const bool isExplain = false; auto lpqStatus = LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain); if (!lpqStatus.isOK()) { return appendCommandStatus(result, lpqStatus.getStatus()); } auto& lpq = lpqStatus.getValue(); // Validate term, if provided. if (auto term = lpq->getReplicationTerm()) { auto replCoord = repl::ReplicationCoordinator::get(txn); Status status = replCoord->updateTerm(*term); // Note: updateTerm returns ok if term stayed the same. if (!status.isOK()) { return appendCommandStatus(result, status); } } // Fill out curop information. 
long long ntoreturn = lpq->getBatchSize().value_or(0); beginQueryOp(txn, nss, cmdObj, ntoreturn, lpq->getSkip()); // 1b) Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery. WhereCallbackReal whereCallback(txn, nss.db()); auto statusWithCQ = CanonicalQuery::canonicalize(lpq.release(), whereCallback); if (!statusWithCQ.isOK()) { return appendCommandStatus(result, statusWithCQ.getStatus()); } std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); // 2) Acquire locks. AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); const int dbProfilingLevel = ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile; ShardingState* const shardingState = ShardingState::get(txn); // It is possible that the sharding version will change during yield while we are // retrieving a plan executor. If this happens we will throw an error and mongos will // retry. const ChunkVersion shardingVersionAtStart = shardingState->getVersion(nss.ns()); // 3) Get the execution plan for the query. auto statusWithPlanExecutor = getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO); if (!statusWithPlanExecutor.isOK()) { return appendCommandStatus(result, statusWithPlanExecutor.getStatus()); } std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); // TODO: Currently, chunk ranges are kept around until all ClientCursors created while // the chunk belonged on this node are gone. Separating chunk lifetime management from // ClientCursor should allow this check to go away. if (!shardingState->getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // Version changed while retrieving a PlanExecutor. Terminate the operation, // signaling that mongos should retry. throw SendStaleConfigException(nss.ns(), "version changed during find command", shardingVersionAtStart, shardingState->getVersion(nss.ns())); } if (!collection) { // No collection. 
Just fill out curop indicating that there were zero results and // there is no ClientCursor id, and then return. const long long numResults = 0; const CursorId cursorId = 0; endQueryOp(txn, *exec, dbProfilingLevel, numResults, cursorId); appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result); return true; } const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed(); // 4) If possible, register the execution plan inside a ClientCursor, and pin that // cursor. In this case, ownership of the PlanExecutor is transferred to the // ClientCursor, and 'exec' becomes null. // // First unregister the PlanExecutor so it can be re-registered with ClientCursor. exec->deregisterExec(); // Create a ClientCursor containing this plan executor. We don't have to worry // about leaking it as it's inserted into a global map by its ctor. ClientCursor* cursor = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(), pq.getOptions(), pq.getFilter()); CursorId cursorId = cursor->cursorid(); ClientCursorPin ccPin(collection->getCursorManager(), cursorId); // On early return, get rid of the the cursor. ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, ccPin); invariant(!exec); PlanExecutor* cursorExec = cursor->getExecutor(); // 5) Stream query results, adding them to a BSONArray as we go. BSONArrayBuilder firstBatch; BSONObj obj; PlanExecutor::ExecState state; long long numResults = 0; while (!enoughForFirstBatch(pq, numResults, firstBatch.len()) && PlanExecutor::ADVANCED == (state = cursorExec->getNext(&obj, NULL))) { // If adding this object will cause us to exceed the BSON size limit, then we stash // it for later. if (firstBatch.len() + obj.objsize() > BSONObjMaxUserSize && numResults > 0) { cursorExec->enqueue(obj); break; } // Add result to output buffer. firstBatch.append(obj); numResults++; } // Throw an assertion if query execution fails for any reason. 
if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { const std::unique_ptr<PlanStageStats> stats(cursorExec->getStats()); error() << "Plan executor error during find command: " << PlanExecutor::statestr(state) << ", stats: " << Explain::statsToBSON(*stats); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during find command: " << WorkingSetCommon::toStatusString(obj))); } // 6) Set up the cursor for getMore. if (shouldSaveCursor(txn, collection, state, cursorExec)) { // State will be restored on getMore. cursorExec->saveState(); cursorExec->detachFromOperationContext(); cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros()); cursor->setPos(numResults); } else { cursorId = 0; } // Fill out curop based on the results. endQueryOp(txn, *cursorExec, dbProfilingLevel, numResults, cursorId); // 7) Generate the response object to send to the client. appendCursorResponseObject(cursorId, nss.ns(), firstBatch.arr(), &result); if (cursorId) { cursorFreer.Dismiss(); } return true; }