StatusWith<LogicalSessionIdSet> SessionsCollectionSharded::findRemovedSessions( OperationContext* opCtx, const LogicalSessionIdSet& sessions) { auto send = [&](BSONObj toSend) -> StatusWith<BSONObj> { const NamespaceString nss(SessionsCollection::kSessionsFullNS); auto qr = QueryRequest::makeFromFindCommand(nss, toSend, false); if (!qr.isOK()) { return qr.getStatus(); } const boost::intrusive_ptr<ExpressionContext> expCtx; auto cq = CanonicalQuery::canonicalize(opCtx, std::move(qr.getValue()), expCtx, ExtensionsCallbackNoop(), MatchExpressionParser::kAllowAllSpecialFeatures & ~MatchExpressionParser::AllowedFeatures::kExpr); if (!cq.isOK()) { return cq.getStatus(); } // Do the work to generate the first batch of results. This blocks waiting to get responses // from the shard(s). std::vector<BSONObj> batch; BSONObj viewDefinition; auto cursorId = ClusterFind::runQuery( opCtx, *cq.getValue(), ReadPreferenceSetting::get(opCtx), &batch, &viewDefinition); if (!cursorId.isOK()) { return cursorId.getStatus(); } BSONObjBuilder result; CursorResponseBuilder firstBatch(/*firstBatch*/ true, &result); for (const auto& obj : batch) { firstBatch.append(obj); } firstBatch.done(cursorId.getValue(), nss.ns()); return result.obj(); }; return doFetch(sessions, send); }
StatusWith<LogicalSessionIdSet> SessionsCollectionSharded::findRemovedSessions( OperationContext* opCtx, const LogicalSessionIdSet& sessions) { auto send = [&](BSONObj toSend) -> StatusWith<BSONObj> { auto qr = QueryRequest::makeFromFindCommand( NamespaceString::kLogicalSessionsNamespace, toSend, false); if (!qr.isOK()) { return qr.getStatus(); } const boost::intrusive_ptr<ExpressionContext> expCtx; auto cq = CanonicalQuery::canonicalize(opCtx, std::move(qr.getValue()), expCtx, ExtensionsCallbackNoop(), MatchExpressionParser::kBanAllSpecialFeatures); if (!cq.isOK()) { return cq.getStatus(); } // Do the work to generate the first batch of results. This blocks waiting to get responses // from the shard(s). std::vector<BSONObj> batch; CursorId cursorId; try { cursorId = ClusterFind::runQuery( opCtx, *cq.getValue(), ReadPreferenceSetting::get(opCtx), &batch); } catch (const DBException& ex) { return ex.toStatus(); } rpc::OpMsgReplyBuilder replyBuilder; CursorResponseBuilder::Options options; options.isInitialResponse = true; CursorResponseBuilder firstBatch(&replyBuilder, options); for (const auto& obj : batch) { firstBatch.append(obj); } firstBatch.done(cursorId, NamespaceString::kLogicalSessionsNamespace.ns()); return replyBuilder.releaseBody(); }; return doFetch(NamespaceString::kLogicalSessionsNamespace, sessions, send); }
/** * Runs a query using the following steps: * --Parsing. * --Acquire locks. * --Plan query, obtaining an executor that can run it. * --Generate the first batch. * --Save state for getMore, transferring ownership of the executor to a ClientCursor. * --Generate response to send to the client. */ bool run(OperationContext* txn, const std::string& dbname, BSONObj& cmdObj, int options, std::string& errmsg, BSONObjBuilder& result) override { const std::string fullns = parseNs(dbname, cmdObj); const NamespaceString nss(fullns); if (!nss.isValid() || nss.isCommand() || nss.isSpecialCommand()) { return appendCommandStatus(result, {ErrorCodes::InvalidNamespace, str::stream() << "Invalid collection name: " << nss.ns()}); } // Although it is a command, a find command gets counted as a query. globalOpCounters.gotQuery(); if (txn->getClient()->isInDirectClient()) { return appendCommandStatus( result, Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()")); } // Parse the command BSON to a LiteParsedQuery. const bool isExplain = false; auto lpqStatus = LiteParsedQuery::makeFromFindCommand(nss, cmdObj, isExplain); if (!lpqStatus.isOK()) { return appendCommandStatus(result, lpqStatus.getStatus()); } auto& lpq = lpqStatus.getValue(); // Validate term before acquiring locks, if provided. if (auto term = lpq->getReplicationTerm()) { auto replCoord = repl::ReplicationCoordinator::get(txn); Status status = replCoord->updateTerm(txn, *term); // Note: updateTerm returns ok if term stayed the same. if (!status.isOK()) { return appendCommandStatus(result, status); } } // Fill out curop information. // // We pass negative values for 'ntoreturn' and 'ntoskip' to indicate that these values // should be omitted from the log line. Limit and skip information is already present in the // find command parameters, so these fields are redundant. const int ntoreturn = -1; const int ntoskip = -1; beginQueryOp(txn, nss, cmdObj, ntoreturn, ntoskip); // Finish the parsing step by using the LiteParsedQuery to create a CanonicalQuery. ExtensionsCallbackReal extensionsCallback(txn, &nss); auto statusWithCQ = CanonicalQuery::canonicalize(lpq.release(), extensionsCallback); if (!statusWithCQ.isOK()) { return appendCommandStatus(result, statusWithCQ.getStatus()); } std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); ShardingState* const shardingState = ShardingState::get(txn); if (OperationShardVersion::get(txn).hasShardVersion() && shardingState->enabled()) { ChunkVersion receivedVersion = OperationShardVersion::get(txn).getShardVersion(nss); ChunkVersion latestVersion; // Wait for migration completion to get the correct chunk version. const int maxTimeoutSec = 30; int timeoutSec = cq->getParsed().getMaxTimeMS() / 1000; if (!timeoutSec || timeoutSec > maxTimeoutSec) { timeoutSec = maxTimeoutSec; } if (!shardingState->waitTillNotInCriticalSection(timeoutSec)) { uasserted(ErrorCodes::LockTimeout, "Timeout while waiting for migration commit"); } // If the received version is newer than the version cached in 'shardingState', then we // have to refresh 'shardingState' from the config servers. We do this before acquiring // locks so that we don't hold locks while waiting on the network. uassertStatusOK(shardingState->refreshMetadataIfNeeded( txn, nss.ns(), receivedVersion, &latestVersion)); } // Acquire locks. AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); const int dbProfilingLevel = ctx.getDb() ? ctx.getDb()->getProfilingLevel() : serverGlobalParams.defaultProfile; // It is possible that the sharding version will change during yield while we are // retrieving a plan executor. If this happens we will throw an error and mongos will // retry. const ChunkVersion shardingVersionAtStart = shardingState->getVersion(nss.ns()); // Get the execution plan for the query. auto statusWithPlanExecutor = getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO); if (!statusWithPlanExecutor.isOK()) { return appendCommandStatus(result, statusWithPlanExecutor.getStatus()); } std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); if (!collection) { // No collection. Just fill out curop indicating that there were zero results and // there is no ClientCursor id, and then return. const long long numResults = 0; const CursorId cursorId = 0; endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, cursorId); appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result); return true; } const LiteParsedQuery& pq = exec->getCanonicalQuery()->getParsed(); // Stream query results, adding them to a BSONArray as we go. CursorResponseBuilder firstBatch(/*isInitialResponse*/ true, &result); BSONObj obj; PlanExecutor::ExecState state = PlanExecutor::ADVANCED; long long numResults = 0; while (!FindCommon::enoughForFirstBatch(pq, numResults, firstBatch.bytesUsed()) && PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // If adding this object will cause us to exceed the BSON size limit, then we stash // it for later. if (firstBatch.bytesUsed() + obj.objsize() > BSONObjMaxUserSize && numResults > 0) { exec->enqueue(obj); break; } // Add result to output buffer. firstBatch.append(obj); numResults++; } // Throw an assertion if query execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { firstBatch.abandon(); const std::unique_ptr<PlanStageStats> stats(exec->getStats()); error() << "Plan executor error during find command: " << PlanExecutor::statestr(state) << ", stats: " << Explain::statsToBSON(*stats); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during find command: " << WorkingSetCommon::toStatusString(obj))); } // TODO: Currently, chunk ranges are kept around until all ClientCursors created while the // chunk belonged on this node are gone. Separating chunk lifetime management from // ClientCursor should allow this check to go away. if (!shardingState->getVersion(nss.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { // Version changed while retrieving a PlanExecutor. Terminate the operation, // signaling that mongos should retry. throw SendStaleConfigException(nss.ns(), "version changed during find command", shardingVersionAtStart, shardingState->getVersion(nss.ns())); } // Set up the cursor for getMore. CursorId cursorId = 0; if (shouldSaveCursor(txn, collection, state, exec.get())) { // Register the execution plan inside a ClientCursor. Ownership of the PlanExecutor is // transferred to the ClientCursor. // // First unregister the PlanExecutor so it can be re-registered with ClientCursor. exec->deregisterExec(); // Create a ClientCursor containing this plan executor. We don't have to worry about // leaking it as it's inserted into a global map by its ctor. ClientCursor* cursor = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(), pq.getOptions(), pq.getFilter()); cursorId = cursor->cursorid(); invariant(!exec); PlanExecutor* cursorExec = cursor->getExecutor(); // State will be restored on getMore. cursorExec->saveState(); cursorExec->detachFromOperationContext(); cursor->setLeftoverMaxTimeMicros(CurOp::get(txn)->getRemainingMaxTimeMicros()); cursor->setPos(numResults); // Fill out curop based on the results. endQueryOp(txn, collection, *cursorExec, dbProfilingLevel, numResults, cursorId); } else { endQueryOp(txn, collection, *exec, dbProfilingLevel, numResults, cursorId); } // Generate the response object to send to the client. firstBatch.done(cursorId, nss.ns()); return true; }
/** * Runs a query using the following steps: * --Parsing. * --Acquire locks. * --Plan query, obtaining an executor that can run it. * --Generate the first batch. * --Save state for getMore, transferring ownership of the executor to a ClientCursor. * --Generate response to send to the client. */ bool run(OperationContext* txn, const std::string& dbname, BSONObj& cmdObj, int options, std::string& errmsg, BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbname, cmdObj)); if (!nss.isValid() || nss.isCommand() || nss.isSpecialCommand()) { return appendCommandStatus(result, {ErrorCodes::InvalidNamespace, str::stream() << "Invalid collection name: " << nss.ns()}); } // Although it is a command, a find command gets counted as a query. globalOpCounters.gotQuery(); if (txn->getClient()->isInDirectClient()) { return appendCommandStatus( result, Status(ErrorCodes::IllegalOperation, "Cannot run find command from eval()")); } // Parse the command BSON to a QueryRequest. const bool isExplain = false; auto qrStatus = QueryRequest::makeFromFindCommand(nss, cmdObj, isExplain); if (!qrStatus.isOK()) { return appendCommandStatus(result, qrStatus.getStatus()); } auto& qr = qrStatus.getValue(); // Validate term before acquiring locks, if provided. if (auto term = qr->getReplicationTerm()) { auto replCoord = repl::ReplicationCoordinator::get(txn); Status status = replCoord->updateTerm(txn, *term); // Note: updateTerm returns ok if term stayed the same. if (!status.isOK()) { return appendCommandStatus(result, status); } } // Fill out curop information. // // We pass negative values for 'ntoreturn' and 'ntoskip' to indicate that these values // should be omitted from the log line. Limit and skip information is already present in the // find command parameters, so these fields are redundant. const int ntoreturn = -1; const int ntoskip = -1; beginQueryOp(txn, nss, cmdObj, ntoreturn, ntoskip); // Finish the parsing step by using the QueryRequest to create a CanonicalQuery. ExtensionsCallbackReal extensionsCallback(txn, &nss); auto statusWithCQ = CanonicalQuery::canonicalize(txn, std::move(qr), extensionsCallback); if (!statusWithCQ.isOK()) { return appendCommandStatus(result, statusWithCQ.getStatus()); } std::unique_ptr<CanonicalQuery> cq = std::move(statusWithCQ.getValue()); // Acquire locks. AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); // Get the execution plan for the query. auto statusWithPlanExecutor = getExecutorFind(txn, collection, nss, std::move(cq), PlanExecutor::YIELD_AUTO); if (!statusWithPlanExecutor.isOK()) { return appendCommandStatus(result, statusWithPlanExecutor.getStatus()); } std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); { stdx::lock_guard<Client>(*txn->getClient()); CurOp::get(txn)->setPlanSummary_inlock(Explain::getPlanSummary(exec.get())); } if (!collection) { // No collection. Just fill out curop indicating that there were zero results and // there is no ClientCursor id, and then return. const long long numResults = 0; const CursorId cursorId = 0; endQueryOp(txn, collection, *exec, numResults, cursorId); appendCursorResponseObject(cursorId, nss.ns(), BSONArray(), &result); return true; } const QueryRequest& originalQR = exec->getCanonicalQuery()->getQueryRequest(); // Stream query results, adding them to a BSONArray as we go. CursorResponseBuilder firstBatch(/*isInitialResponse*/ true, &result); BSONObj obj; PlanExecutor::ExecState state = PlanExecutor::ADVANCED; long long numResults = 0; while (!FindCommon::enoughForFirstBatch(originalQR, numResults) && PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) { // If we can't fit this result inside the current batch, then we stash it for later. if (!FindCommon::haveSpaceForNext(obj, numResults, firstBatch.bytesUsed())) { exec->enqueue(obj); break; } // Add result to output buffer. firstBatch.append(obj); numResults++; } // Throw an assertion if query execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { firstBatch.abandon(); error() << "Plan executor error during find command: " << PlanExecutor::statestr(state) << ", stats: " << Explain::getWinningPlanStats(exec.get()); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during find command: " << WorkingSetCommon::toStatusString(obj))); } // Before saving the cursor, ensure that whatever plan we established happened with the // expected collection version auto css = CollectionShardingState::get(txn, nss); css->checkShardVersionOrThrow(txn); // Set up the cursor for getMore. CursorId cursorId = 0; if (shouldSaveCursor(txn, collection, state, exec.get())) { // Register the execution plan inside a ClientCursor. Ownership of the PlanExecutor is // transferred to the ClientCursor. // // First unregister the PlanExecutor so it can be re-registered with ClientCursor. exec->deregisterExec(); // Create a ClientCursor containing this plan executor. We don't have to worry about // leaking it as it's inserted into a global map by its ctor. ClientCursor* cursor = new ClientCursor(collection->getCursorManager(), exec.release(), nss.ns(), txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(), originalQR.getOptions(), cmdObj.getOwned()); cursorId = cursor->cursorid(); invariant(!exec); PlanExecutor* cursorExec = cursor->getExecutor(); // State will be restored on getMore. cursorExec->saveState(); cursorExec->detachFromOperationContext(); cursor->setLeftoverMaxTimeMicros(txn->getRemainingMaxTimeMicros()); cursor->setPos(numResults); // Fill out curop based on the results. endQueryOp(txn, collection, *cursorExec, numResults, cursorId); } else { endQueryOp(txn, collection, *exec, numResults, cursorId); } // Generate the response object to send to the client. firstBatch.done(cursorId, nss.ns()); return true; }