コード例 #1
0
ファイル: view_catalog.cpp プロジェクト: Ferryworld/mongo
BSONObj ResolvedViewDefinition::asExpandedViewAggregation(const AggregationRequest& request) {
    // Builds an 'aggregate' command object that targets the resolved namespace and runs this
    // definition's pipeline followed by the pipeline carried by 'request'.
    BSONObjBuilder cmd;

    // The command is addressed to the resolved collection.
    cmd.append("aggregate", collectionNss.coll());

    // Stage order matters: every stage of 'pipeline' in this ResolvedViewDefinition comes first,
    // and the stages of the incoming request are appended after them.
    BSONArrayBuilder stages(cmd.subarrayStart("pipeline"));
    for (const auto& viewStage : pipeline) {
        stages.append(viewStage);
    }
    for (const auto& requestStage : request.getPipeline()) {
        stages.append(requestStage);
    }
    stages.doneFast();

    // A 'cursor' field is emitted in every case: an empty document when the request has no batch
    // size, otherwise a document holding just that batch size.
    const auto batchSize = request.getBatchSize();
    if (!batchSize) {
        cmd.append("cursor", BSONObj());
    } else {
        BSONObjBuilder cursorSpec(cmd.subobjStart("cursor"));
        cursorSpec.append(AggregationRequest::kBatchSizeName, *batchSize);
        cursorSpec.doneFast();
    }

    // Carry the explain flag over only when the request set it.
    if (request.isExplain()) {
        cmd.append("explain", true);
    }

    return cmd.obj();
}
コード例 #2
0
/**
 * Serializes 'pipeline' with serialize() and feeds the result back through Pipeline::parse(),
 * returning the re-parsed pipeline after the expression context has been injected and the
 * pipeline optimized.
 *
 * If the serialized form does not parse, both the pipeline from 'request' and the serialized
 * pipeline are logged and the process fasserts with code 40175.
 */
boost::intrusive_ptr<Pipeline> reparsePipeline(
    const boost::intrusive_ptr<Pipeline>& pipeline,
    const AggregationRequest& request,
    const boost::intrusive_ptr<ExpressionContext>& expCtx) {
    auto serializedStages = pipeline->serialize();

    // Pipeline::parse() is given BSONObj stages, so turn each serialized Value (which must hold
    // an object) into its BSON form.
    std::vector<BSONObj> stageObjs;
    stageObjs.reserve(serializedStages.size());
    for (const auto& serializedStage : serializedStages) {
        invariant(serializedStage.getType() == BSONType::Object);
        stageObjs.emplace_back(serializedStage.getDocument().toBson());
    }

    auto parseResult = Pipeline::parse(stageObjs, expCtx);
    if (!parseResult.isOK()) {
        error() << "Aggregation command did not round trip through parsing and serialization "
                   "correctly. Input pipeline: "
                << Value(request.getPipeline()).toString()
                << ", serialized pipeline: " << Value(serializedStages).toString();
        fassertFailedWithStatusNoTrace(40175, parseResult.getStatus());
    }

    // The freshly parsed pipeline gets the same treatment as any other: inject the context first,
    // then optimize.
    auto& reparsed = parseResult.getValue();
    reparsed->injectExpressionContext(expCtx);
    reparsed->optimizePipeline();
    return reparsed;
}
コード例 #3
0
ファイル: pipeline_command.cpp プロジェクト: mihail812/mongo
    bool runParsed(OperationContext* txn,
                   const NamespaceString& origNss,
                   const AggregationRequest& request,
                   BSONObj& cmdObj,
                   string& errmsg,
                   BSONObjBuilder& result) {
        // For operations on views, this will be the underlying namespace.
        const NamespaceString& nss = request.getNamespaceString();

        // Set up the ExpressionContext.
        intrusive_ptr<ExpressionContext> expCtx = new ExpressionContext(txn, request);
        expCtx->tempDir = storageGlobalParams.dbpath + "/_tmp";

        // Parse the pipeline.
        auto statusWithPipeline = Pipeline::parse(request.getPipeline(), expCtx);
        if (!statusWithPipeline.isOK()) {
            return appendCommandStatus(result, statusWithPipeline.getStatus());
        }
        auto pipeline = std::move(statusWithPipeline.getValue());

        auto resolvedNamespaces = resolveInvolvedNamespaces(txn, pipeline, expCtx);
        if (!resolvedNamespaces.isOK()) {
            return appendCommandStatus(result, resolvedNamespaces.getStatus());
        }
        expCtx->resolvedNamespaces = std::move(resolvedNamespaces.getValue());

        unique_ptr<ClientCursorPin> pin;  // either this OR the exec will be non-null
        unique_ptr<PlanExecutor> exec;
        auto curOp = CurOp::get(txn);
        {
            // This will throw if the sharding version for this connection is out of date. If the
            // namespace is a view, the lock will be released before re-running the aggregation.
            // Otherwise, the lock must be held continuously from now until we have we created both
            // the output ClientCursor and the input executor. This ensures that both are using the
            // same sharding version that we synchronize on here. This is also why we always need to
            // create a ClientCursor even when we aren't outputting to a cursor. See the comment on
            // ShardFilterStage for more details.
            AutoGetCollectionOrViewForRead ctx(txn, nss);
            Collection* collection = ctx.getCollection();

            // If running $collStats on a view, we do not resolve the view since we want stats
            // on this view namespace.
            auto startsWithCollStats = [&pipeline]() {
                const Pipeline::SourceContainer& sources = pipeline->getSources();
                return !sources.empty() &&
                    dynamic_cast<DocumentSourceCollStats*>(sources.front().get());
            };

            // If this is a view, resolve it by finding the underlying collection and stitching view
            // pipelines and this request's pipeline together. We then release our locks before
            // recursively calling run, which will re-acquire locks on the underlying collection.
            // (The lock must be released because recursively acquiring locks on the database will
            // prohibit yielding.)
            auto view = ctx.getView();
            if (view && !startsWithCollStats()) {
                auto viewDefinition =
                    ViewShardingCheck::getResolvedViewIfSharded(txn, ctx.getDb(), view);
                if (!viewDefinition.isOK()) {
                    return appendCommandStatus(result, viewDefinition.getStatus());
                }

                if (!viewDefinition.getValue().isEmpty()) {
                    ViewShardingCheck::appendShardedViewStatus(viewDefinition.getValue(), &result);
                    return false;
                }

                auto resolvedView = ctx.getDb()->getViewCatalog()->resolveView(txn, nss);
                if (!resolvedView.isOK()) {
                    return appendCommandStatus(result, resolvedView.getStatus());
                }

                // With the view resolved, we can relinquish locks.
                ctx.releaseLocksForView();

                // Parse the resolved view into a new aggregation request.
                auto newCmd = resolvedView.getValue().asExpandedViewAggregation(request);
                if (!newCmd.isOK()) {
                    return appendCommandStatus(result, newCmd.getStatus());
                }
                auto newNss = resolvedView.getValue().getNamespace();
                auto newRequest = AggregationRequest::parseFromBSON(newNss, newCmd.getValue());
                if (!newRequest.isOK()) {
                    return appendCommandStatus(result, newRequest.getStatus());
                }

                bool status = runParsed(
                    txn, origNss, newRequest.getValue(), newCmd.getValue(), errmsg, result);
                {
                    // Set the namespace of the curop back to the view namespace so ctx records
                    // stats on this view namespace on destruction.
                    stdx::lock_guard<Client>(*txn->getClient());
                    curOp->setNS_inlock(nss.ns());
                }
                return status;
            }

            // If the pipeline does not have a user-specified collation, set it from the collection
            // default.
            if (request.getCollation().isEmpty() && collection &&
                collection->getDefaultCollator()) {
                invariant(!expCtx->getCollator());
                expCtx->setCollator(collection->getDefaultCollator()->clone());
            }

            // Propagate the ExpressionContext throughout all of the pipeline's stages and
            // expressions.
            pipeline->injectExpressionContext(expCtx);

            // The pipeline must be optimized after the correct collator has been set on it (by
            // injecting the ExpressionContext containing the collator). This is necessary because
            // optimization may make string comparisons, e.g. optimizing {$eq: [<str1>, <str2>]} to
            // a constant.
            pipeline->optimizePipeline();

            if (kDebugBuild && !expCtx->isExplain && !expCtx->inShard) {
                // Make sure all operations round-trip through Pipeline::serialize() correctly by
                // re-parsing every command in debug builds. This is important because sharded
                // aggregations rely on this ability.  Skipping when inShard because this has
                // already been through the transformation (and this un-sets expCtx->inShard).
                pipeline = reparsePipeline(pipeline, request, expCtx);
            }

            // This does mongod-specific stuff like creating the input PlanExecutor and adding
            // it to the front of the pipeline if needed.
            PipelineD::prepareCursorSource(collection, pipeline);

            // Create the PlanExecutor which returns results from the pipeline. The WorkingSet
            // ('ws') and the PipelineProxyStage ('proxy') will be owned by the created
            // PlanExecutor.
            auto ws = make_unique<WorkingSet>();
            auto proxy = make_unique<PipelineProxyStage>(txn, pipeline, ws.get());

            auto statusWithPlanExecutor = (NULL == collection)
                ? PlanExecutor::make(
                      txn, std::move(ws), std::move(proxy), nss.ns(), PlanExecutor::YIELD_MANUAL)
                : PlanExecutor::make(
                      txn, std::move(ws), std::move(proxy), collection, PlanExecutor::YIELD_MANUAL);
            invariant(statusWithPlanExecutor.isOK());
            exec = std::move(statusWithPlanExecutor.getValue());

            {
                auto planSummary = Explain::getPlanSummary(exec.get());
                stdx::lock_guard<Client>(*txn->getClient());
                curOp->setPlanSummary_inlock(std::move(planSummary));
            }

            if (collection) {
                PlanSummaryStats stats;
                Explain::getSummaryStats(*exec, &stats);
                collection->infoCache()->notifyOfQuery(txn, stats.indexesUsed);
            }

            if (collection) {
                const bool isAggCursor = true;  // enable special locking behavior
                ClientCursor* cursor =
                    new ClientCursor(collection->getCursorManager(),
                                     exec.release(),
                                     nss.ns(),
                                     txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                                     0,
                                     cmdObj.getOwned(),
                                     isAggCursor);
                pin.reset(new ClientCursorPin(collection->getCursorManager(), cursor->cursorid()));
                // Don't add any code between here and the start of the try block.
            }

            // At this point, it is safe to release the collection lock.
            // - In the case where we have a collection: we will need to reacquire the
            //   collection lock later when cleaning up our ClientCursorPin.
            // - In the case where we don't have a collection: our PlanExecutor won't be
            //   registered, so it will be safe to clean it up outside the lock.
            invariant(!exec || !collection);
        }

        try {
            // Unless set to true, the ClientCursor created above will be deleted on block exit.
            bool keepCursor = false;

            // Use of the aggregate command without specifying to use a cursor is deprecated.
            // Applications should migrate to using cursors. Cursors are strictly more useful than
            // outputting the results as a single document, since results that fit inside a single
            // BSONObj will also fit inside a single batch.
            //
            // We occasionally log a deprecation warning.
            if (!request.isCursorCommand()) {
                RARELY {
                    warning()
                        << "Use of the aggregate command without the 'cursor' "
                           "option is deprecated. See "
                           "http://dochub.mongodb.org/core/aggregate-without-cursor-deprecation.";
                }
            }

            // If both explain and cursor are specified, explain wins.
            if (expCtx->isExplain) {
                result << "stages" << Value(pipeline->writeExplainOps());
            } else if (request.isCursorCommand()) {
                keepCursor = handleCursorCommand(txn,
                                                 origNss.ns(),
                                                 pin.get(),
                                                 pin ? pin->c()->getExecutor() : exec.get(),
                                                 request,
                                                 result);
            } else {
                pipeline->run(result);
            }

            if (!expCtx->isExplain) {
                PlanSummaryStats stats;
                Explain::getSummaryStats(pin ? *pin->c()->getExecutor() : *exec.get(), &stats);
                curOp->debug().setPlanSummaryMetrics(stats);
                curOp->debug().nreturned = stats.nReturned;
            }

            // Clean up our ClientCursorPin, if needed.  We must reacquire the collection lock
            // in order to do so.
            if (pin) {
                // We acquire locks here with DBLock and CollectionLock instead of using
                // AutoGetCollectionForRead.  AutoGetCollectionForRead will throw if the
                // sharding version is out of date, and we don't care if the sharding version
                // has changed.
                Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
                Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
                if (keepCursor) {
                    pin->release();
                } else {
                    pin->deleteUnderlying();
                }
            }
        } catch (...) {
コード例 #4
0
/**
 * Runs the aggregation described by 'request' and appends either explain output or the response
 * produced by handleCursorCommand() to 'result'.
 *
 * 'origNss' is the namespace the cursor is registered under and that is passed to
 * handleCursorCommand(). 'request.getNamespaceString()' is the namespace that is actually locked
 * and read; for operations on views it is the underlying namespace. 'cmdObj' is handed to the
 * ClientCursorParams of the cursor that is created.
 *
 * When the locked namespace is a view (and the pipeline does not start with $collStats), the view
 * is resolved, locks are dropped, and this function calls itself with the expanded request.
 *
 * Returns Status::OK() on success. A non-OK Status is returned when the request tries to override
 * a view's default collation, when view resolution fails, when the pipeline's collation check
 * fails, or when the recursive call for a view fails. Errors funnelled through uassertStatusOK()
 * are not turned into a returned Status by this function (presumably they are raised as
 * exceptions; confirm against uassertStatusOK()).
 */
Status runAggregate(OperationContext* opCtx,
                    const NamespaceString& origNss,
                    const AggregationRequest& request,
                    const BSONObj& cmdObj,
                    BSONObjBuilder& result) {
    // For operations on views, this will be the underlying namespace.
    NamespaceString nss = request.getNamespaceString();

    // The collation to use for this aggregation. boost::optional to distinguish between the case
    // where the collation has not yet been resolved, and where it has been resolved to nullptr.
    boost::optional<std::unique_ptr<CollatorInterface>> collatorToUse;

    unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec;
    boost::intrusive_ptr<ExpressionContext> expCtx;
    auto curOp = CurOp::get(opCtx);
    {
        const LiteParsedPipeline liteParsedPipeline(request);

        // Check whether the parsed pipeline supports the given read concern.
        liteParsedPipeline.assertSupportsReadConcern(opCtx, request.getExplain());

        if (liteParsedPipeline.hasChangeStream()) {
            // Change streams read from the oplog rather than from the requested namespace.
            nss = NamespaceString::kRsOplogNamespace;

            // If the read concern is not specified, upgrade to 'majority' and wait to make sure we
            // have a snapshot available.
            if (!repl::ReadConcernArgs::get(opCtx).hasLevel()) {
                const repl::ReadConcernArgs readConcern(
                    repl::ReadConcernLevel::kMajorityReadConcern);
                uassertStatusOK(waitForReadConcern(opCtx, readConcern, true));
            }

            if (!origNss.isCollectionlessAggregateNS()) {
                // AutoGetCollectionForReadCommand will raise an error if 'origNss' is a view.
                AutoGetCollectionForReadCommand origNssCtx(opCtx, origNss);

                // Resolve the collator to either the user-specified collation or the default
                // collation of the collection on which $changeStream was invoked, so that we do not
                // end up resolving the collation on the oplog.
                invariant(!collatorToUse);
                Collection* origColl = origNssCtx.getCollection();
                collatorToUse.emplace(resolveCollator(opCtx, request, origColl));
            }
        }

        const auto& pipelineInvolvedNamespaces = liteParsedPipeline.getInvolvedNamespaces();

        // If emplaced, AutoGetCollectionForReadCommand will throw if the sharding version for this
        // connection is out of date. If the namespace is a view, the lock will be released before
        // re-running the expanded aggregation.
        boost::optional<AutoGetCollectionForReadCommand> ctx;

        // If this is a collectionless aggregation, we won't create 'ctx' but will still need an
        // AutoStatsTracker to record CurOp and Top entries.
        boost::optional<AutoStatsTracker> statsTracker;

        // If this is a collectionless aggregation with no foreign namespaces, we don't want to
        // acquire any locks. Otherwise, lock the collection or view.
        if (nss.isCollectionlessAggregateNS() && pipelineInvolvedNamespaces.empty()) {
            statsTracker.emplace(opCtx, nss, Top::LockType::NotLocked, 0);
        } else {
            ctx.emplace(opCtx, nss, AutoGetCollection::ViewMode::kViewsPermitted);
        }

        // Null when no lock was taken; otherwise whatever 'ctx' reports for 'nss'.
        Collection* collection = ctx ? ctx->getCollection() : nullptr;

        // The collator may already have been set if this is a $changeStream pipeline. If not,
        // resolve the collator to either the user-specified collation or the collection default.
        if (!collatorToUse) {
            collatorToUse.emplace(resolveCollator(opCtx, request, collection));
        }

        // If this is a view, resolve it by finding the underlying collection and stitching view
        // pipelines and this request's pipeline together. We then release our locks before
        // recursively calling runAggregate(), which will re-acquire locks on the underlying
        // collection.  (The lock must be released because recursively acquiring locks on the
        // database will prohibit yielding.)
        if (ctx && ctx->getView() && !liteParsedPipeline.startsWithCollStats()) {
            invariant(nss != NamespaceString::kRsOplogNamespace);
            invariant(!nss.isCollectionlessAggregateNS());
            // Check that the default collation of 'view' is compatible with the operation's
            // collation. The check is skipped if the request did not specify a collation.
            if (!request.getCollation().isEmpty()) {
                invariant(collatorToUse);  // Should already be resolved at this point.
                if (!CollatorInterface::collatorsMatch(ctx->getView()->defaultCollator(),
                                                       collatorToUse->get())) {
                    return {ErrorCodes::OptionNotSupportedOnView,
                            "Cannot override a view's default collation"};
                }
            }

            ViewShardingCheck::throwResolvedViewIfSharded(opCtx, ctx->getDb(), ctx->getView());

            auto resolvedView = ctx->getDb()->getViewCatalog()->resolveView(opCtx, nss);
            if (!resolvedView.isOK()) {
                return resolvedView.getStatus();
            }

            // With the view & collation resolved, we can relinquish locks.
            ctx.reset();

            // Parse the resolved view into a new aggregation request.
            auto newRequest = resolvedView.getValue().asExpandedViewAggregation(request);
            auto newCmd = newRequest.serializeToCommandObj().toBson();

            auto status = runAggregate(opCtx, origNss, newRequest, newCmd, result);
            {
                // Set the namespace of the curop back to the view namespace so ctx records
                // stats on this view namespace on destruction.
                stdx::lock_guard<Client> lk(*opCtx->getClient());
                curOp->setNS_inlock(nss.ns());
            }
            return status;
        }

        invariant(collatorToUse);
        expCtx.reset(
            new ExpressionContext(opCtx,
                                  request,
                                  std::move(*collatorToUse),
                                  std::make_shared<PipelineD::MongoDInterface>(opCtx),
                                  uassertStatusOK(resolveInvolvedNamespaces(opCtx, request))));
        expCtx->tempDir = storageGlobalParams.dbpath + "/_tmp";
        auto session = OperationContextSession::get(opCtx);
        expCtx->inSnapshotReadOrMultiDocumentTransaction =
            session && session->inSnapshotReadOrMultiDocumentTransaction();

        auto pipeline = uassertStatusOK(Pipeline::parse(request.getPipeline(), expCtx));

        // Check that the view's collation matches the collation of any views involved in the
        // pipeline.
        if (!pipelineInvolvedNamespaces.empty()) {
            invariant(ctx);
            auto pipelineCollationStatus = collatorCompatibleWithPipeline(
                opCtx, ctx->getDb(), expCtx->getCollator(), pipeline.get());
            if (!pipelineCollationStatus.isOK()) {
                return pipelineCollationStatus;
            }
        }

        pipeline->optimizePipeline();

        if (kDebugBuild && !expCtx->explain && !expCtx->fromMongos) {
            // Make sure all operations round-trip through Pipeline::serialize() correctly by
            // re-parsing every command in debug builds. This is important because sharded
            // aggregations rely on this ability.  Skipping when fromMongos because this has
            // already been through the transformation (and this un-sets expCtx->fromMongos).
            pipeline = reparsePipeline(pipeline.get(), request, expCtx);
        }

        // Prepare a PlanExecutor to provide input into the pipeline, if needed.
        if (liteParsedPipeline.hasChangeStream()) {
            // If we are using a change stream, the cursor stage should have a simple collation,
            // regardless of what the user's collation was.
            std::unique_ptr<CollatorInterface> collatorForCursor = nullptr;
            // 'collatorStash' is held only for the duration of this block, i.e. across the
            // prepareCursorSource() call below.
            auto collatorStash = expCtx->temporarilyChangeCollator(std::move(collatorForCursor));
            PipelineD::prepareCursorSource(collection, nss, &request, pipeline.get());
        } else {
            PipelineD::prepareCursorSource(collection, nss, &request, pipeline.get());
        }
        // Optimize again, since there may be additional optimizations that can be done after adding
        // the initial cursor stage. Note this has to be done outside the above blocks to ensure
        // this process uses the correct collation if it does any string comparisons.
        pipeline->optimizePipeline();

        // Transfer ownership of the Pipeline to the PipelineProxyStage.
        auto ws = make_unique<WorkingSet>();
        auto proxy = make_unique<PipelineProxyStage>(opCtx, std::move(pipeline), ws.get());

        // This PlanExecutor will simply forward requests to the Pipeline, so does not need to
        // yield or to be registered with any collection's CursorManager to receive invalidations.
        // The Pipeline may contain PlanExecutors which *are* yielding PlanExecutors and which *are*
        // registered with their respective collection's CursorManager
        auto statusWithPlanExecutor =
            PlanExecutor::make(opCtx, std::move(ws), std::move(proxy), nss, PlanExecutor::NO_YIELD);
        invariant(statusWithPlanExecutor.isOK());
        exec = std::move(statusWithPlanExecutor.getValue());

        {
            // Compute the summary before taking the client lock so the lock is held only for the
            // CurOp update.
            auto planSummary = Explain::getPlanSummary(exec.get());
            stdx::lock_guard<Client> lk(*opCtx->getClient());
            curOp->setPlanSummary_inlock(std::move(planSummary));
        }
    }

    // Having released the collection lock, we can now create a cursor that returns results from the
    // pipeline. This cursor owns no collection state, and thus we register it with the global
    // cursor manager. The global cursor manager does not deliver invalidations or kill
    // notifications; the underlying PlanExecutor(s) used by the pipeline will be receiving
    // invalidations and kill notifications themselves, not the cursor we create here.
    ClientCursorParams cursorParams(
        std::move(exec),
        origNss,
        AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserNames(),
        opCtx->recoveryUnit()->getReadConcernLevel(),
        cmdObj);
    if (expCtx->tailableMode == TailableModeEnum::kTailableAndAwaitData) {
        cursorParams.setTailable(true);
        cursorParams.setAwaitData(true);
    }

    auto pin =
        CursorManager::getGlobalCursorManager()->registerCursor(opCtx, std::move(cursorParams));

    // Unless dismissed below, the guard calls ClientCursorPin::deleteUnderlying on 'pin' when this
    // function exits.
    ScopeGuard cursorFreer = MakeGuard(&ClientCursorPin::deleteUnderlying, &pin);

    // If both explain and cursor are specified, explain wins.
    if (expCtx->explain) {
        Explain::explainPipelineExecutor(
            pin.getCursor()->getExecutor(), *(expCtx->explain), &result);
    } else {
        // Cursor must be specified, if explain is not.
        const bool keepCursor =
            handleCursorCommand(opCtx, origNss, pin.getCursor(), request, result);
        if (keepCursor) {
            cursorFreer.Dismiss();
        }
    }

    // Record plan summary metrics on the CurOp for non-explain runs only.
    if (!expCtx->explain) {
        PlanSummaryStats stats;
        Explain::getSummaryStats(*(pin.getCursor()->getExecutor()), &stats);
        curOp->debug().setPlanSummaryMetrics(stats);
        curOp->debug().nreturned = stats.nReturned;
    }

    // 'pin' and 'cursorFreer' go out of scope on return, so any code that needs the cursor pinned
    // must come before this point.
    return Status::OK();
}