void DocumentSourceGroup::populate() {
        const size_t numAccumulators = vpAccumulatorFactory.size();
        dassert(numAccumulators == vpExpression.size());

        // pushed to on spill()
        vector<shared_ptr<Sorter<Value, Value>::Iterator> > sortedFiles;
        int memoryUsageBytes = 0;

        // This loop consumes all input from pSource and buckets it based on pIdExpression.
        while (boost::optional<Document> input = pSource->getNext()) {
            if (memoryUsageBytes > _maxMemoryUsageBytes) {
                uassert(16945, "Exceeded memory limit for $group, but didn't allow external sort."
                               " Pass allowDiskUse:true to opt in.",
                        _extSortAllowed);
                sortedFiles.push_back(spill());
                memoryUsageBytes = 0;
            }

            _variables->setRoot(*input);

            /* get the _id value */
            Value id = computeId(_variables.get());

            /* treat missing values the same as NULL SERVER-4674 */
            if (id.missing())
                id = Value(BSONNULL);

            /*
              Look for the _id value in the map; if it's not there, add a
              new entry with a blank accumulator.
            */
            const size_t oldSize = groups.size();
            vector<intrusive_ptr<Accumulator> >& group = groups[id];
            const bool inserted = groups.size() != oldSize;

            if (inserted) {
                memoryUsageBytes += id.getApproximateSize();

                // Add the accumulators
                group.reserve(numAccumulators);
                for (size_t i = 0; i < numAccumulators; i++) {
                    group.push_back(vpAccumulatorFactory[i]());
                }
            } else {
                for (size_t i = 0; i < numAccumulators; i++) {
                    // subtract old mem usage. New usage added back after processing.
                    memoryUsageBytes -= group[i]->memUsageForSorter();
                }
            }

            /* tickle all the accumulators for the group we found */
            dassert(numAccumulators == group.size());
            for (size_t i = 0; i < numAccumulators; i++) {
                group[i]->process(vpExpression[i]->evaluate(_variables.get()), _doingMerge);
                memoryUsageBytes += group[i]->memUsageForSorter();
            }

            // We are done with the ROOT document so release it.
            _variables->clearRoot();

            DEV {
                // In debug mode, spill every time we have a duplicate id to stress merge logic.
                if (!inserted // is a dup
                        && !pExpCtx->inRouter // can't spill to disk in router
                        && !_extSortAllowed // don't change behavior when testing external sort
                        && sortedFiles.size() < 20 // don't open too many FDs
                        ) {
                    sortedFiles.push_back(spill());
                }
            }
        }

        // These blocks do any final steps necessary to prepare to output results.
        if (!sortedFiles.empty()) {
            _spilled = true;
            if (!groups.empty()) {
                sortedFiles.push_back(spill());
            }

            // We won't be using groups again so free its memory.
            GroupsMap().swap(groups);

            _sorterIterator.reset(
                    Sorter<Value,Value>::Iterator::merge(
                        sortedFiles, SortOptions(), SorterComparator()));

            // prepare current to accumulate data
            _currentAccumulators.reserve(numAccumulators);
            for (size_t i = 0; i < numAccumulators; i++) {
                _currentAccumulators.push_back(vpAccumulatorFactory[i]());
            }

            verify(_sorterIterator->more()); // we put data in, we should get something out.
            _firstPartOfNextGroup = _sorterIterator->next();
        } else {
            // start the group iterator
            groupsIterator = groups.begin();
        }

        populated = true;
    }
示例#2
0
void DocumentSourceGroup::initialize() {
    _initialized = true;
    const size_t numAccumulators = vpAccumulatorFactory.size();

    boost::optional<BSONObj> inputSort = findRelevantInputSort();
    if (inputSort) {
        // We can convert to streaming.
        _streaming = true;
        _inputSort = *inputSort;

        // Set up accumulators.
        _currentAccumulators.reserve(numAccumulators);
        for (size_t i = 0; i < numAccumulators; i++) {
            _currentAccumulators.push_back(vpAccumulatorFactory[i]());
            _currentAccumulators.back()->injectExpressionContext(pExpCtx);
        }

        // We only need to load the first document.
        _firstDocOfNextGroup = pSource->getNext();

        if (!_firstDocOfNextGroup) {
            return;
        }

        _variables->setRoot(*_firstDocOfNextGroup);

        // Compute the _id value.
        _currentId = computeId(_variables.get());
        return;
    }

    dassert(numAccumulators == vpExpression.size());

    // pushed to on spill()
    vector<shared_ptr<Sorter<Value, Value>::Iterator>> sortedFiles;
    int memoryUsageBytes = 0;

    // This loop consumes all input from pSource and buckets it based on pIdExpression.
    while (boost::optional<Document> input = pSource->getNext()) {
        if (memoryUsageBytes > _maxMemoryUsageBytes) {
            uassert(16945,
                    "Exceeded memory limit for $group, but didn't allow external sort."
                    " Pass allowDiskUse:true to opt in.",
                    _extSortAllowed);
            sortedFiles.push_back(spill());
            memoryUsageBytes = 0;
        }

        _variables->setRoot(*input);

        /* get the _id value */
        Value id = computeId(_variables.get());

        /*
          Look for the _id value in the map; if it's not there, add a
          new entry with a blank accumulator.
        */
        const size_t oldSize = _groups->size();
        vector<intrusive_ptr<Accumulator>>& group = (*_groups)[id];
        const bool inserted = _groups->size() != oldSize;

        if (inserted) {
            memoryUsageBytes += id.getApproximateSize();

            // Add the accumulators
            group.reserve(numAccumulators);
            for (size_t i = 0; i < numAccumulators; i++) {
                group.push_back(vpAccumulatorFactory[i]());
                group.back()->injectExpressionContext(pExpCtx);
            }
        } else {
            for (size_t i = 0; i < numAccumulators; i++) {
                // subtract old mem usage. New usage added back after processing.
                memoryUsageBytes -= group[i]->memUsageForSorter();
            }
        }

        /* tickle all the accumulators for the group we found */
        dassert(numAccumulators == group.size());
        for (size_t i = 0; i < numAccumulators; i++) {
            group[i]->process(vpExpression[i]->evaluate(_variables.get()), _doingMerge);
            memoryUsageBytes += group[i]->memUsageForSorter();
        }

        // We are done with the ROOT document so release it.
        _variables->clearRoot();

        if (kDebugBuild && !storageGlobalParams.readOnly) {
            // In debug mode, spill every time we have a duplicate id to stress merge logic.
            if (!inserted  // is a dup
                &&
                !pExpCtx->inRouter  // can't spill to disk in router
                &&
                !_extSortAllowed  // don't change behavior when testing external sort
                &&
                sortedFiles.size() < 20  // don't open too many FDs
                ) {
                sortedFiles.push_back(spill());
            }
        }
    }

    // These blocks do any final steps necessary to prepare to output results.
    if (!sortedFiles.empty()) {
        _spilled = true;
        if (!_groups->empty()) {
            sortedFiles.push_back(spill());
        }

        // We won't be using groups again so free its memory.
        _groups = pExpCtx->getValueComparator().makeUnorderedValueMap<Accumulators>();

        _sorterIterator.reset(Sorter<Value, Value>::Iterator::merge(
            sortedFiles, SortOptions(), SorterComparator(pExpCtx->getValueComparator())));

        // prepare current to accumulate data
        _currentAccumulators.reserve(numAccumulators);
        for (size_t i = 0; i < numAccumulators; i++) {
            _currentAccumulators.push_back(vpAccumulatorFactory[i]());
            _currentAccumulators.back()->injectExpressionContext(pExpCtx);
        }

        verify(_sorterIterator->more());  // we put data in, we should get something out.
        _firstPartOfNextGroup = _sorterIterator->next();
    } else {
        // start the group iterator
        groupsIterator = _groups->begin();
    }
}