Example #1
0
// Performs one unit of work for the merge sort stage.
//
// While '_noResultToMerge' is non-empty, the child at its front is worked once and its result (if
// any) is buffered in '_mergingData' and referenced from '_merging'.  Once no child is waiting,
// the top of '_merging' is removed and handed back through 'out'.
//
// 'out' is assigned when a result is popped from '_merging', on FAILURE/DEAD (the child's id, or
// a freshly allocated status member when the child supplied none) and on NEED_YIELD (the child's
// id).
//
// '_commonStats.advanced' is only incremented when ADVANCED is actually returned; a flagged
// result is reported as NEED_TIME and counted in '_commonStats.needTime'.
PlanStage::StageState MergeSortStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (!_noResultToMerge.empty()) {
        // We have some child that we don't have a result from.  Each child must have a result
        // in order to pick the minimum result among all our children.  Work a child.
        PlanStage* child = _noResultToMerge.front();
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child->work(&id);

        if (PlanStage::ADVANCED == code) {
            WorkingSetMember* member = _ws->get(id);

            // If we're deduping...
            if (_dedup) {
                if (!member->hasLoc()) {
                    // Can't dedup data unless there's a RecordId.  We go ahead and use its
                    // result.
                    _noResultToMerge.pop();
                } else {
                    ++_specificStats.dupsTested;
                    // ...and there's a RecordId and we've seen the RecordId before
                    if (_seen.end() != _seen.find(member->loc)) {
                        // ...drop it.  The child stays at the front of the queue so it is
                        // worked again on the next call.
                        _ws->free(id);
                        ++_commonStats.needTime;
                        ++_specificStats.dupsDropped;
                        return PlanStage::NEED_TIME;
                    } else {
                        // Otherwise, note that we've seen it.
                        _seen.insert(member->loc);
                        // We're going to use the result from the child, so we remove it from
                        // the queue of children without a result.
                        _noResultToMerge.pop();
                    }
                }
            } else {
                // Not deduping.  We use any result we get from the child.  Remove the child
                // from the queue of things without a result.
                _noResultToMerge.pop();
            }

            // Store the result in our list.
            StageWithValue value;
            value.id = id;
            value.stage = child;
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
            member->makeObjOwned();
            _mergingData.push_front(value);

            // Insert the result (indirectly) into our priority queue.
            _merging.push(_mergingData.begin());

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // There are no more results possible from this child.  Don't bother with it
            // anymore.
            _noResultToMerge.pop();
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "merge sort stage failed to read in results from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else {
            if (PlanStage::NEED_TIME == code) {
                ++_commonStats.needTime;
            } else if (PlanStage::NEED_YIELD == code) {
                *out = id;
                ++_commonStats.needYield;
            }

            return code;
        }
    }

    // If we're here, for each non-EOF child, we have a valid WSID.
    verify(!_merging.empty());

    // Get the 'min' WSID.  _merging is a priority queue so its top is the smallest.
    MergingRef top = _merging.top();
    _merging.pop();

    // Since we're returning the WSID that came from top->stage, we need to work(...) it again
    // to get a new result.
    _noResultToMerge.push(top->stage);

    // Save the ID that we're returning and remove the returned result from our data.
    WorkingSetID idToTest = top->id;
    _mergingData.erase(top);

    // Return the min.
    *out = idToTest;

    // But don't return it if it's flagged.  This path yields NEED_TIME, so it must be counted
    // as needTime rather than as an advance.
    if (_ws->isFlagged(*out)) {
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
Example #2
0
    // Works the child at the front of '_workingTowardRep' once and compares the location of its
    // result against '_targetLoc':
    //   - equal:   the child's data is merged into the target member and the child leaves the
    //              queue; when the queue becomes empty the target is tested against '_filter'
    //              and either returned through 'out' (ADVANCED) or freed (NEED_TIME).
    //   - smaller: the child's result is freed and the child is tried again later.
    //   - larger:  the current target is freed and the child's result becomes the new target
    //              that every other child must chase.
    //
    // Requires that a target is currently set ('_targetNode' / '_targetId' are verified).
    // 'out' is assigned on ADVANCED, FAILURE and NEED_FETCH.
    PlanStage::StageState AndSortedStage::moveTowardTargetLoc(WorkingSetID* out) {
        verify(numeric_limits<size_t>::max() != _targetNode);
        verify(WorkingSet::INVALID_ID != _targetId);

        // We have nodes that haven't hit _targetLoc yet.
        size_t workingChildNumber = _workingTowardRep.front();
        PlanStage* next = _children[workingChildNumber];
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState state = next->work(&id);

        if (PlanStage::ADVANCED == state) {
            WorkingSetMember* member = _ws->get(id);

            // Maybe the child had an invalidation.  We intersect DiskLoc(s) so we can't do anything
            // with this WSM.  This is a NEED_TIME result like any other, so count it.
            if (!member->hasLoc()) {
                _ws->flagForReview(id);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }

            // From here on the member is known to have a DiskLoc.
            if (member->loc == _targetLoc) {
                // The front element has hit _targetLoc.  Don't move it forward anymore/work on
                // another element.
                _workingTowardRep.pop();
                AndCommon::mergeFrom(_ws->get(_targetId), *member);
                _ws->free(id);

                if (0 == _workingTowardRep.size()) {
                    WorkingSetID toReturn = _targetId;
                    WorkingSetMember* toMatchTest = _ws->get(toReturn);

                    // Clear the target before returning.
                    _targetNode = numeric_limits<size_t>::max();
                    _targetId = WorkingSet::INVALID_ID;
                    _targetLoc = DiskLoc();

                    // Everyone hit it, hooray.  Return it, if it matches.
                    if (Filter::passes(toMatchTest, _filter)) {
                        if (NULL != _filter) {
                            ++_specificStats.matchTested;
                        }

                        *out = toReturn;
                        ++_commonStats.advanced;
                        return PlanStage::ADVANCED;
                    }
                    else {
                        _ws->free(toReturn);
                        ++_commonStats.needTime;
                        return PlanStage::NEED_TIME;
                    }
                }
                // More children need to be advanced to _targetLoc.
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else if (member->loc < _targetLoc) {
                // The front element of _workingTowardRep hasn't hit the thing we're AND-ing with
                // yet.  Try again later.
                _ws->free(id);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else {
                // member->loc > _targetLoc.
                // _targetLoc wasn't successfully AND-ed with the other sub-plans.  We toss it and
                // try AND-ing with the next value.
                _specificStats.failedAnd[_targetNode]++;

                _ws->free(_targetId);
                _targetNode = workingChildNumber;
                _targetLoc = member->loc;
                _targetId = id;
                // Every child except the one that produced the new target has to catch up.
                _workingTowardRep = std::queue<size_t>();
                for (size_t i = 0; i < _children.size(); ++i) {
                    if (workingChildNumber != i) {
                        _workingTowardRep.push(i);
                    }
                }
                // Need time to chase after the new _targetLoc.
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
        else if (PlanStage::IS_EOF == state) {
            // One child is exhausted, so the stage as a whole is done; release the pending target.
            _isEOF = true;
            _ws->free(_targetId);
            return state;
        }
        else if (PlanStage::FAILURE == state) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "sorted AND stage failed to read in results from child " << workingChildNumber;
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            }
            _isEOF = true;
            _ws->free(_targetId);
            return state;
        }
        else {
            // Any other state is passed through unchanged after updating the matching counter.
            if (PlanStage::NEED_TIME == state) {
                ++_commonStats.needTime;
            }
            else if (PlanStage::NEED_FETCH == state) {
                ++_commonStats.needFetch;
                *out = id;
            }

            return state;
        }
    }
Example #3
0
    // Performs one unit of work for the hashed AND stage.
    //
    // The first call works every child up to kLookAheadWorks times and caches any result in
    // '_lookAheadResults'.  Later calls either build the hash table ('_hashingChildren' is set:
    // readFirstChild() / hashOtherChildren()) or probe '_dataMap' with results of the last child.
    //
    // 'out' is assigned on ADVANCED and on the FAILURE paths handled here (a status member is
    // allocated when none is available).  When the last child returns a non-ADVANCED state, that
    // state and whatever workChild() wrote to 'out' are passed through unchanged.
    PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }

        // Fast-path for one of our children being EOF immediately.  We work each child a few times.
        // If it hits EOF, the AND cannot output anything.  If it produces a result, we stash that
        // result in _lookAheadResults.
        if (_lookAheadResults.empty()) {
            // INVALID_ID means that the child didn't produce a valid result.

            // We specifically are not using .resize(size, value) here because C++11 builds don't
            // seem to resolve WorkingSet::INVALID_ID during linking.
            _lookAheadResults.resize(_children.size());
            for (size_t i = 0; i < _children.size(); ++i) {
                _lookAheadResults[i] =  WorkingSet::INVALID_ID;
            }

            // Work each child some number of times until it's either EOF or produces
            // a result.  If it's EOF this whole stage will be EOF.  If it produces a
            // result we cache it for later.
            for (size_t i = 0; i < _children.size(); ++i) {
                PlanStage* child = _children[i];
                for (size_t j = 0; j < kLookAheadWorks; ++j) {
                    StageState childStatus = child->work(&_lookAheadResults[i]);

                    if (PlanStage::IS_EOF == childStatus || PlanStage::DEAD == childStatus) {

                        // A child went right to EOF.  Bail out.
                        _hashingChildren = false;
                        _dataMap.clear();
                        return PlanStage::IS_EOF;
                    }
                    else if (PlanStage::ADVANCED == childStatus) {
                        // We have a result cached in _lookAheadResults[i].  Stop looking at this
                        // child.
                        break;
                    }
                    else if (PlanStage::FAILURE == childStatus) {
                        // Propagate error to parent.
                        *out = _lookAheadResults[i];
                        // If a stage fails, it may create a status WSM to indicate why it
                        // failed, in which case 'id' is valid.  If ID is invalid, we
                        // create our own error message.
                        if (WorkingSet::INVALID_ID == *out) {
                            mongoutils::str::stream ss;
                            ss << "hashed AND stage failed to read in look ahead results "
                               << "from child " << i;
                            Status status(ErrorCodes::InternalError, ss);
                            *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                        }

                        _hashingChildren = false;
                        _dataMap.clear();
                        return PlanStage::FAILURE;
                    }
                    // We ignore NEED_TIME. TODO: what do we want to do if we get NEED_YIELD here?
                }
            }

            // We did a bunch of work above, return NEED_TIME to be fair.
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        // An AND is either reading the first child into the hash table, probing against the hash
        // table with subsequent children, or checking the last child's results to see if they're
        // in the hash table.

        // We read the first child into our hash table.
        if (_hashingChildren) {
            // Check memory usage of previously hashed results.
            if (_memUsage > _maxMemUsage) {
                // Report the limit that was actually enforced ('_maxMemUsage'), not the default.
                mongoutils::str::stream ss;
                ss << "hashed AND stage buffered data usage of " << _memUsage
                   << " bytes exceeds internal limit of " << _maxMemUsage << " bytes";
                Status status(ErrorCodes::Overflow, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                return PlanStage::FAILURE;
            }

            if (0 == _currentChild) {
                return readFirstChild(out);
            }
            else if (_currentChild < _children.size() - 1) {
                return hashOtherChildren(out);
            }
            else {
                _hashingChildren = false;
                // We don't hash our last child.  Instead, we probe the table created from the
                // previous children, returning results in the order of the last child.
                // Fall through to below.
            }
        }

        // Returning results.  We read from the last child and return the results that are in our
        // hash map.

        // We should be EOF if we're not hashing results and the dataMap is empty.
        verify(!_dataMap.empty());

        // We probe _dataMap with the last child.
        verify(_currentChild == _children.size() - 1);

        // Get the next result for the (_children.size() - 1)-th child.
        StageState childStatus = workChild(_children.size() - 1, out);
        if (PlanStage::ADVANCED != childStatus) {
            return childStatus;
        }

        // We know that we've ADVANCED.  See if the WSM is in our table.
        WorkingSetMember* member = _ws->get(*out);

        // Maybe the child had an invalidation.  We intersect RecordId(s) so we can't do anything
        // with this WSM.
        if (!member->hasLoc()) {
            _ws->flagForReview(*out);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        DataMap::iterator it = _dataMap.find(member->loc);
        if (_dataMap.end() == it) {
            // Child's output wasn't in every previous child.  Throw it out.
            _ws->free(*out);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else {
            // Child's output was in every previous child.  Merge any key data in
            // the child's output and free the child's just-outputted WSM.
            WorkingSetID hashID = it->second;
            _dataMap.erase(it);

            WorkingSetMember* olderMember = _ws->get(hashID);
            AndCommon::mergeFrom(olderMember, *member);
            _ws->free(*out);

            // We should check for matching at the end so the matcher can use information in the
            // indices of all our children.
            if (Filter::passes(olderMember, _filter)) {
                *out = hashID;
                ++_commonStats.advanced;
                return PlanStage::ADVANCED;
            }
            else {
                _ws->free(hashID);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
    }
Example #4
0
// One step of the merge: either pull a result from a child that currently has nothing buffered,
// or, when no child is waiting, emit the top of '_merging' through 'out'.
//
// 'out' is assigned on ADVANCED, FAILURE and NEED_YIELD.
PlanStage::StageState MergeSortStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (_noResultToMerge.empty()) {
        // No child is waiting, so for each non-EOF child we hold a valid WSID.
        verify(!_merging.empty());

        // _merging is a priority queue, so its top is the 'min' WSID.
        MergingRef smallest = _merging.top();
        _merging.pop();

        // The stage that supplied this result has to be worked again to replace it.
        _noResultToMerge.push(smallest->stage);

        // Remember the ID before the list node holding it is erased.
        const WorkingSetID winner = smallest->id;
        _mergingData.erase(smallest);

        *out = winner;
        return PlanStage::ADVANCED;
    }

    // Some child has no buffered result.  Every child needs one before the minimum can be
    // picked, so work the child at the head of the queue.
    PlanStage* const pending = _noResultToMerge.front();
    WorkingSetID childId = WorkingSet::INVALID_ID;
    const StageState childState = pending->work(&childId);

    if (PlanStage::IS_EOF == childState) {
        // Nothing more can come from this child; stop tracking it.
        _noResultToMerge.pop();
        return PlanStage::NEED_TIME;
    }

    if (PlanStage::FAILURE == childState) {
        // The failing stage is responsible for allocating a working set member that carries
        // the error details.
        invariant(WorkingSet::INVALID_ID != childId);
        *out = childId;
        return childState;
    }

    if (PlanStage::NEED_YIELD == childState) {
        *out = childId;
        return childState;
    }

    if (PlanStage::ADVANCED != childState) {
        // Any remaining state is handed to the caller untouched.
        return childState;
    }

    WorkingSetMember* wsm = _ws->get(childId);

    // Deduplication is only possible for members that carry a RecordId; anything else is
    // accepted as-is.
    if (_dedup && wsm->hasRecordId()) {
        ++_specificStats.dupsTested;

        const bool alreadySeen = (_seen.find(wsm->recordId) != _seen.end());
        if (alreadySeen) {
            // Duplicate: discard it.  The child stays queued and is worked again next time.
            _ws->free(childId);
            ++_specificStats.dupsDropped;
            return PlanStage::NEED_TIME;
        }

        // First sighting of this RecordId; remember it.
        _seen.insert(wsm->recordId);
    }

    // The child's result is going to be used, so the child no longer lacks a result.
    _noResultToMerge.pop();

    // Make sure the BSONObj behind the member is owned in case we yield.
    wsm->makeObjOwnedIfNeeded();

    // Buffer the result and index it (indirectly) in the priority queue.
    StageWithValue entry;
    entry.id = childId;
    entry.stage = pending;
    _mergingData.push_front(entry);
    _merging.push(_mergingData.begin());

    return PlanStage::NEED_TIME;
}
Example #5
0
    // Performs one unit of work for the merge sort stage.
    //
    // While '_noResultToMerge' is non-empty, the child at its front is worked once and its result
    // (if any) is buffered in '_mergingData' and referenced from '_merging'.  Once no child is
    // waiting, the top of '_merging' is removed and handed back through 'out'.
    //
    // 'out' is assigned when a result is popped from '_merging', on FAILURE and on NEED_FETCH.
    //
    // '_commonStats.advanced' is only incremented when ADVANCED is actually returned; a flagged
    // result is freed, reported as NEED_TIME and counted in '_commonStats.needTime'.
    PlanStage::StageState MergeSortStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        if (isEOF()) { return PlanStage::IS_EOF; }

        if (!_noResultToMerge.empty()) {
            // We have some child that we don't have a result from.  Each child must have a result
            // in order to pick the minimum result among all our children.  Work a child.
            PlanStage* child = _noResultToMerge.front();
            WorkingSetID id = WorkingSet::INVALID_ID;
            StageState code = child->work(&id);

            if (PlanStage::ADVANCED == code) {
                // If we're deduping...
                if (_dedup) {
                    WorkingSetMember* member = _ws->get(id);

                    if (!member->hasLoc()) {
                        // Can't dedup data unless there's a DiskLoc.  We go ahead and use its
                        // result.
                        _noResultToMerge.pop();
                    }
                    else {
                        ++_specificStats.dupsTested;
                        // ...and there's a DiskLoc and we've seen the DiskLoc before
                        if (_seen.end() != _seen.find(member->loc)) {
                            // ...drop it.  The child stays at the front of the queue so it is
                            // worked again on the next call.
                            _ws->free(id);
                            ++_commonStats.needTime;
                            ++_specificStats.dupsDropped;
                            return PlanStage::NEED_TIME;
                        }
                        else {
                            // Otherwise, note that we've seen it.
                            _seen.insert(member->loc);
                            // We're going to use the result from the child, so we remove it from
                            // the queue of children without a result.
                            _noResultToMerge.pop();
                        }
                    }
                }
                else {
                    // Not deduping.  We use any result we get from the child.  Remove the child
                    // from the queue of things without a result.
                    _noResultToMerge.pop();
                }

                // Store the result in our list.
                StageWithValue value;
                value.id = id;
                value.stage = child;
                _mergingData.push_front(value);

                // Insert the result (indirectly) into our priority queue.
                _merging.push(_mergingData.begin());

                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else if (PlanStage::IS_EOF == code) {
                // There are no more results possible from this child.  Don't bother with it
                // anymore.
                _noResultToMerge.pop();
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else if (PlanStage::FAILURE == code) {
                // Hand the child's id to the caller along with the failure.
                *out = id;
                return code;
            }
            else {
                // Any other state is passed through unchanged after updating the matching
                // counter.
                if (PlanStage::NEED_FETCH == code) {
                    *out = id;
                    ++_commonStats.needFetch;
                }
                else if (PlanStage::NEED_TIME == code) {
                    ++_commonStats.needTime;
                }
                return code;
            }
        }

        // If we're here, for each non-EOF child, we have a valid WSID.
        verify(!_merging.empty());

        // Get the 'min' WSID.  _merging is a priority queue so its top is the smallest.
        MergingRef top = _merging.top();
        _merging.pop();

        // Since we're returning the WSID that came from top->stage, we need to work(...) it again
        // to get a new result.
        _noResultToMerge.push(top->stage);

        // Save the ID that we're returning and remove the returned result from our data.
        WorkingSetID idToTest = top->id;
        _mergingData.erase(top);

        // Return the min.
        *out = idToTest;

        // But don't return it if it's flagged.  This path yields NEED_TIME, so it must be counted
        // as needTime rather than as an advance.
        if (_ws->isFlagged(*out)) {
            _ws->free(*out);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }
Example #6
0
    // Performs one unit of work for the hashed AND stage.
    //
    // The first call works every child up to kLookAheadWorks times and caches any result in
    // '_lookAheadResults'.  Later calls either build the hash table ('_hashingChildren' is set:
    // readFirstChild() / hashOtherChildren()) or probe '_dataMap' with results of the last child.
    //
    // 'out' is assigned on ADVANCED and when a child FAILURE is propagated from the look-ahead
    // phase.  When the last child returns a non-ADVANCED state, that state and whatever
    // workChild() wrote to 'out' are passed through unchanged.
    PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        if (isEOF()) { return PlanStage::IS_EOF; }

        // Fast-path for one of our children being EOF immediately.  We work each child a few times.
        // If it hits EOF, the AND cannot output anything.  If it produces a result, we stash that
        // result in _lookAheadResults.
        if (_lookAheadResults.empty()) {
            // INVALID_ID means that the child didn't produce a valid result.
            _lookAheadResults.resize(_children.size(), WorkingSet::INVALID_ID);

            // Work each child some number of times until it's either EOF or produces
            // a result.  If it's EOF this whole stage will be EOF.  If it produces a
            // result we cache it for later.
            for (size_t i = 0; i < _children.size(); ++i) {
                PlanStage* child = _children[i];
                for (size_t j = 0; j < kLookAheadWorks; ++j) {
                    StageState childStatus = child->work(&_lookAheadResults[i]);

                    if (PlanStage::IS_EOF == childStatus || PlanStage::DEAD == childStatus) {

                        // A child went right to EOF.  Bail out.
                        _hashingChildren = false;
                        _dataMap.clear();
                        return PlanStage::IS_EOF;
                    }
                    else if (PlanStage::FAILURE == childStatus) {
                        // A failing child must not be reported as a clean EOF: hand the failure
                        // and the id the child wrote (if any) to our parent.
                        *out = _lookAheadResults[i];
                        _hashingChildren = false;
                        _dataMap.clear();
                        return PlanStage::FAILURE;
                    }
                    else if (PlanStage::ADVANCED == childStatus) {
                        // We have a result cached in _lookAheadResults[i].  Stop looking at this
                        // child.
                        break;
                    }
                    // We ignore NEED_TIME.  TODO: What do we want to do if the child provides
                    // NEED_FETCH?
                }
            }

            // We did a bunch of work above, return NEED_TIME to be fair.
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        // An AND is either reading the first child into the hash table, probing against the hash
        // table with subsequent children, or checking the last child's results to see if they're
        // in the hash table.

        // We read the first child into our hash table.
        if (_hashingChildren) {
            if (0 == _currentChild) {
                return readFirstChild(out);
            }
            else if (_currentChild < _children.size() - 1) {
                return hashOtherChildren(out);
            }
            else {
                _hashingChildren = false;
                // We don't hash our last child.  Instead, we probe the table created from the
                // previous children, returning results in the order of the last child.
                // Fall through to below.
            }
        }

        // Returning results.  We read from the last child and return the results that are in our
        // hash map.

        // We should be EOF if we're not hashing results and the dataMap is empty.
        verify(!_dataMap.empty());

        // We probe _dataMap with the last child.
        verify(_currentChild == _children.size() - 1);

        // Get the next result for the (_children.size() - 1)-th child.
        StageState childStatus = workChild(_children.size() - 1, out);
        if (PlanStage::ADVANCED != childStatus) {
            return childStatus;
        }

        // We know that we've ADVANCED.  See if the WSM is in our table.
        WorkingSetMember* member = _ws->get(*out);

        // Maybe the child had an invalidation.  We intersect DiskLoc(s) so we can't do anything
        // with this WSM.
        if (!member->hasLoc()) {
            _ws->flagForReview(*out);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }

        DataMap::iterator it = _dataMap.find(member->loc);
        if (_dataMap.end() == it) {
            // Child's output wasn't in every previous child.  Throw it out.
            _ws->free(*out);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else {
            // Child's output was in every previous child.  Merge any key data in
            // the child's output and free the child's just-outputted WSM.
            WorkingSetID hashID = it->second;
            _dataMap.erase(it);

            WorkingSetMember* olderMember = _ws->get(hashID);
            AndCommon::mergeFrom(olderMember, *member);
            _ws->free(*out);

            // We should check for matching at the end so the matcher can use information in the
            // indices of all our children.
            if (Filter::passes(olderMember, _filter)) {
                *out = hashID;
                ++_commonStats.advanced;
                return PlanStage::ADVANCED;
            }
            else {
                _ws->free(hashID);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
    }