void run() {
            Client::WriteContext ctx(ns());
            fillData();

            // The data we're going to later invalidate.
            set<DiskLoc> locs;
            getLocs(&locs);

            // Build the mock stage which feeds the data.
            WorkingSet ws;
            auto_ptr<MockStage> ms(new MockStage(&ws));
            insertVarietyOfObjects(ms.get());

            SortStageParams params;
            params.pattern = BSON("foo" << 1);
            auto_ptr<SortStage> ss(new SortStage(params, &ws, ms.get()));

            const int firstRead = 10;

            // Have sort read in data from the mock stage.
            for (int i = 0; i < firstRead; ++i) {
                WorkingSetID id;
                PlanStage::StageState status = ss->work(&id);
                ASSERT_NOT_EQUALS(PlanStage::ADVANCED, status);
            }

            // We should have read in the first 'firstRead' locs.  Invalidate the first.
            ss->prepareToYield();
            set<DiskLoc>::iterator it = locs.begin();
            ss->invalidate(*it++);
            ss->recoverFromYield();

            // Read the rest of the data from the mock stage.
            while (!ms->isEOF()) {
                WorkingSetID id;
                ss->work(&id);
            }

            // Release to prevent double-deletion.
            ms.release();

            // Let's just invalidate everything now.
            ss->prepareToYield();
            while (it != locs.end()) {
                ss->invalidate(*it++);
            }
            ss->recoverFromYield();

            // The sort should still work.
            int count = 0;
            while (!ss->isEOF()) {
                WorkingSetID id;
                PlanStage::StageState status = ss->work(&id);
                if (PlanStage::ADVANCED != status) { continue; }
                WorkingSetMember* member = ws.get(id);
                ASSERT(member->hasObj());
                ASSERT(!member->hasLoc());
                ++count;
            }

            // We've invalidated everything, but only 2/3 of our data had a DiskLoc to be
            // invalidated.  We get the rest as-is.
            ASSERT_EQUALS(count, numObj());
        }
Example #2
    PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
        PlanStage::StageState state = _child->work(out);

        // All done reading from _child.
        if (PlanStage::IS_EOF == state) {
            _child.reset();

            // Adjust the annulus size depending on how many results we got.
            if (_results.empty()) {
                _radiusIncrement *= 2;
            } else if (_results.size() < 300) {
                _radiusIncrement *= 2;
            } else if (_results.size() > 600) {
                _radiusIncrement /= 2;
            }

            // Make a new ixscan next time.
            return PlanStage::NEED_TIME;
        }

        // Nothing to do unless we advance.
        if (PlanStage::ADVANCED != state) { return state; }

        // TODO Speed improvements:
        //
        // 0. Modify fetch to preserve key data and test for intersection w/annulus.
        //
        // 1. keep track of what we've seen in this scan and possibly ignore it.
        //
        // 2. keep track of results we've returned before and ignore them.

        WorkingSetMember* member = _ws->get(*out);
        // Must have an object in order to get geometry out of it.
        verify(member->hasObj());

        // Get all the fields with that name from the document.
        BSONElementSet geom;
        member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
        if (geom.empty()) { return PlanStage::NEED_TIME; }

        // Some value that any distance we can calculate will be less than.
        double minDistance = numeric_limits<double>::max();
        BSONObj minDistanceObj;
        for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
            if (!git->isABSONObj()) { return PlanStage::FAILURE; }
            BSONObj obj = git->Obj();

            double distToObj;
            if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
                if (distToObj < minDistance) {
                    minDistance = distToObj;
                    minDistanceObj = obj;
                }
            }
            else {
                warning() << "unknown geometry: " << obj.toString();
            }
        }

        // If the distance to the doc satisfies our distance criteria, add it to our buffered
        // results.
        if (minDistance >= _innerRadius &&
            (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
            _results.push(Result(*out, minDistance));
            if (_params.addDistMeta) {
                member->addComputed(new GeoDistanceComputedData(minDistance));
            }
            if (_params.addPointMeta) {
                member->addComputed(new GeoNearPointComputedData(minDistanceObj));
            }
            if (member->hasLoc()) {
                _invalidationMap[member->loc] = *out;
            }
        }

        return PlanStage::NEED_TIME;
    }
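The annulus-growth heuristic above is easy to see in isolation. A minimal sketch, assuming a free helper function (the name and signature are illustrative, not MongoDB's API); the 300/600 thresholds mirror the snippet:

    #include <cstddef>

    // Widen the search ring faster when the last annulus was sparse, more slowly when
    // it was dense; otherwise keep the current pace. (Hypothetical helper.)
    double nextRadiusIncrement(double increment, std::size_t resultsInLastAnnulus) {
        if (resultsInLastAnnulus < 300) {
            return increment * 2;   // Few or no results: expand faster.
        } else if (resultsInLastAnnulus > 600) {
            return increment / 2;   // Plenty of results: expand more slowly.
        }
        return increment;           // Result count is in the sweet spot.
    }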
Example #3
    void run() {
        // Populate the collection.
        for (int i = 0; i < 50; ++i) {
            insert(BSON("_id" << i << "foo" << i));
        }
        ASSERT_EQUALS(50U, count(BSONObj()));

        // Various variables we'll need.
        OldClientWriteContext ctx(&_txn, nss.ns());
        OpDebug* opDebug = &CurOp::get(_txn)->debug();
        Collection* coll = ctx.getCollection();
        UpdateLifecycleImpl updateLifecycle(false, nss);
        UpdateRequest request(nss);
        UpdateDriver driver((UpdateDriver::Options()));
        const int targetDocIndex = 10;
        const BSONObj query = BSON("foo" << BSON("$gte" << targetDocIndex));
        const unique_ptr<WorkingSet> ws(stdx::make_unique<WorkingSet>());
        const unique_ptr<CanonicalQuery> cq(canonicalize(query));

        // Get the RecordIds that would be returned by an in-order scan.
        vector<RecordId> locs;
        getLocs(coll, CollectionScanParams::FORWARD, &locs);

        // Populate the request.
        request.setQuery(query);
        request.setUpdates(fromjson("{$set: {x: 0}}"));
        request.setSort(BSONObj());
        request.setMulti(false);
        request.setReturnDocs(UpdateRequest::RETURN_NEW);
        request.setLifecycle(&updateLifecycle);

        ASSERT_OK(driver.parse(request.getUpdates(), request.isMulti()));

        // Configure a QueuedDataStage to pass the first object in the collection back in a
        // LOC_AND_OBJ state.
        std::unique_ptr<QueuedDataStage> qds(stdx::make_unique<QueuedDataStage>(ws.get()));
        WorkingSetID id = ws->allocate();
        WorkingSetMember* member = ws->get(id);
        member->loc = locs[targetDocIndex];
        const BSONObj oldDoc = BSON("_id" << targetDocIndex << "foo" << targetDocIndex);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), oldDoc);
        ws->transitionToLocAndObj(id);
        qds->pushBack(id);

        // Configure the update.
        UpdateStageParams updateParams(&request, &driver, opDebug);
        updateParams.canonicalQuery = cq.get();

        unique_ptr<UpdateStage> updateStage(
            stdx::make_unique<UpdateStage>(&_txn, updateParams, ws.get(), coll, qds.release()));

        // Should return advanced.
        id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = updateStage->work(&id);
        ASSERT_EQUALS(PlanStage::ADVANCED, state);

        // Make sure the returned value is what we expect it to be.

        // Should give us back a valid id.
        ASSERT_TRUE(WorkingSet::INVALID_ID != id);
        WorkingSetMember* resultMember = ws->get(id);
        // With an owned copy of the object, with no RecordId.
        ASSERT_TRUE(resultMember->hasOwnedObj());
        ASSERT_FALSE(resultMember->hasLoc());
        ASSERT_EQUALS(resultMember->getState(), WorkingSetMember::OWNED_OBJ);
        ASSERT_TRUE(resultMember->obj.value().isOwned());

        // Should be the new value.
        BSONObj newDoc = BSON("_id" << targetDocIndex << "foo" << targetDocIndex << "x" << 0);
        ASSERT_EQUALS(resultMember->obj.value(), newDoc);

        // Should have done the update.
        vector<BSONObj> objs;
        getCollContents(coll, &objs);
        ASSERT_EQUALS(objs[targetDocIndex], newDoc);

        // That should be it.
        id = WorkingSet::INVALID_ID;
        ASSERT_EQUALS(PlanStage::IS_EOF, updateStage->work(&id));
    }
Example #4
    PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
        PlanStage::StageState state = _child->work(out);

        // All done reading from _child.
        if (PlanStage::IS_EOF == state) {
            _child.reset();
            _keyGeoFilter.reset();

            // Adjust the annulus size depending on how many results we got.
            if (_results.empty()) {
                _radiusIncrement *= 2;
            } else if (_results.size() < 300) {
                _radiusIncrement *= 2;
            } else if (_results.size() > 600) {
                _radiusIncrement /= 2;
            }

            // Make a new ixscan next time.
            return PlanStage::NEED_TIME;
        }

        // Nothing to do unless we advance.
        if (PlanStage::ADVANCED != state) { return state; }

        WorkingSetMember* member = _ws->get(*out);
        // Must have an object in order to get geometry out of it.
        verify(member->hasObj());

        // The scans we use don't dedup so we must dedup them ourselves.  We only put locs into here
        // if we know for sure whether or not we'll return them in this annulus.
        if (member->hasLoc()) {
            if (_seenInScan.end() != _seenInScan.find(member->loc)) {
                return PlanStage::NEED_TIME;
            }
        }

        // Get all the fields with that name from the document.
        BSONElementSet geom;
        member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
        if (geom.empty()) {
            return PlanStage::NEED_TIME;
        }

        // Some value that any distance we can calculate will be less than.
        double minDistance = numeric_limits<double>::max();
        BSONObj minDistanceObj;
        for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
            if (!git->isABSONObj()) {
                mongoutils::str::stream ss;
                ss << "s2near stage read invalid geometry element " << *git << " from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                return PlanStage::FAILURE;
            }
            BSONObj obj = git->Obj();

            double distToObj;
            if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
                if (distToObj < minDistance) {
                    minDistance = distToObj;
                    minDistanceObj = obj;
                }
            }
            else {
                warning() << "unknown geometry: " << obj.toString();
            }
        }

        // If we're here we'll either include the doc in this annulus or reject it.  It's safe to
        // ignore it if it pops up again in this annulus.
        if (member->hasLoc()) {
            _seenInScan.insert(member->loc);
        }

        // If the distance to the doc satisfies our distance criteria, add it to our buffered
        // results.
        if (minDistance >= _innerRadius &&
            (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
            _results.push(Result(*out, minDistance));
            if (_params.addDistMeta) {
                member->addComputed(new GeoDistanceComputedData(minDistance));
            }
            if (_params.addPointMeta) {
                member->addComputed(new GeoNearPointComputedData(minDistanceObj));
            }
            if (member->hasLoc()) {
                _invalidationMap[member->loc] = *out;
            }
        }

        return PlanStage::NEED_TIME;
    }
Example #5
PlanStage::StageState AndSortedStage::moveTowardTargetLoc(WorkingSetID* out) {
    verify(numeric_limits<size_t>::max() != _targetNode);
    verify(WorkingSet::INVALID_ID != _targetId);

    // We have nodes that haven't hit _targetLoc yet.
    size_t workingChildNumber = _workingTowardRep.front();
    PlanStage* next = _children[workingChildNumber];
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState state = next->work(&id);

    if (PlanStage::ADVANCED == state) {
        WorkingSetMember* member = _ws->get(id);

        // Maybe the child had an invalidation.  We intersect RecordId(s) so we can't do anything
        // with this WSM.
        if (!member->hasLoc()) {
            _ws->flagForReview(id);
            return PlanStage::NEED_TIME;
        }

        verify(member->hasLoc());

        if (member->loc == _targetLoc) {
            // The front element has hit _targetLoc.  Don't move it forward anymore/work on
            // another element.
            _workingTowardRep.pop();
            AndCommon::mergeFrom(_ws, _targetId, *member);
            _ws->free(id);

            if (0 == _workingTowardRep.size()) {
                WorkingSetID toReturn = _targetId;

                _targetNode = numeric_limits<size_t>::max();
                _targetId = WorkingSet::INVALID_ID;
                _targetLoc = RecordId();

                *out = toReturn;
                ++_commonStats.advanced;
                return PlanStage::ADVANCED;
            }
            // More children need to be advanced to _targetLoc.
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else if (member->loc < _targetLoc) {
            // The front element of _workingTowardRep hasn't hit the thing we're AND-ing with
            // yet.  Try again later.
            _ws->free(id);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else {
            // member->loc > _targetLoc.
            // _targetLoc wasn't successfully AND-ed with the other sub-plans.  We toss it and
            // try AND-ing with the next value.
            _specificStats.failedAnd[_targetNode]++;

            _ws->free(_targetId);
            _targetNode = workingChildNumber;
            _targetLoc = member->loc;
            _targetId = id;
            _workingTowardRep = std::queue<size_t>();
            for (size_t i = 0; i < _children.size(); ++i) {
                if (workingChildNumber != i) {
                    _workingTowardRep.push(i);
                }
            }
            // Need time to chase after the new _targetLoc.
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
    } else if (PlanStage::IS_EOF == state) {
        _isEOF = true;
        _ws->free(_targetId);
        return state;
    } else if (PlanStage::FAILURE == state || PlanStage::DEAD == state) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "sorted AND stage failed to read in results from child " << workingChildNumber;
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        _isEOF = true;
        _ws->free(_targetId);
        return state;
    } else {
        if (PlanStage::NEED_TIME == state) {
            ++_commonStats.needTime;
        } else if (PlanStage::NEED_YIELD == state) {
            ++_commonStats.needYield;
            *out = id;
        }

        return state;
    }
}
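The stage above generalizes the classic sorted-intersection walk to N children. A minimal two-input sketch of the same idea, with plain vectors of ascending ids standing in for child stages (names and types are illustrative):

    #include <cstddef>
    #include <vector>

    // Both inputs yield ids in ascending order. Advance whichever input is behind the
    // current target id; emit an id only when both inputs produce it.
    std::vector<long> sortedIntersect(const std::vector<long>& a, const std::vector<long>& b) {
        std::vector<long> out;
        std::size_t i = 0, j = 0;
        while (i < a.size() && j < b.size()) {
            if (a[i] == b[j]) {
                out.push_back(a[i]);  // Every input reached the target: the AND succeeds.
                ++i;
                ++j;
            } else if (a[i] < b[j]) {
                ++i;                  // 'a' is behind the target: keep advancing it.
            } else {
                ++j;                  // 'b' is behind: advance it instead.
            }
        }
        return out;
    }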
Example #6
    /**
     * addToBuffer() and sortBuffer() work differently based on the
     * configured limit. addToBuffer() is also responsible for
     * performing some accounting on the overall memory usage to
     * make sure we're not using too much memory.
     *
     * limit == 0:
     *     addToBuffer() - Adds item to vector.
     *     sortBuffer() - Sorts vector.
     * limit == 1:
     *     addToBuffer() - Keeps whichever of the buffered item and the new
     *                     item sorts first, so the vector holds the top-1
     *                     result. Updates memory usage if the item was replaced.
     *     sortBuffer() - Does nothing.
     * limit > 1:
     *     addToBuffer() - Does not update vector. Adds item to set.
     *                     Once the set is at the limit, a new item replaces
     *                     the item that sorts last, provided the new item
     *                     sorts before it. Updates memory usage accordingly.
     *     sortBuffer() - Copies items from set to vector.
     */
    void SortStage::addToBuffer(const SortableDataItem& item) {
        // Holds ID of working set member to be freed at end of this function.
        WorkingSetID wsidToFree = WorkingSet::INVALID_ID;

        if (_limit == 0) {
            _data.push_back(item);
            _memUsage += _ws->get(item.wsid)->getMemUsage();
        }
        else if (_limit == 1) {
            if (_data.empty()) {
                _data.push_back(item);
                _memUsage = _ws->get(item.wsid)->getMemUsage();
                return;
            }
            wsidToFree = item.wsid;
            const WorkingSetComparator& cmp = *_sortKeyComparator;
            // Compare new item with existing item in vector.
            if (cmp(item, _data[0])) {
                wsidToFree = _data[0].wsid;
                _data[0] = item;
                _memUsage = _ws->get(item.wsid)->getMemUsage();
            }
        }
        else {
            // Update data item set instead of vector
            // Limit not reached - insert and return
            vector<SortableDataItem>::size_type limit(_limit);
            if (_dataSet->size() < limit) {
                _dataSet->insert(item);
                _memUsage += _ws->get(item.wsid)->getMemUsage();
                return;
            }
            // The set is full - compare the new item with the item that sorts last.
            // If the new item does not sort before that last item, do nothing.
            wsidToFree = item.wsid;
            SortableDataItemSet::const_iterator lastItemIt = --(_dataSet->end());
            const SortableDataItem& lastItem = *lastItemIt;
            const WorkingSetComparator& cmp = *_sortKeyComparator;
            if (cmp(item, lastItem)) {
                _memUsage -= _ws->get(lastItem.wsid)->getMemUsage();
                _memUsage += _ws->get(item.wsid)->getMemUsage();
                wsidToFree = lastItem.wsid;
                // According to std::set iterator validity rules,
                // it does not matter which of erase()/insert() happens first.
                // Here, we choose to erase first to release potential resources
                // used by the last item and to keep the scope of the iterator to a minimum.
                _dataSet->erase(lastItemIt);
                _dataSet->insert(item);
            }
        }

        // If the working set ID is valid, remove from
        // DiskLoc invalidation map and free from working set.
        if (wsidToFree != WorkingSet::INVALID_ID) {
            WorkingSetMember* member = _ws->get(wsidToFree);
            if (member->hasLoc()) {
                _wsidByDiskLoc.erase(member->loc);
            }
            _ws->free(wsidToFree);
        }
    }
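A standalone sketch of the limit > 1 bookkeeping described in the comment above: keep at most 'limit' items and evict the item that sorts last when a better candidate arrives. The Item type and helper are illustrative stand-ins for SortableDataItem and the working-set accounting:

    #include <cstddef>
    #include <iterator>
    #include <set>

    struct Item {
        int sortKey;
        bool operator<(const Item& other) const { return sortKey < other.sortKey; }
    };

    void addWithLimit(std::multiset<Item>& buffer, std::size_t limit, const Item& item) {
        if (buffer.size() < limit) {
            buffer.insert(item);              // Limit not reached: always keep the item.
            return;
        }
        std::multiset<Item>::iterator last = std::prev(buffer.end());  // Sorts last.
        if (item < *last) {
            buffer.erase(last);               // Evict the worst buffered candidate...
            buffer.insert(item);              // ...and keep the better new one.
        }                                     // Otherwise the new item is dropped.
    }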
Example #7
    PlanStage::StageState OrStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }

        if (0 == _specificStats.matchTested.size()) {
            _specificStats.matchTested = vector<size_t>(_children.size(), 0);
        }

        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState childStatus = _children[_currentChild]->work(&id);

        if (PlanStage::ADVANCED == childStatus) {
            WorkingSetMember* member = _ws->get(id);

            // If we're deduping (and there's something to dedup by)
            if (_dedup && member->hasLoc()) {
                ++_specificStats.dupsTested;

                // ...and we've seen the DiskLoc before
                if (_seen.end() != _seen.find(member->loc)) {
                    // ...drop it.
                    ++_specificStats.dupsDropped;
                    _ws->free(id);
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }
                else {
                    // Otherwise, note that we've seen it.
                    _seen.insert(member->loc);
                }
            }

            if (Filter::passes(member, _filter)) {
                if (NULL != _filter) {
                    ++_specificStats.matchTested[_currentChild];
                }
                // Match!  return it.
                *out = id;
                ++_commonStats.advanced;
                return PlanStage::ADVANCED;
            }
            else {
                // Does not match, try again.
                _ws->free(id);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
        else if (PlanStage::IS_EOF == childStatus) {
            // Done with _currentChild, move to the next one.
            ++_currentChild;

            // Maybe we're out of children.
            if (isEOF()) {
                return PlanStage::IS_EOF;
            }
            else {
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
        else if (PlanStage::FAILURE == childStatus) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "OR stage failed to read in results from child " << _currentChild;
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            }
            return childStatus;
        }
        else if (PlanStage::NEED_TIME == childStatus) {
            ++_commonStats.needTime;
        }
        else if (PlanStage::NEED_FETCH == childStatus) {
            ++_commonStats.needFetch;
            *out = id;
        }

        // NEED_TIME, ERROR, NEED_FETCH, pass them up.
        return childStatus;
    }
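The dedup logic above reduces to a union-with-seen-set pattern. A minimal sketch, with integer ids standing in for DiskLocs and plain vectors standing in for child stages (all names are illustrative):

    #include <unordered_set>
    #include <vector>

    // Union several children, emitting each id at most once.
    std::vector<long> dedupUnion(const std::vector<std::vector<long>>& children) {
        std::unordered_set<long> seen;
        std::vector<long> out;
        for (const auto& child : children) {
            for (long id : child) {
                if (seen.insert(id).second) {  // insert() reports whether the id was new.
                    out.push_back(id);
                }
            }
        }
        return out;
    }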
Example #8
    PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        if (isEOF()) { return PlanStage::IS_EOF; }

        // An AND is either reading the first child into the hash table, probing against the hash
        // table with subsequent children, or checking the last child's results to see if they're
        // in the hash table.

        // We read the first child into our hash table.
        if (_hashingChildren) {
            if (0 == _currentChild) {
                return readFirstChild(out);
            }
            else if (_currentChild < _children.size() - 1) {
                return hashOtherChildren(out);
            }
            else {
                _hashingChildren = false;
                // We don't hash our last child.  Instead, we probe the table created from the
                // previous children, returning results in the order of the last child.
                // Fall through to below.
            }
        }

        // Returning results.  We read from the last child and return the results that are in our
        // hash map.

        // We should be EOF if we're not hashing results and the dataMap is empty.
        verify(!_dataMap.empty());

        // We probe _dataMap with the last child.
        verify(_currentChild == _children.size() - 1);

        // Work the last child.
        StageState childStatus = _children[_children.size() - 1]->work(out);
        if (PlanStage::ADVANCED != childStatus) {
            return childStatus;
        }

        // We know that we've ADVANCED.  See if the WSM is in our table.
        WorkingSetMember* member = _ws->get(*out);

        // Maybe the child had an invalidation.  We intersect DiskLoc(s) so we can't do anything
        // with this WSM.
        if (!member->hasLoc()) {
            _ws->flagForReview(*out);
            return PlanStage::NEED_TIME;
        }

        DataMap::iterator it = _dataMap.find(member->loc);
        if (_dataMap.end() == it) {
            // Child's output wasn't in every previous child.  Throw it out.
            _ws->free(*out);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else {
            // Child's output was in every previous child.  Merge any key data in
            // the child's output and free the child's just-outputted WSM.
            WorkingSetID hashID = it->second;
            _dataMap.erase(it);

            WorkingSetMember* olderMember = _ws->get(hashID);
            AndCommon::mergeFrom(olderMember, *member);
            _ws->free(*out);

            // We should check for matching at the end so the matcher can use information in the
            // indices of all our children.
            if (Filter::passes(olderMember, _filter)) {
                *out = hashID;
                ++_commonStats.advanced;
                return PlanStage::ADVANCED;
            }
            else {
                _ws->free(hashID);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
    }
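The comments above describe the hash-AND shape: load the first child into a hash table, then probe it with the last child's stream. A minimal two-input sketch of that idea, with plain ids standing in for working-set members (names are illustrative):

    #include <unordered_set>
    #include <vector>

    std::vector<long> hashIntersect(const std::vector<long>& first,
                                    const std::vector<long>& last) {
        std::unordered_set<long> table(first.begin(), first.end());  // "Hash the first child."
        std::vector<long> out;
        for (long id : last) {                                       // "Probe with the last child."
            if (table.count(id) > 0) {
                out.push_back(id);
            }
        }
        return out;
    }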
Example #9
PlanStage::StageState AndHashStage::readFirstChild(WorkingSetID* out) {
    verify(_currentChild == 0);

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState childStatus = workChild(0, &id);

    if (PlanStage::ADVANCED == childStatus) {
        WorkingSetMember* member = _ws->get(id);

        // Maybe the child had an invalidation.  We intersect RecordId(s) so we can't do anything
        // with this WSM.
        if (!member->hasLoc()) {
            _ws->flagForReview(id);
            return PlanStage::NEED_TIME;
        }

        verify(member->hasLoc());
        verify(_dataMap.end() == _dataMap.find(member->loc));

        _dataMap[member->loc] = id;

        // Update memory stats.
        _memUsage += member->getMemUsage();

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }
    else if (PlanStage::IS_EOF == childStatus) {
        // Done reading child 0.
        _currentChild = 1;

        // If our first child was empty, don't scan any others, no possible results.
        if (_dataMap.empty()) {
            _hashingChildren = false;
            return PlanStage::IS_EOF;
        }

        ++_commonStats.needTime;
        _specificStats.mapAfterChild.push_back(_dataMap.size());

        return PlanStage::NEED_TIME;
    }
    else if (PlanStage::FAILURE == childStatus) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "hashed AND stage failed to read in results to from first child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember( _ws, status);
        }
        return childStatus;
    }
    else {
        if (PlanStage::NEED_TIME == childStatus) {
            ++_commonStats.needTime;
        }
        else if (PlanStage::NEED_FETCH == childStatus) {
            ++_commonStats.needFetch;
            *out = id;
        }

        return childStatus;
    }
}
Example #10
    PlanStage::StageState MergeSortStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }

        if (!_noResultToMerge.empty()) {
            // We have some child that we don't have a result from.  Each child must have a result
            // in order to pick the minimum result among all our children.  Work a child.
            PlanStage* child = _noResultToMerge.front();
            WorkingSetID id = WorkingSet::INVALID_ID;
            StageState code = child->work(&id);

            if (PlanStage::ADVANCED == code) {
                // If we're deduping...
                if (_dedup) {
                    WorkingSetMember* member = _ws->get(id);

                    if (!member->hasLoc()) {
                        // Can't dedup data unless there's a DiskLoc.  We go ahead and use its
                        // result.
                        _noResultToMerge.pop();
                    }
                    else {
                        ++_specificStats.dupsTested;
                        // ...and there's a DiskLoc and we've seen that DiskLoc before
                        if (_seen.end() != _seen.find(member->loc)) {
                            // ...drop it.
                            _ws->free(id);
                            ++_commonStats.needTime;
                            ++_specificStats.dupsDropped;
                            return PlanStage::NEED_TIME;
                        }
                        else {
                            // Otherwise, note that we've seen it.
                            _seen.insert(member->loc);
                            // We're going to use the result from the child, so we remove it from
                            // the queue of children without a result.
                            _noResultToMerge.pop();
                        }
                    }
                }
                else {
                    // Not deduping.  We use any result we get from the child.  Remove the child
                    // from the queue of things without a result.
                    _noResultToMerge.pop();
                }

                // Store the result in our list.
                StageWithValue value;
                value.id = id;
                value.stage = child;
                _mergingData.push_front(value);

                // Insert the result (indirectly) into our priority queue.
                _merging.push(_mergingData.begin());

                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else if (PlanStage::IS_EOF == code) {
                // There are no more results possible from this child.  Don't bother with it
                // anymore.
                _noResultToMerge.pop();
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else if (PlanStage::FAILURE == code) {
                *out = id;
                // If a stage fails, it may create a status WSM to indicate why it
                // failed, in which case 'id' is valid.  If ID is invalid, we
                // create our own error message.
                if (WorkingSet::INVALID_ID == id) {
                    mongoutils::str::stream ss;
                    ss << "merge sort stage failed to read in results from child";
                    Status status(ErrorCodes::InternalError, ss);
                    *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                }
                return code;
            }
            else {
                if (PlanStage::NEED_TIME == code) {
                    ++_commonStats.needTime;
                }
                return code;
            }
        }

        // If we're here, for each non-EOF child, we have a valid WSID.
        verify(!_merging.empty());

        // Get the 'min' WSID.  _merging is a priority queue so its top is the smallest.
        MergingRef top = _merging.top();
        _merging.pop();

        // Since we're returning the WSID that came from top->stage, we need to work(...) it again
        // to get a new result.
        _noResultToMerge.push(top->stage);

        // Save the ID that we're returning and remove the returned result from our data.
        WorkingSetID idToTest = top->id;
        _mergingData.erase(top);

        // Return the min.
        *out = idToTest;
        ++_commonStats.advanced;

        // But don't return it if it's flagged.
        if (_ws->isFlagged(*out)) {
            _ws->free(*out);
            return PlanStage::NEED_TIME;
        }

        return PlanStage::ADVANCED;
    }
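Spread across many work() calls, the stage above performs a k-way merge via a priority queue. A compact self-contained sketch of the same algorithm, with sorted int vectors standing in for child stages (names are illustrative):

    #include <cstddef>
    #include <functional>
    #include <queue>
    #include <utility>
    #include <vector>

    std::vector<int> kWayMerge(const std::vector<std::vector<int> >& inputs) {
        typedef std::pair<int, std::size_t> Entry;  // (value, index of the source input)
        std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry> > heap;
        std::vector<std::size_t> pos(inputs.size(), 0);

        // Prime the queue with each input's first element.
        for (std::size_t i = 0; i < inputs.size(); ++i) {
            if (!inputs[i].empty()) {
                heap.push(Entry(inputs[i][0], i));
            }
        }

        std::vector<int> out;
        while (!heap.empty()) {
            Entry top = heap.top();                 // Smallest head across all inputs.
            heap.pop();
            out.push_back(top.first);
            std::size_t src = top.second;
            if (++pos[src] < inputs[src].size()) {  // Refill from the input just consumed.
                heap.push(Entry(inputs[src][pos[src]], src));
            }
        }
        return out;
    }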
Example #11
    PlanStage::StageState AndHashStage::hashOtherChildren(WorkingSetID* out) {
        verify(_currentChild > 0);

        WorkingSetID id;
        StageState childStatus = _children[_currentChild]->work(&id);

        if (PlanStage::ADVANCED == childStatus) {
            WorkingSetMember* member = _ws->get(id);

            // Maybe the child had an invalidation.  We intersect DiskLoc(s) so we can't do anything
            // with this WSM.
            if (!member->hasLoc()) {
                _ws->flagForReview(id);
                return PlanStage::NEED_TIME;
            }

            verify(member->hasLoc());
            if (_dataMap.end() == _dataMap.find(member->loc)) {
                // Ignore.  It's not in any previous child.
            }
            else {
                // We have a hit.  Copy data into the WSM we already have.
                _seenMap.insert(member->loc);
                WorkingSetMember* olderMember = _ws->get(_dataMap[member->loc]);
                AndCommon::mergeFrom(olderMember, *member);
            }
            _ws->free(id);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::IS_EOF == childStatus) {
            // Finished with a child.
            ++_currentChild;

            // Keep elements of _dataMap that are in _seenMap.
            DataMap::iterator it = _dataMap.begin();
            while (it != _dataMap.end()) {
                if (_seenMap.end() == _seenMap.find(it->first)) {
                    DataMap::iterator toErase = it;
                    ++it;
                    _ws->free(toErase->second);
                    _dataMap.erase(toErase);
                }
                else { ++it; }
            }

            _specificStats.mapAfterChild.push_back(_dataMap.size());

            _seenMap.clear();

            // _dataMap is now the intersection of the first _currentChild nodes.

            // If we have nothing to AND with after finishing any child, stop.
            if (_dataMap.empty()) {
                _hashingChildren = false;
                return PlanStage::IS_EOF;
            }

            // We've finished scanning all children.  Return results with the next call to work().
            if (_currentChild == _children.size()) {
                _hashingChildren = false;
            }

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else {
            if (PlanStage::NEED_FETCH == childStatus) {
                *out = id;
                ++_commonStats.needFetch;
            }
            else if (PlanStage::NEED_TIME == childStatus) {
                ++_commonStats.needTime;
            }

            return childStatus;
        }
    }
Example #12
    Runner::RunnerState PlanExecutor::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return Runner::RUNNER_DEAD; }

        for (;;) {
            // Yield, if we can yield ourselves.
            if (NULL != _yieldPolicy.get() && _yieldPolicy->shouldYield()) {
                saveState();
                _yieldPolicy->yield();
                if (_killed) { return Runner::RUNNER_DEAD; }
                restoreState();
            }

            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState code = _root->work(&id);

            if (PlanStage::ADVANCED == code) {
                // Fast count.
                if (WorkingSet::INVALID_ID == id) {
                    invariant(NULL == objOut);
                    invariant(NULL == dlOut);
                    return Runner::RUNNER_ADVANCED;
                }

                WorkingSetMember* member = _workingSet->get(id);
                bool hasRequestedData = true;

                if (NULL != objOut) {
                    if (WorkingSetMember::LOC_AND_IDX == member->state) {
                        if (1 != member->keyData.size()) {
                            _workingSet->free(id);
                            hasRequestedData = false;
                        }
                        else {
                            *objOut = member->keyData[0].keyData;
                        }
                    }
                    else if (member->hasObj()) {
                        *objOut = member->obj;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (NULL != dlOut) {
                    if (member->hasLoc()) {
                        *dlOut = member->loc;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (hasRequestedData) {
                    _workingSet->free(id);
                    return Runner::RUNNER_ADVANCED;
                }
                // This result didn't have the data the caller wanted, try again.
            }
            else if (PlanStage::NEED_TIME == code) {
                // Fall through to yield check at end of large conditional.
            }
            else if (PlanStage::NEED_FETCH == code) {
                // id has a loc and refers to an obj we need to fetch.
                WorkingSetMember* member = _workingSet->get(id);

                // This must be true for somebody to request a fetch and can only change when an
                // invalidation happens, which is when we give up a lock.  Don't give up the
                // lock between receiving the NEED_FETCH and actually fetching(?).
                verify(member->hasLoc());

                // XXX: remove NEED_FETCH
            }
            else if (PlanStage::IS_EOF == code) {
                return Runner::RUNNER_EOF;
            }
            else if (PlanStage::DEAD == code) {
                return Runner::RUNNER_DEAD;
            }
            else {
                verify(PlanStage::FAILURE == code);
                if (NULL != objOut) {
                    WorkingSetCommon::getStatusMemberObject(*_workingSet, id, objOut);
                }
                return Runner::RUNNER_ERROR;
            }
        }
    }
Example #13
    // Set "toReturn" when NEED_FETCH.
    PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* error) {

        //
        // Try to retrieve the next covered member
        //

        if (!_nextInterval) {

            StatusWith<CoveredInterval*> intervalStatus = nextInterval(_txn,
                                                                       _workingSet,
                                                                       _collection);
            if (!intervalStatus.isOK()) {
                _searchState = SearchState_Finished;
                *error = intervalStatus.getStatus();
                return PlanStage::FAILURE;
            }

            if (NULL == intervalStatus.getValue()) {
                _searchState = SearchState_Finished;
                return PlanStage::IS_EOF;
            }

            // CoveredInterval and its child stage are owned by _childrenIntervals
            _childrenIntervals.push_back(intervalStatus.getValue());
            _nextInterval = _childrenIntervals.back();
            _nextIntervalStats.reset(new IntervalStats());
            _nextIntervalStats->minDistanceAllowed = _nextInterval->minDistance;
            _nextIntervalStats->maxDistanceAllowed = _nextInterval->maxDistance;
            _nextIntervalStats->inclusiveMaxDistanceAllowed = _nextInterval->inclusiveMax;
        }

        WorkingSetID nextMemberID;
        PlanStage::StageState intervalState = _nextInterval->covering->work(&nextMemberID);

        if (PlanStage::IS_EOF == intervalState) {
            _nextInterval = NULL;
            _nextIntervalSeen.clear();

            _searchState = SearchState_Advancing;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::FAILURE == intervalState) {
            *error = WorkingSetCommon::getMemberStatus(*_workingSet->get(nextMemberID));
            return intervalState;
        }
        else if (PlanStage::NEED_FETCH == intervalState) {
            *toReturn = nextMemberID;
            return intervalState;
        }
        else if (PlanStage::ADVANCED != intervalState) {
            return intervalState;
        }

        //
        // Try to buffer the next covered member
        //

        WorkingSetMember* nextMember = _workingSet->get(nextMemberID);

        // The child stage may not dedup so we must dedup them ourselves.
        if (_nextInterval->dedupCovering && nextMember->hasLoc()) {
            if (_nextIntervalSeen.end() != _nextIntervalSeen.find(nextMember->loc))
                return PlanStage::NEED_TIME;
        }

        ++_nextIntervalStats->numResultsFound;

        StatusWith<double> distanceStatus = computeDistance(nextMember);

        // Store the member's RecordId, if available, for quick invalidation
        if (nextMember->hasLoc()) {
            _nextIntervalSeen.insert(make_pair(nextMember->loc, nextMemberID));
        }

        if (!distanceStatus.isOK()) {
            _searchState = SearchState_Finished;
            *error = distanceStatus.getStatus();
            return PlanStage::FAILURE;
        }

        // If the member's distance is in the current distance interval, add it to our buffered
        // results.
        double memberDistance = distanceStatus.getValue();
        bool inInterval = memberDistance >= _nextInterval->minDistance
                          && (_nextInterval->inclusiveMax ?
                              memberDistance <= _nextInterval->maxDistance :
                              memberDistance < _nextInterval->maxDistance);

        // Update found distance stats
        if (_nextIntervalStats->minDistanceFound < 0
            || memberDistance < _nextIntervalStats->minDistanceFound) {
            _nextIntervalStats->minDistanceFound = memberDistance;
        }

        if (_nextIntervalStats->maxDistanceFound < 0
            || memberDistance > _nextIntervalStats->maxDistanceFound) {
            _nextIntervalStats->maxDistanceFound = memberDistance;
        }

        if (inInterval) {
            _resultBuffer.push(SearchResult(nextMemberID, memberDistance));

            ++_nextIntervalStats->numResultsBuffered;

            // Update buffered distance stats
            if (_nextIntervalStats->minDistanceBuffered < 0
                || memberDistance < _nextIntervalStats->minDistanceBuffered) {
                _nextIntervalStats->minDistanceBuffered = memberDistance;
            }

            if (_nextIntervalStats->maxDistanceBuffered < 0
                || memberDistance > _nextIntervalStats->maxDistanceBuffered) {
                _nextIntervalStats->maxDistanceBuffered = memberDistance;
            }
        }
        else {
            // We won't pass this WSM up, so deallocate it
            _workingSet->free(nextMemberID);
        }

        return PlanStage::NEED_TIME;
    }
Example #14
    PlanStage::StageState DeleteStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }
        invariant(_collection); // If isEOF() returns false, we must have a collection.

        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState status = _child->work(&id);

        if (PlanStage::ADVANCED == status) {
            WorkingSetMember* member = _ws->get(id);
            if (!member->hasLoc()) {
                _ws->free(id);
                const std::string errmsg = "delete stage failed to read member w/ loc from child";
                *out = WorkingSetCommon::allocateStatusMember(_ws, Status(ErrorCodes::InternalError,
                                                                          errmsg));
                return PlanStage::FAILURE;
            }
            DiskLoc rloc = member->loc;
            _ws->free(id);

            BSONObj deletedDoc;

            WriteUnitOfWork wunit(_txn);

            // TODO: Do we want to buffer docs and delete them in a group rather than
            // saving/restoring state repeatedly?
            saveState();
            const bool deleteCappedOK = false;
            const bool deleteNoWarn = false;
            _collection->deleteDocument(_txn, rloc, deleteCappedOK, deleteNoWarn,
                                        _params.shouldCallLogOp ? &deletedDoc : NULL);
            restoreState(_txn);

            ++_specificStats.docsDeleted;

            if (_params.shouldCallLogOp) {
                if (deletedDoc.isEmpty()) {
                    log() << "Deleted object without id in collection " << _collection->ns()
                          << ", not logging.";
                }
                else {
                    bool replJustOne = true;
                    repl::logOp(_txn, "d", _collection->ns().ns().c_str(), deletedDoc, 0,
                                &replJustOne, _params.fromMigrate);
                }
            }

            wunit.commit();

            _txn->recoveryUnit()->commitIfNeeded();

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::FAILURE == status) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it failed, in which case
            // 'id' is valid.  If ID is invalid, we create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                const std::string errmsg = "delete stage failed to read in results from child";
                *out = WorkingSetCommon::allocateStatusMember(_ws, Status(ErrorCodes::InternalError,
                                                                          errmsg));
                return PlanStage::FAILURE;
            }
            return status;
        }
        else {
            if (PlanStage::NEED_TIME == status) {
                ++_commonStats.needTime;
            }
            return status;
        }
    }
Example #15
    PlanExecutor::ExecState PlanExecutor::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return PlanExecutor::DEAD; }

        for (;;) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState code = _root->work(&id);

            if (PlanStage::ADVANCED == code) {
                // Fast count.
                if (WorkingSet::INVALID_ID == id) {
                    invariant(NULL == objOut);
                    invariant(NULL == dlOut);
                    return PlanExecutor::ADVANCED;
                }

                WorkingSetMember* member = _workingSet->get(id);
                bool hasRequestedData = true;

                if (NULL != objOut) {
                    if (WorkingSetMember::LOC_AND_IDX == member->state) {
                        if (1 != member->keyData.size()) {
                            _workingSet->free(id);
                            hasRequestedData = false;
                        }
                        else {
                            *objOut = member->keyData[0].keyData;
                        }
                    }
                    else if (member->hasObj()) {
                        *objOut = member->obj;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (NULL != dlOut) {
                    if (member->hasLoc()) {
                        *dlOut = member->loc;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (hasRequestedData) {
                    _workingSet->free(id);
                    return PlanExecutor::ADVANCED;
                }
                // This result didn't have the data the caller wanted, try again.
            }
            else if (PlanStage::NEED_TIME == code) {
                // Fall through to yield check at end of large conditional.
            }
            else if (PlanStage::IS_EOF == code) {
                return PlanExecutor::IS_EOF;
            }
            else if (PlanStage::DEAD == code) {
                return PlanExecutor::DEAD;
            }
            else {
                verify(PlanStage::FAILURE == code);
                if (NULL != objOut) {
                    WorkingSetCommon::getStatusMemberObject(*_workingSet, id, objOut);
                }
                return PlanExecutor::EXEC_ERROR;
            }
        }
    }
Example #16
    PlanStage::StageState AndHashStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }

        // Fast-path for one of our children being EOF immediately.  We work each child a few times.
        // If it hits EOF, the AND cannot output anything.  If it produces a result, we stash that
        // result in _lookAheadResults.
        if (_lookAheadResults.empty()) {
            // INVALID_ID means that the child didn't produce a valid result.

            // We specifically are not using .resize(size, value) here because C++11 builds don't
            // seem to resolve WorkingSet::INVALID_ID during linking.
            _lookAheadResults.resize(_children.size());
            for (size_t i = 0; i < _children.size(); ++i) {
                _lookAheadResults[i] =  WorkingSet::INVALID_ID;
            }

            // Work each child some number of times until it's either EOF or produces
            // a result.  If it's EOF this whole stage will be EOF.  If it produces a
            // result we cache it for later.
            for (size_t i = 0; i < _children.size(); ++i) {
                PlanStage* child = _children[i];
                for (size_t j = 0; j < kLookAheadWorks; ++j) {
                    StageState childStatus = child->work(&_lookAheadResults[i]);

                    if (PlanStage::IS_EOF == childStatus || PlanStage::DEAD == childStatus) {

                        // A child went right to EOF.  Bail out.
                        _hashingChildren = false;
                        _dataMap.clear();
                        return PlanStage::IS_EOF;
                    }
                    else if (PlanStage::ADVANCED == childStatus) {
                        // We have a result cached in _lookAheadResults[i].  Stop looking at this
                        // child.
                        break;
                    }
                    else if (PlanStage::FAILURE == childStatus) {
                        // Propagate error to parent.
                        *out = _lookAheadResults[i];
                        // If a stage fails, it may create a status WSM to indicate why it
                        // failed, in which case 'id' is valid.  If ID is invalid, we
                        // create our own error message.
                        if (WorkingSet::INVALID_ID == *out) {
                            mongoutils::str::stream ss;
                            ss << "hashed AND stage failed to read in look ahead results "
                               << "from child " << i;
                            Status status(ErrorCodes::InternalError, ss);
                            *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                        }

                        _hashingChildren = false;
                        _dataMap.clear();
                        return PlanStage::FAILURE;
                    }
                    // We ignore NEED_TIME. TODO: what do we want to do if we get NEED_YIELD here?
                }
            }

            // We did a bunch of work above, return NEED_TIME to be fair.
            return PlanStage::NEED_TIME;
        }

        // An AND is either reading the first child into the hash table, probing against the hash
        // table with subsequent children, or checking the last child's results to see if they're
        // in the hash table.

        // We read the first child into our hash table.
        if (_hashingChildren) {
            // Check memory usage of previously hashed results.
            if (_memUsage > _maxMemUsage) {
                mongoutils::str::stream ss;
                ss << "hashed AND stage buffered data usage of " << _memUsage
                   << " bytes exceeds internal limit of " << kDefaultMaxMemUsageBytes << " bytes";
                Status status(ErrorCodes::Overflow, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
                return PlanStage::FAILURE;
            }

            if (0 == _currentChild) {
                return readFirstChild(out);
            }
            else if (_currentChild < _children.size() - 1) {
                return hashOtherChildren(out);
            }
            else {
                _hashingChildren = false;
                // We don't hash our last child.  Instead, we probe the table created from the
                // previous children, returning results in the order of the last child.
                // Fall through to below.
            }
        }

        // Returning results.  We read from the last child and return the results that are in our
        // hash map.

        // We should be EOF if we're not hashing results and the dataMap is empty.
        verify(!_dataMap.empty());

        // We probe _dataMap with the last child.
        verify(_currentChild == _children.size() - 1);

        // Get the next result for the (_children.size() - 1)-th child.
        StageState childStatus = workChild(_children.size() - 1, out);
        if (PlanStage::ADVANCED != childStatus) {
            return childStatus;
        }

        // We know that we've ADVANCED.  See if the WSM is in our table.
        WorkingSetMember* member = _ws->get(*out);

        // Maybe the child had an invalidation.  We intersect RecordId(s) so we can't do anything
        // with this WSM.
        if (!member->hasLoc()) {
            _ws->flagForReview(*out);
            return PlanStage::NEED_TIME;
        }

        DataMap::iterator it = _dataMap.find(member->loc);
        if (_dataMap.end() == it) {
            // Child's output wasn't in every previous child.  Throw it out.
            _ws->free(*out);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else {
            // Child's output was in every previous child.  Merge any key data in
            // the child's output and free the child's just-outputted WSM.
            WorkingSetID hashID = it->second;
            _dataMap.erase(it);

            WorkingSetMember* olderMember = _ws->get(hashID);
            AndCommon::mergeFrom(olderMember, *member);
            _ws->free(*out);

            // We should check for matching at the end so the matcher can use information in the
            // indices of all our children.
            if (Filter::passes(olderMember, _filter)) {
                *out = hashID;
                ++_commonStats.advanced;
                return PlanStage::ADVANCED;
            }
            else {
                _ws->free(hashID);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
    }
Example #17
PlanStage::StageState FetchStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // If there's an obj there, there is no fetching to perform.
        if (member->hasObj()) {
            ++_specificStats.alreadyHasObj;
        } else {
            // We need a valid loc to fetch from and this is the only state that has one.
            verify(WorkingSetMember::LOC_AND_IDX == member->getState());
            verify(member->hasLoc());

            try {
                if (!_cursor)
                    _cursor = _collection->getCursor(getOpCtx());

                if (auto fetcher = _cursor->fetcherForId(member->loc)) {
                    // There's something to fetch. Hand the fetcher off to the WSM, and pass up
                    // a fetch request.
                    _idRetrying = id;
                    member->setFetcher(fetcher.release());
                    *out = id;
                    return NEED_YIELD;
                }

                // The doc is already in memory, so go ahead and grab it. Now we have a RecordId
                // as well as an unowned object.
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) {
                    _ws->free(id);
                    return NEED_TIME;
                }
            } catch (const WriteConflictException& wce) {
                // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may
                // be freed when we yield.
                member->makeObjOwnedIfNeeded();
                _idRetrying = id;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            }
        }

        return returnIfMatches(member, id, out);
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "fetch stage failed to read in results from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}
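The _idRetrying member lets the stage hand control back to the runner (NEED_YIELD) without losing the document it was in the middle of fetching: on the next call it resumes with the stashed WorkingSetID instead of asking the child for a new one. A minimal sketch of that stash-and-retry shape, using hypothetical types rather than the real stage interface:

#include <iostream>
#include <optional>
#include <queue>

// Hypothetical miniature of the retry-after-yield pattern: a stage pulls items
// from an upstream queue, but if an item can't be finished yet it stashes the
// item and retries it on the next call instead of consuming a new one.
struct MiniFetch {
    std::queue<int> upstream;      // stand-in for the child stage
    std::optional<int> retrying;   // stand-in for _idRetrying

    // Returns the next fully processed value, or nullopt if we must yield / are done.
    std::optional<int> work(bool mustYield) {
        int id;
        if (retrying) {                 // resume the stashed item first
            id = *retrying;
            retrying.reset();
        } else if (!upstream.empty()) {
            id = upstream.front();
            upstream.pop();
        } else {
            return std::nullopt;        // EOF
        }

        if (mustYield) {                // can't finish now: stash and report the yield
            retrying = id;
            return std::nullopt;
        }
        return id;                      // finished processing this item
    }
};

int main() {
    MiniFetch f;
    f.upstream.push(7);
    f.work(/*mustYield=*/true);                  // stashes 7
    std::cout << *f.work(/*mustYield=*/false);   // prints 7: retried, not lost
}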
Example #18
    PlanStage::StageState AndHashStage::readFirstChild(WorkingSetID* out) {
        verify(_currentChild == 0);

        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState childStatus = workChild(0, &id);

        if (PlanStage::ADVANCED == childStatus) {
            WorkingSetMember* member = _ws->get(id);

            // Maybe the child had an invalidation.  We intersect RecordId(s) so we can't do anything
            // with this WSM.
            if (!member->hasLoc()) {
                _ws->flagForReview(id);
                return PlanStage::NEED_TIME;
            }

            if (!_dataMap.insert(std::make_pair(member->loc, id)).second) {
                // Didn't insert because we already had this loc inside the map. This should only
                // happen if we're seeing a newer copy of the same doc in a more recent snapshot.
                // Throw out the newer copy of the doc.
                _ws->free(id);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }

            // Update memory stats.
            _memUsage += member->getMemUsage();

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::IS_EOF == childStatus) {
            // Done reading child 0.
            _currentChild = 1;

            // If our first child was empty, don't scan any others, no possible results.
            if (_dataMap.empty()) {
                _hashingChildren = false;
                return PlanStage::IS_EOF;
            }

            ++_commonStats.needTime;
            _specificStats.mapAfterChild.push_back(_dataMap.size());

            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::FAILURE == childStatus) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "hashed AND stage failed to read in results to from first child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            }
            return childStatus;
        }
        else {
            if (PlanStage::NEED_TIME == childStatus) {
                ++_commonStats.needTime;
            }
            else if (PlanStage::NEED_YIELD == childStatus) {
                ++_commonStats.needYield;
                *out = id;
            }

            return childStatus;
        }
    }
Example #19
PlanStage::StageState DeleteStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasLoc()) {
            // We expect to be here because of an invalidation causing a force-fetch, and
            // doc-locking storage engines do not issue invalidations.
            ++_specificStats.nInvalidateSkips;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        RecordId rloc = member->loc;
        // Deletes can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        try {
            // If the snapshot changed, then we have to make sure we have the latest copy of the
            // doc and that it still matches.
            std::unique_ptr<SeekableRecordCursor> cursor;
            if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
                cursor = _collection->getCursor(getOpCtx());
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                    // Doc is already deleted. Nothing more to do.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }

                // Make sure the re-fetched doc still matches the predicate.
                if (_params.canonicalQuery &&
                    !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) {
                    // Doesn't match.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }
            }

            // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
            // is allowed to free the memory.
            if (_params.returnDeleted) {
                member->makeObjOwnedIfNeeded();
            }

            // TODO: Do we want to buffer docs and delete them in a group rather than
            // saving/restoring state repeatedly?

            try {
                if (supportsDocLocking()) {
                    // Doc-locking engines require this before saveState() since they don't use
                    // invalidations.
                    WorkingSetCommon::prepareForSnapshotChange(_ws);
                }
                child()->saveState();
            } catch (const WriteConflictException& wce) {
                std::terminate();
            }

            if (_params.returnDeleted) {
                // Save a copy of the document that is about to get deleted, but keep it in the
                // LOC_AND_OBJ state in case we need to retry deleting it.
                BSONObj deletedDoc = member->obj.value();
                member->obj.setValue(deletedDoc.getOwned());
            }

            // Do the write, unless this is an explain.
            if (!_params.isExplain) {
                WriteUnitOfWork wunit(getOpCtx());
                _collection->deleteDocument(getOpCtx(), rloc);
                wunit.commit();
            }

            ++_specificStats.docsDeleted;
        } catch (const WriteConflictException& wce) {
            // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may be
            // freed when we yield.
            member->makeObjOwnedIfNeeded();
            _idRetrying = id;
            memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        if (_params.returnDeleted) {
            // After deleting the document, the RecordId associated with this member is invalid.
            // Remove the 'loc' from the WorkingSetMember before returning it.
            member->loc = RecordId();
            member->transitionToOwnedObj();
        }

        //  restoreState() may recreate cursors, and cursors are tied to the transaction in
        //  which they are created. Since a WriteUnitOfWork is a transaction, make sure to
        //  restore the state outside of the WriteUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException& wce) {
            // Note we don't need to retry anything in this case since the delete already
            // was committed. However, we still need to return the deleted document
            // (if it was requested).
            if (_params.returnDeleted) {
                // member->obj should refer to the deleted document.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid.  If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "delete stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return status;
    } else if (PlanStage::NEED_TIME == status) {
        ++_commonStats.needTime;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
        ++_commonStats.needYield;
    }

    return status;
}
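Both delete-stage variants in this listing lean on a scope guard (MakeGuard/Dismiss) so the working-set member is freed on every early return unless the stage explicitly keeps it, either to retry after a write conflict or to return the deleted document. A minimal standalone guard with the same Dismiss() semantics, shown here as an illustrative sketch rather than MongoDB's ScopeGuard implementation:

#include <functional>
#include <iostream>

// Minimal scope guard: runs a cleanup action when it goes out of scope unless
// Dismiss() was called first. Mirrors the MakeGuard/Dismiss usage above.
class ScopeGuard {
public:
    explicit ScopeGuard(std::function<void()> onExit) : _onExit(std::move(onExit)) {}
    ~ScopeGuard() {
        if (_armed) _onExit();
    }
    void Dismiss() { _armed = false; }

    ScopeGuard(const ScopeGuard&) = delete;
    ScopeGuard& operator=(const ScopeGuard&) = delete;

private:
    std::function<void()> _onExit;
    bool _armed = true;
};

int main() {
    {
        ScopeGuard freeMember([] { std::cout << "member freed\n"; });
        // ... work that may return early on several paths ...
    }  // prints "member freed": no Dismiss() was called

    {
        ScopeGuard freeMember([] { std::cout << "member freed\n"; });
        freeMember.Dismiss();  // keep the member around (e.g. to retry the delete)
    }  // prints nothing
}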
Example #20
    PlanStage::StageState AndHashStage::hashOtherChildren(WorkingSetID* out) {
        verify(_currentChild > 0);

        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState childStatus = workChild(_currentChild, &id);

        if (PlanStage::ADVANCED == childStatus) {
            WorkingSetMember* member = _ws->get(id);

            // Maybe the child had an invalidation.  We intersect RecordId(s) so we can't do anything
            // with this WSM.
            if (!member->hasLoc()) {
                _ws->flagForReview(id);
                return PlanStage::NEED_TIME;
            }

            verify(member->hasLoc());
            if (_dataMap.end() == _dataMap.find(member->loc)) {
                // Ignore.  It's not in any previous child.
            }
            else {
                // We have a hit.  Copy data into the WSM we already have.
                _seenMap.insert(member->loc);
                WorkingSetMember* olderMember = _ws->get(_dataMap[member->loc]);
                size_t memUsageBefore = olderMember->getMemUsage();

                AndCommon::mergeFrom(olderMember, *member);

                // Update memory stats.
                _memUsage += olderMember->getMemUsage() - memUsageBefore;
            }
            _ws->free(id);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::IS_EOF == childStatus) {
            // Finished with a child.
            ++_currentChild;

            // Keep elements of _dataMap that are in _seenMap.
            DataMap::iterator it = _dataMap.begin();
            while (it != _dataMap.end()) {
                if (_seenMap.end() == _seenMap.find(it->first)) {
                    DataMap::iterator toErase = it;
                    ++it;

                    // Update memory stats.
                    WorkingSetMember* member = _ws->get(toErase->second);
                    _memUsage -= member->getMemUsage();

                    _ws->free(toErase->second);
                    _dataMap.erase(toErase);
                }
                else { ++it; }
            }

            _specificStats.mapAfterChild.push_back(_dataMap.size());

            _seenMap.clear();

            // _dataMap is now the intersection of the first _currentChild nodes.

            // If we have nothing to AND with after finishing any child, stop.
            if (_dataMap.empty()) {
                _hashingChildren = false;
                return PlanStage::IS_EOF;
            }

            // We've finished scanning all children.  Return results with the next call to work().
            if (_currentChild == _children.size()) {
                _hashingChildren = false;
            }

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::FAILURE == childStatus) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "hashed AND stage failed to read in results from other child "
                   << _currentChild;
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember( _ws, status);
            }
            return childStatus;
        }
        else {
            if (PlanStage::NEED_TIME == childStatus) {
                ++_commonStats.needTime;
            }
            else if (PlanStage::NEED_YIELD == childStatus) {
                ++_commonStats.needYield;
                *out = id;
            }

            return childStatus;
        }
    }
Example #21
    PlanStage::StageState OrStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        if (isEOF()) { return PlanStage::IS_EOF; }

        if (0 == _specificStats.matchTested.size()) {
            _specificStats.matchTested = vector<uint64_t>(_children.size(), 0);
        }

        WorkingSetID id;
        StageState childStatus = _children[_currentChild]->work(&id);

        if (PlanStage::ADVANCED == childStatus) {
            WorkingSetMember* member = _ws->get(id);
            verify(member->hasLoc());

            // If we're deduping...
            if (_dedup) {
                ++_specificStats.dupsTested;

                // ...and we've seen the DiskLoc before
                if (_seen.end() != _seen.find(member->loc)) {
                    // ...drop it.
                    ++_specificStats.dupsDropped;
                    _ws->free(id);
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }
                else {
                    // Otherwise, note that we've seen it.
                    _seen.insert(member->loc);
                }
            }

            if (NULL == _matcher || _matcher->matches(member)) {
                if (NULL != _matcher) {
                    ++_specificStats.matchTested[_currentChild];
                }
                // Match!  return it.
                *out = id;
                ++_commonStats.advanced;
                return PlanStage::ADVANCED;
            }
            else {
                // Does not match, try again.
                _ws->free(id);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
        else if (PlanStage::IS_EOF == childStatus) {
            // Done with _currentChild, move to the next one.
            ++_currentChild;

            // Maybe we're out of children.
            if (isEOF()) {
                return PlanStage::IS_EOF;
            }
            else {
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
        }
        else {
            if (PlanStage::NEED_FETCH == childStatus) {
                ++_commonStats.needFetch;
            }
            else if (PlanStage::NEED_TIME == childStatus) {
                ++_commonStats.needTime;
            }

            // NEED_TIME, ERROR, NEED_FETCH, pass them up.
            return childStatus;
        }
    }
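When deduping, the OR stage keeps a set of record locations it has already returned and drops any duplicate produced by a later child. The same union-with-dedup idea in a self-contained sketch with hypothetical integer record IDs:

#include <iostream>
#include <set>
#include <vector>

using RecordId = long long;

// Union several children's result streams, dropping a record the second time
// its RecordId is seen -- the same dedup-by-loc scheme the OR stage uses.
std::vector<RecordId> dedupedOr(const std::vector<std::vector<RecordId>>& children) {
    std::set<RecordId> seen;
    std::vector<RecordId> out;
    for (const auto& child : children) {   // children are drained in order
        for (RecordId id : child) {
            if (seen.insert(id).second) {  // first time we've seen this loc
                out.push_back(id);
            }                              // else: duplicate, dropped
        }
    }
    return out;
}

int main() {
    for (RecordId id : dedupedOr({{1, 2, 3}, {3, 4}, {4, 5}}))
        std::cout << id << ' ';            // prints: 1 2 3 4 5
}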
Example #22
    PlanStage::StageState DeleteStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }
        invariant(_collection); // If isEOF() returns false, we must have a collection.

        // Either retry the last WSM we worked on or get a new one from our child.
        WorkingSetID id;
        StageState status;
        if (_idRetrying == WorkingSet::INVALID_ID) {
            status = _child->work(&id);
        }
        else {
            status = ADVANCED;
            id = _idRetrying;
            _idRetrying = WorkingSet::INVALID_ID;
        }

        if (PlanStage::ADVANCED == status) {
            WorkingSetMember* member = _ws->get(id);

            // We want to free this member when we return, unless we need to retry it.
            ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

            if (!member->hasLoc()) {
                // We expect to be here because of an invalidation causing a force-fetch, and
                // doc-locking storage engines do not issue invalidations.
                dassert(!supportsDocLocking());
                ++_specificStats.nInvalidateSkips;
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            RecordId rloc = member->loc;

            try {
                // If the snapshot changed, then we have to make sure we have the latest copy of the
                // doc and that it still matches.
                if (_txn->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
                    if (!WorkingSetCommon::fetch(_txn, member, _collection)) {
                        // Doc is already deleted. Nothing more to do.
                        ++_commonStats.needTime;
                        return PlanStage::NEED_TIME;
                    }

                    // Make sure the re-fetched doc still matches the predicate.
                    if (_params.canonicalQuery &&
                        !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) {
                        // Doesn't match.
                        ++_commonStats.needTime;
                        return PlanStage::NEED_TIME;
                    }
                }

                // TODO: Do we want to buffer docs and delete them in a group rather than
                // saving/restoring state repeatedly?

                try {
                    _child->saveState();
                    if (supportsDocLocking()) {
                        // Doc-locking engines require this after saveState() since they don't use
                        // invalidations.
                        WorkingSetCommon::prepareForSnapshotChange(_ws);
                    }
                }
                catch ( const WriteConflictException& wce ) {
                    std::terminate();
                }

                // Do the write, unless this is an explain.
                if (!_params.isExplain) {
                    WriteUnitOfWork wunit(_txn);

                    const bool deleteCappedOK = false;
                    const bool deleteNoWarn = false;
                    BSONObj deletedDoc;

                    _collection->deleteDocument(_txn, rloc, deleteCappedOK, deleteNoWarn,
                                                _params.shouldCallLogOp ? &deletedDoc : NULL);

                    if (_params.shouldCallLogOp) {
                        if (deletedDoc.isEmpty()) {
                            log() << "Deleted object without id in collection " << _collection->ns()
                            << ", not logging.";
                        }
                        else {
                            getGlobalServiceContext()->getOpObserver()->onDelete(
                                    _txn,
                                    _collection->ns().ns(),
                                    deletedDoc,
                                    _params.fromMigrate);
                        }
                    }

                    wunit.commit();
                }

                ++_specificStats.docsDeleted;
            }
            catch ( const WriteConflictException& wce ) {
                _idRetrying = id;
                memberFreer.Dismiss(); // Keep this member around so we can retry deleting it.
                *out = WorkingSet::INVALID_ID;
                _commonStats.needYield++;
                return NEED_YIELD;
            }

            //  restoreState() may recreate cursors, and cursors are tied to the transaction in
            //  which they are created. Since a WriteUnitOfWork is a transaction, make sure to
            //  restore the state outside of the WriteUnitOfWork.
            try {
                _child->restoreState(_txn);
            }
            catch ( const WriteConflictException& wce ) {
                // Note we don't need to retry anything in this case since the delete already
                // was committed.
                *out = WorkingSet::INVALID_ID;
                _commonStats.needYield++;
                return NEED_YIELD;
            }

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::FAILURE == status) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it failed, in which case
            // 'id' is valid.  If ID is invalid, we create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                const std::string errmsg = "delete stage failed to read in results from child";
                *out = WorkingSetCommon::allocateStatusMember(_ws, Status(ErrorCodes::InternalError,
                                                                          errmsg));
                return PlanStage::FAILURE;
            }
            return status;
        }
        else if (PlanStage::NEED_TIME == status) {
            ++_commonStats.needTime;
        }
        else if (PlanStage::NEED_YIELD == status) {
            *out = id;
            ++_commonStats.needYield;
        }

        return status;
    }
Example #23
PlanStage::StageState SortStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (NULL == _sortKeyGen) {
        // This is heavy and should be done as part of work().
        _sortKeyGen.reset(new SortStageKeyGenerator(_collection, _pattern, _query));
        _sortKeyComparator.reset(new WorkingSetComparator(_sortKeyGen->getSortComparator()));
        // If limit > 1, we need to initialize _dataSet here to maintain ordered
        // set of data items while fetching from the child stage.
        if (_limit > 1) {
            const WorkingSetComparator& cmp = *_sortKeyComparator;
            _dataSet.reset(new SortableDataItemSet(cmp));
        }
        return PlanStage::NEED_TIME;
    }

    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes);
    if (_memUsage > maxBytes) {
        mongoutils::str::stream ss;
        ss << "Sort operation used more than the maximum " << maxBytes
           << " bytes of RAM. Add an index, or specify a smaller limit.";
        Status status(ErrorCodes::OperationFailed, ss);
        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        return PlanStage::FAILURE;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Still reading in results to sort.
    if (!_sorted) {
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = _child->work(&id);

        if (PlanStage::ADVANCED == code) {
            // Add it into the map for quick invalidation if it has a valid RecordId.
            // A RecordId may be invalidated at any time (during a yield).  We need to get into
            // the WorkingSet as quickly as possible to handle it.
            WorkingSetMember* member = _ws->get(id);

            // Planner must put a fetch before we get here.
            verify(member->hasObj());

            // We might be sorting something that was invalidated at some point.
            if (member->hasLoc()) {
                _wsidByDiskLoc[member->loc] = id;
            }

            // The data remains in the WorkingSet and we wrap the WSID with the sort key.
            SortableDataItem item;
            Status sortKeyStatus = _sortKeyGen->getSortKey(*member, &item.sortKey);
            if (!sortKeyStatus.isOK()) {
                *out = WorkingSetCommon::allocateStatusMember(_ws, sortKeyStatus);
                return PlanStage::FAILURE;
            }
            item.wsid = id;
            if (member->hasLoc()) {
                // The RecordId breaks ties when sorting two WSMs with the same sort key.
                item.loc = member->loc;
            }

            addToBuffer(item);

            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // TODO: We don't need the lock for this.  We could ask for a yield and do this work
            // unlocked.  Also, this is performing a lot of work for one call to work(...)
            sortBuffer();
            _resultIterator = _data.begin();
            _sorted = true;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "sort stage failed to read in results to sort from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else if (PlanStage::NEED_TIME == code) {
            ++_commonStats.needTime;
        } else if (PlanStage::NEED_YIELD == code) {
            ++_commonStats.needYield;
            *out = id;
        }

        return code;
    }

    // Returning results.
    verify(_resultIterator != _data.end());
    verify(_sorted);
    *out = _resultIterator->wsid;
    _resultIterator++;

    // If we're returning something, take it out of our DL -> WSID map so that future
    // calls to invalidate don't cause us to take action for a DL we're done with.
    WorkingSetMember* member = _ws->get(*out);
    if (member->hasLoc()) {
        _wsidByDiskLoc.erase(member->loc);
    }

    ++_commonStats.advanced;
    return PlanStage::ADVANCED;
}
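When a limit greater than one is in play, the comments above note that an ordered _dataSet is kept while draining the child, so only the best "limit" items are ever buffered rather than everything being sorted at EOF. A compact top-k sketch of that strategy, using hypothetical integer sort keys and standard containers only:

#include <iostream>
#include <iterator>
#include <set>
#include <vector>

// Keep only the smallest `limit` keys while consuming an unsorted stream,
// the way a limited sort avoids buffering every input document.
std::vector<int> topK(const std::vector<int>& input, size_t limit) {
    std::multiset<int> best;                // ordered buffer, at most `limit` entries
    for (int key : input) {
        if (best.size() < limit) {
            best.insert(key);
        } else if (key < *best.rbegin()) {  // better than the current worst
            best.erase(std::prev(best.end()));
            best.insert(key);
        }                                   // else: not competitive, discard
    }
    return {best.begin(), best.end()};      // already in sorted order
}

int main() {
    for (int k : topK({9, 1, 7, 3, 8, 2}, 3))
        std::cout << k << ' ';              // prints: 1 2 3
}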
Example #24
    PlanStage::StageState TwoDNear::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (!_initted) {
            _initted = true;

            if ( !_params.collection )
                return PlanStage::IS_EOF;

            IndexCatalog* indexCatalog = _params.collection->getIndexCatalog();

            IndexDescriptor* desc = indexCatalog->findIndexByKeyPattern(_params.indexKeyPattern);
            if ( desc == NULL )
                return PlanStage::IS_EOF;
            TwoDAccessMethod* am = static_cast<TwoDAccessMethod*>( indexCatalog->getIndex( desc ) );

            auto_ptr<twod_exec::GeoSearch> search;
            search.reset(new twod_exec::GeoSearch(_params.collection,
                                           am,
                                           _params.nearQuery.centroid.oldPoint,
                                           _params.numWanted, 
                                           _params.filter,
                                           _params.nearQuery.maxDistance,
                                           _params.nearQuery.isNearSphere ? twod_exec::GEO_SPHERE
                                                                          : twod_exec::GEO_PLANE));

            // This is where all the work is done.  :(
            search->exec();
            _specificStats.objectsLoaded = search->_objectsLoaded;
            _specificStats.nscanned = search->_lookedAt;

            for (twod_exec::GeoHopper::Holder::iterator it = search->_points.begin();
                 it != search->_points.end(); it++) {

                WorkingSetID id = _workingSet->allocate();
                WorkingSetMember* member = _workingSet->get(id);
                member->loc = it->_loc;
                member->obj = _params.collection->docFor(member->loc);
                member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
                if (_params.addDistMeta) {
                    member->addComputed(new GeoDistanceComputedData(it->_distance));
                }
                if (_params.addPointMeta) {
                    member->addComputed(new GeoNearPointComputedData(it->_pt));
                }
                _results.push(Result(id, it->_distance));
                _invalidationMap.insert(pair<DiskLoc, WorkingSetID>(it->_loc, id));
            }
        }

        if (isEOF()) { return PlanStage::IS_EOF; }

        Result result = _results.top();
        _results.pop();
        *out = result.id;

        // Remove from invalidation map.
        WorkingSetMember* member = _workingSet->get(*out);

        // The WSM may have been mutated or deleted so it may not have a loc.
        if (member->hasLoc()) {
            typedef multimap<DiskLoc, WorkingSetID>::iterator MMIT;
            pair<MMIT, MMIT> range = _invalidationMap.equal_range(member->loc);
            for (MMIT it = range.first; it != range.second; ++it) {
                if (it->second == *out) {
                    _invalidationMap.erase(it);
                    break;
                }
            }
        }

        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }
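After the up-front search fills _results, the stage simply pops candidates nearest-first. The same return ordering with a distance-keyed priority queue, shown as a sketch over hypothetical id/distance pairs:

#include <iostream>
#include <queue>
#include <vector>

struct Result {
    int id;
    double distance;
};

// Order the queue so that the smallest distance ends up on top (nearest-first).
struct Farther {
    bool operator()(const Result& a, const Result& b) const {
        return a.distance > b.distance;
    }
};

int main() {
    std::priority_queue<Result, std::vector<Result>, Farther> results;
    results.push({7, 3.2});
    results.push({4, 0.5});
    results.push({9, 1.8});

    while (!results.empty()) {   // pops ids 4, 9, 7 -- nearest first
        std::cout << results.top().id << ' ';
        results.pop();
    }
}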
Example #25
PlanStage::StageState AndSortedStage::getTargetLoc(WorkingSetID* out) {
    verify(numeric_limits<size_t>::max() == _targetNode);
    verify(WorkingSet::INVALID_ID == _targetId);
    verify(RecordId() == _targetLoc);

    // Pick one, and get a loc to work toward.
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState state = _children[0]->work(&id);

    if (PlanStage::ADVANCED == state) {
        WorkingSetMember* member = _ws->get(id);

        // Maybe the child had an invalidation.  We intersect RecordId(s) so we can't do anything
        // with this WSM.
        if (!member->hasLoc()) {
            _ws->flagForReview(id);
            return PlanStage::NEED_TIME;
        }

        verify(member->hasLoc());

        // We have a value from one child to AND with.
        _targetNode = 0;
        _targetId = id;
        _targetLoc = member->loc;

        // We have to AND with all other children.
        for (size_t i = 1; i < _children.size(); ++i) {
            _workingTowardRep.push(i);
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == state) {
        _isEOF = true;
        return state;
    } else if (PlanStage::FAILURE == state) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "sorted AND stage failed to read in results from first child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        _isEOF = true;
        return state;
    } else {
        if (PlanStage::NEED_TIME == state) {
            ++_commonStats.needTime;
        } else if (PlanStage::NEED_YIELD == state) {
            ++_commonStats.needYield;
            *out = id;
        }

        // NEED_TIME, NEED_YIELD.
        return state;
    }
}
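The sorted AND works by picking a target RecordId from one child and advancing the other children until they reach or pass it. With sorted inputs that reduces to a merge-style intersection, sketched below on plain sorted vectors standing in for the child streams:

#include <iostream>
#include <vector>

using RecordId = long long;

// Intersect two RecordId streams that are already sorted: pick a target from
// one stream and advance the other toward it, emitting matches.
std::vector<RecordId> sortedAnd(const std::vector<RecordId>& a,
                                const std::vector<RecordId>& b) {
    std::vector<RecordId> out;
    size_t i = 0, j = 0;
    while (i < a.size() && j < b.size()) {
        if (a[i] == b[j]) {        // both children produced the target
            out.push_back(a[i]);
            ++i;
            ++j;
        } else if (a[i] < b[j]) {  // a is behind the target: advance it
            ++i;
        } else {                   // b is behind the target: advance it
            ++j;
        }
    }
    return out;
}

int main() {
    for (RecordId id : sortedAnd({1, 3, 5, 7, 9}, {2, 3, 7, 10}))
        std::cout << id << ' ';    // prints: 3 7
}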
Example #26
    PlanStage::StageState UpdateStage::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (isEOF()) { return PlanStage::IS_EOF; }

        if (doneUpdating()) {
            // Even if we're done updating, we may have some inserting left to do.
            if (needInsert()) {
                doInsert();
            }

            // At this point either we're done updating and there was no insert to do,
            // or we're done updating and we're done inserting. Either way, we're EOF.
            invariant(isEOF());
            return PlanStage::IS_EOF;
        }

        // If we're here, then we still have to ask for results from the child and apply
        // updates to them. We should only get here if the collection exists.
        invariant(_collection);

        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState status = _child->work(&id);

        if (PlanStage::ADVANCED == status) {
            // Need to get these things from the result returned by the child.
            DiskLoc loc;
            BSONObj oldObj;

            WorkingSetMember* member = _ws->get(id);

            if (!member->hasLoc()) {
                _ws->free(id);
                const std::string errmsg = "update stage failed to read member w/ loc from child";
                *out = WorkingSetCommon::allocateStatusMember(_ws, Status(ErrorCodes::InternalError,
                                                                          errmsg));
                return PlanStage::FAILURE;
            }
            loc = member->loc;

            // Updates can't have projections. This means that covering analysis will always add
            // a fetch. We should always get fetched data, and never just key data.
            invariant(member->hasObj());
            oldObj = member->obj;

            // If we're here, then we have retrieved both a DiskLoc and the corresponding
            // unowned object from the child stage. Since we have the object and the diskloc,
            // we can free the WSM.
            _ws->free(id);

            // We fill this with the new locs of moved docs so we don't double-update.
            if (_updatedLocs && _updatedLocs->count(loc) > 0) {
                // Found a loc that we already updated.
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }

            ++_specificStats.nMatched;

            // Do the update and return.
            transformAndUpdate(oldObj, loc);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::IS_EOF == status) {
            // The child is out of results, but we might not be done yet because we still might
            // have to do an insert.
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        else if (PlanStage::FAILURE == status) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it failed, in which case
            // 'id' is valid.  If ID is invalid, we create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                const std::string errmsg = "delete stage failed to read in results from child";
                *out = WorkingSetCommon::allocateStatusMember(_ws, Status(ErrorCodes::InternalError,
                                                                          errmsg));
                return PlanStage::FAILURE;
            }
            return status;
        }
        else {
            if (PlanStage::NEED_TIME == status) {
                ++_commonStats.needTime;
            }
            return status;
        }
    }
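_updatedLocs exists so that a document the update has already touched (and possibly moved) is skipped if the scan encounters it again at its new location. A minimal sketch of that double-update guard with hypothetical integer locations:

#include <iostream>
#include <unordered_set>
#include <vector>

using RecordId = long long;

// Apply an update to each scanned location once, skipping locations the update
// itself produced -- the same role the stage's _updatedLocs set plays.
int updateAll(const std::vector<RecordId>& scan, std::unordered_set<RecordId>& updatedLocs) {
    int nUpdated = 0;
    for (RecordId loc : scan) {
        if (updatedLocs.count(loc)) continue;  // already updated; a moved doc resurfaced

        RecordId newLoc = loc + 100;           // pretend the update moved the document
        updatedLocs.insert(newLoc);            // remember where it landed
        ++nUpdated;
    }
    return nUpdated;
}

int main() {
    std::unordered_set<RecordId> updatedLocs;
    // The scan later encounters 101, the new home of the doc that used to live at 1.
    std::cout << updateAll({1, 2, 101}, updatedLocs) << '\n';  // prints 2, not 3
}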
Example #27
    Runner::RunnerState MultiPlanRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return Runner::RUNNER_DEAD; }
        if (_failure) { return Runner::RUNNER_ERROR; }

        // If we haven't picked the best plan yet...
        if (NULL == _bestPlan) {
            if (!pickBestPlan(NULL, objOut)) {
                verify(_failure || _killed);
                if (_killed) { return Runner::RUNNER_DEAD; }
                if (_failure) { return Runner::RUNNER_ERROR; }
            }
        }

        // Look for an already produced result that provides the data the caller wants.
        while (!_alreadyProduced.empty()) {
            WorkingSetID id = _alreadyProduced.front();
            _alreadyProduced.pop_front();

            WorkingSetMember* member = _bestPlan->getWorkingSet()->get(id);

            // Note that this copies code from PlanExecutor.
            if (NULL != objOut) {
                if (WorkingSetMember::LOC_AND_IDX == member->state) {
                    if (1 != member->keyData.size()) {
                        _bestPlan->getWorkingSet()->free(id);
                        // If the caller needs the key data and the WSM doesn't have it, drop the
                        // result and carry on.
                        continue;
                    }
                    *objOut = member->keyData[0].keyData;
                }
                else if (member->hasObj()) {
                    *objOut = member->obj;
                }
                else {
                    // If the caller needs an object and the WSM doesn't have it, drop and
                    // try the next result.
                    _bestPlan->getWorkingSet()->free(id);
                    continue;
                }
            }

            if (NULL != dlOut) {
                if (member->hasLoc()) {
                    *dlOut = member->loc;
                }
                else {
                    // If the caller needs a DiskLoc and the WSM doesn't have it, drop and carry on.
                    _bestPlan->getWorkingSet()->free(id);
                    continue;
                }
            }

            // If we're here, the caller has all the data needed and we've set the out
            // parameters.  Remove the result from the WorkingSet.
            _bestPlan->getWorkingSet()->free(id);
            return Runner::RUNNER_ADVANCED;
        }

        RunnerState state = _bestPlan->getNext(objOut, dlOut);

        if (Runner::RUNNER_ERROR == state && (NULL != _backupSolution)) {
            QLOG() << "Best plan errored out switching to backup\n";
            // Uncache the bad solution if we fall back
            // on the backup solution.
            //
            // XXX: Instead of uncaching we should find a way for the
            // cached plan runner to fall back on a different solution
            // if the best solution fails. Alternatively we could try to
            // defer cache insertion to be after the first produced result.
            Database* db = cc().database();
            verify(NULL != db);
            Collection* collection = db->getCollection(_query->ns());
            verify(NULL != collection);
            PlanCache* cache = collection->infoCache()->getPlanCache();
            cache->remove(*_query);

            _bestPlan.reset(_backupPlan);
            _backupPlan = NULL;
            _bestSolution.reset(_backupSolution);
            _backupSolution = NULL;
            _alreadyProduced = _backupAlreadyProduced;
            return getNext(objOut, dlOut);
        }

        if (NULL != _backupSolution && Runner::RUNNER_ADVANCED == state) {
            QLOG() << "Best plan had a blocking sort, became unblocked, deleting backup plan\n";
            delete _backupSolution;
            delete _backupPlan;
            _backupSolution = NULL;
            _backupPlan = NULL;
            // TODO: free from WS?
            _backupAlreadyProduced.clear();
        }

        return state;
    }
Example #28
PlanStage::StageState OrStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState childStatus = _children[_currentChild]->work(&id);

    if (PlanStage::ADVANCED == childStatus) {
        WorkingSetMember* member = _ws->get(id);

        // If we're deduping (and there's something to dedup by)
        if (_dedup && member->hasLoc()) {
            ++_specificStats.dupsTested;

            // ...and we've seen the RecordId before
            if (_seen.end() != _seen.find(member->loc)) {
                // ...drop it.
                ++_specificStats.dupsDropped;
                _ws->free(id);
                return PlanStage::NEED_TIME;
            } else {
                // Otherwise, note that we've seen it.
                _seen.insert(member->loc);
            }
        }

        if (Filter::passes(member, _filter)) {
            // Match!  return it.
            *out = id;
            return PlanStage::ADVANCED;
        } else {
            // Does not match, try again.
            _ws->free(id);
            return PlanStage::NEED_TIME;
        }
    } else if (PlanStage::IS_EOF == childStatus) {
        // Done with _currentChild, move to the next one.
        ++_currentChild;

        // Maybe we're out of children.
        if (isEOF()) {
            return PlanStage::IS_EOF;
        } else {
            return PlanStage::NEED_TIME;
        }
    } else if (PlanStage::FAILURE == childStatus || PlanStage::DEAD == childStatus) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "OR stage failed to read in results from child " << _currentChild;
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        return childStatus;
    } else if (PlanStage::NEED_YIELD == childStatus) {
        *out = id;
    }

    // NEED_TIME, ERROR, NEED_YIELD, pass them up.
    return childStatus;
}
Example #29
        void run() {
            Client::WriteContext ctx(&_txn, ns());
            
            Database* db = ctx.ctx().db();
            Collection* coll = db->getCollection(&_txn, ns());
            if (!coll) {
                coll = db->createCollection(&_txn, ns());
            }
            fillData();

            // The data we're going to later invalidate.
            set<DiskLoc> locs;
            getLocs(&locs, coll);

            // Build the mock scan stage which feeds the data.
            WorkingSet ws;
            auto_ptr<MockStage> ms(new MockStage(&ws));
            insertVarietyOfObjects(ms.get(), coll);

            SortStageParams params;
            params.collection = coll;
            params.pattern = BSON("foo" << 1);
            params.limit = limit();
            auto_ptr<SortStage> ss(new SortStage(&_txn, params, &ws, ms.get()));

            const int firstRead = 10;

            // Have sort read in data from the mock stage.
            for (int i = 0; i < firstRead; ++i) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                PlanStage::StageState status = ss->work(&id);
                ASSERT_NOT_EQUALS(PlanStage::ADVANCED, status);
            }

            // We should have read in the first 'firstRead' locs.  Invalidate the first.
            ss->saveState();
            set<DiskLoc>::iterator it = locs.begin();
            ss->invalidate(*it++, INVALIDATION_DELETION);
            ss->restoreState(&_txn);

            // Read the rest of the data from the mock stage.
            while (!ms->isEOF()) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                ss->work(&id);
            }

            // Release to prevent double-deletion.
            ms.release();

            // Let's just invalidate everything now.
            ss->saveState();
            while (it != locs.end()) {
                ss->invalidate(*it++, INVALIDATION_DELETION);
            }
            ss->restoreState(&_txn);

            // Invalidation of data in the sort stage fetches it but passes it through.
            int count = 0;
            while (!ss->isEOF()) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                PlanStage::StageState status = ss->work(&id);
                if (PlanStage::ADVANCED != status) { continue; }
                WorkingSetMember* member = ws.get(id);
                ASSERT(member->hasObj());
                ASSERT(!member->hasLoc());
                ++count;
            }
            ctx.commit();

            // Returns all docs.
            ASSERT_EQUALS(limit() ? limit() : numObj(), count);
        }
    bool MultiPlanRunner::workAllPlans(BSONObj* objOut) {
        bool doneWorking = false;

        for (size_t i = 0; i < _candidates.size(); ++i) {
            CandidatePlan& candidate = _candidates[i];
            if (candidate.failed) { continue; }

            // Yield, if we can yield ourselves.
            if (NULL != _yieldPolicy.get() && _yieldPolicy->shouldYield()) {
                saveState();
                _yieldPolicy->yield();
                if (_failure || _killed) { return false; }
                restoreState();
            }

            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState state = candidate.root->work(&id);

            if (PlanStage::ADVANCED == state) {
                // Save result for later.
                candidate.results.push_back(id);

                // Once a plan returns enough results, stop working.
                if (candidate.results.size()
                    >= size_t(internalQueryPlanEvaluationMaxResults)) {
                    doneWorking = true;
                }
            }
            else if (PlanStage::NEED_TIME == state) {
                // Fall through to yield check at end of large conditional.
            }
            else if (PlanStage::NEED_FETCH == state) {
                // id has a loc and refers to an obj we need to fetch.
                WorkingSetMember* member = candidate.ws->get(id);

                // This must be true for somebody to request a fetch and can only change when an
                // invalidation happens, which is when we give up a lock.  Don't give up the
                // lock between receiving the NEED_FETCH and actually fetching(?).
                verify(member->hasLoc());

                // Actually bring record into memory.
                Record* record = member->loc.rec();

                // If we're allowed to, go to disk outside of the lock.
                if (NULL != _yieldPolicy.get()) {
                    saveState();
                    _yieldPolicy->yield(record);
                    if (_failure || _killed) { return false; }
                    restoreState();
                }
                else {
                    // We're set to manually yield.  We go to disk in the lock.
                    record->touch();
                }

                // Record should be in memory now.  Log if it's not.
                if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
                    OCCASIONALLY {
                        warning() << "Record wasn't in memory immediately after fetch: "
                            << member->loc.toString() << endl;
                    }
                }

                // Note that we're not freeing id.  Fetch semantics say that we shouldn't.
            }