void run() {
        // Various variables we'll need.
        dbtests::WriteContextForTests ctx(&_opCtx, nss.ns());
        Collection* coll = ctx.getCollection();
        ASSERT(coll);
        const int targetDocIndex = 0;
        const BSONObj query = BSON("foo" << BSON("$gte" << targetDocIndex));
        const auto ws = make_unique<WorkingSet>();
        const unique_ptr<CanonicalQuery> cq(canonicalize(query));

        // Get the RecordIds that would be returned by an in-order scan.
        vector<RecordId> recordIds;
        getRecordIds(coll, CollectionScanParams::FORWARD, &recordIds);

        // Configure a QueuedDataStage to pass the first object in the collection back in a
        // RID_AND_OBJ state.
        auto qds = make_unique<QueuedDataStage>(&_opCtx, ws.get());
        WorkingSetID id = ws->allocate();
        WorkingSetMember* member = ws->get(id);
        member->recordId = recordIds[targetDocIndex];
        const BSONObj oldDoc = BSON("_id" << targetDocIndex << "foo" << targetDocIndex);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), oldDoc);
        ws->transitionToRecordIdAndObj(id);
        qds->pushBack(id);

        // Configure the delete.
        auto deleteParams = std::make_unique<DeleteStageParams>();
        deleteParams->returnDeleted = true;
        deleteParams->canonicalQuery = cq.get();

        const auto deleteStage = make_unique<DeleteStage>(
            &_opCtx, std::move(deleteParams), ws.get(), coll, qds.release());

        const DeleteStats* stats = static_cast<const DeleteStats*>(deleteStage->getSpecificStats());

        // Should return advanced.
        id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = deleteStage->work(&id);
        ASSERT_EQUALS(PlanStage::ADVANCED, state);

        // Make sure the returned value is what we expect it to be.

        // Should give us back a valid id.
        ASSERT_TRUE(WorkingSet::INVALID_ID != id);
        WorkingSetMember* resultMember = ws->get(id);
        // With an owned copy of the object, with no RecordId.
        ASSERT_TRUE(resultMember->hasOwnedObj());
        ASSERT_FALSE(resultMember->hasRecordId());
        ASSERT_EQUALS(resultMember->getState(), WorkingSetMember::OWNED_OBJ);
        ASSERT_TRUE(resultMember->obj.value().isOwned());

        // Should be the old value.
        ASSERT_BSONOBJ_EQ(resultMember->obj.value(), oldDoc);

        // Should have done the delete.
        ASSERT_EQUALS(stats->docsDeleted, 1U);
        // That should be it.
        id = WorkingSet::INVALID_ID;
        ASSERT_EQUALS(PlanStage::IS_EOF, deleteStage->work(&id));
    }
Beispiel #2
0
PlanStage::StageState OplogStart::workBackwardsScan(WorkingSetID* out) {
    PlanStage::StageState state = child()->work(out);

    // EOF.  Just start from the beginning, which is where we've hit.
    if (PlanStage::IS_EOF == state) {
        _done = true;
        return state;
    }

    if (PlanStage::ADVANCED != state) {
        return state;
    }

    WorkingSetMember* member = _workingSet->get(*out);
    verify(member->hasObj());
    verify(member->hasRecordId());

    if (!_filter->matchesBSON(member->obj.value())) {
        _done = true;
        // RecordId is returned in *out.
        return PlanStage::ADVANCED;
    } else {
        _workingSet->free(*out);
        return PlanStage::NEED_TIME;
    }
}
Beispiel #3
0
PlanStage::StageState AndHashStage::readFirstChild(WorkingSetID* out) {
    verify(_currentChild == 0);

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState childStatus = workChild(0, &id);

    if (PlanStage::ADVANCED == childStatus) {
        WorkingSetMember* member = _ws->get(id);

        // The child must give us a WorkingSetMember with a record id, since we intersect index keys
        // based on the record id. The planner ensures that the child stage can never produce an WSM
        // with no record id.
        invariant(member->hasRecordId());

        if (!_dataMap.insert(std::make_pair(member->recordId, id)).second) {
            // Didn't insert because we already had this RecordId inside the map. This should only
            // happen if we're seeing a newer copy of the same doc in a more recent snapshot.
            // Throw out the newer copy of the doc.
            _ws->free(id);
            return PlanStage::NEED_TIME;
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
        member->makeObjOwnedIfNeeded();

        // Update memory stats.
        _memUsage += member->getMemUsage();

        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == childStatus) {
        // Done reading child 0.
        _currentChild = 1;

        // If our first child was empty, don't scan any others, no possible results.
        if (_dataMap.empty()) {
            _hashingChildren = false;
            return PlanStage::IS_EOF;
        }

        _specificStats.mapAfterChild.push_back(_dataMap.size());

        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == childStatus || PlanStage::DEAD == childStatus) {
        // The stage which produces a failure is responsible for allocating a working set member
        // with error details.
        invariant(WorkingSet::INVALID_ID != id);
        *out = id;
        return childStatus;
    } else {
        if (PlanStage::NEED_YIELD == childStatus) {
            *out = id;
        }

        return childStatus;
    }
}
Beispiel #4
0
void CachedPlanStage::doInvalidate(OperationContext* txn,
                                   const RecordId& dl,
                                   InvalidationType type) {
    for (auto it = _results.begin(); it != _results.end(); ++it) {
        WorkingSetMember* member = _ws->get(*it);
        if (member->hasRecordId() && member->recordId == dl) {
            WorkingSetCommon::fetchAndInvalidateRecordId(txn, member, _collection);
        }
    }
}
Beispiel #5
0
void FetchStage::doInvalidate(OperationContext* opCtx, const RecordId& dl, InvalidationType type) {
    // It's possible that the recordId getting invalidated is the one we're about to
    // fetch. In this case we do a "forced fetch" and put the WSM in owned object state.
    if (WorkingSet::INVALID_ID != _idRetrying) {
        WorkingSetMember* member = _ws->get(_idRetrying);
        if (member->hasRecordId() && (member->recordId == dl)) {
            // Fetch it now and kill the recordId.
            WorkingSetCommon::fetchAndInvalidateRecordId(opCtx, member, _collection);
        }
    }
}
Beispiel #6
0
void IDHackStage::doInvalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) {
    // Since updates can't mutate the '_id' field, we can ignore mutation invalidations.
    if (INVALIDATION_MUTATION == type) {
        return;
    }

    // It's possible that the RecordId getting invalidated is the one we're about to
    // fetch. In this case we do a "forced fetch" and put the WSM in owned object state.
    if (WorkingSet::INVALID_ID != _idBeingPagedIn) {
        WorkingSetMember* member = _workingSet->get(_idBeingPagedIn);
        if (member->hasRecordId() && (member->recordId == dl)) {
            // Fetch it now and kill the RecordId.
            WorkingSetCommon::fetchAndInvalidateRecordId(txn, member, _collection);
        }
    }
}
// static
bool WorkingSetCommon::fetch(OperationContext* txn,
                             WorkingSet* workingSet,
                             WorkingSetID id,
                             unowned_ptr<SeekableRecordCursor> cursor) {
    WorkingSetMember* member = workingSet->get(id);

    // The RecordFetcher should already have been transferred out of the WSM and used.
    invariant(!member->hasFetcher());

    // We should have a RecordId but need to retrieve the obj. Get the obj now and reset all WSM
    // state appropriately.
    invariant(member->hasRecordId());

    member->obj.reset();
    auto record = cursor->seekExact(member->recordId);
    if (!record) {
        return false;
    }

    member->obj = {txn->recoveryUnit()->getSnapshotId(), record->data.releaseToBson()};

    if (member->isSuspicious) {
        // Make sure that all of the keyData is still valid for this copy of the document.
        // This ensures both that index-provided filters and sort orders still hold.
        // TODO provide a way for the query planner to opt out of this checking if it is
        // unneeded due to the structure of the plan.
        invariant(!member->keyData.empty());
        for (size_t i = 0; i < member->keyData.size(); i++) {
            BSONObjSet keys;
            // There's no need to compute the prefixes of the indexed fields that cause the index to
            // be multikey when ensuring the keyData is still valid.
            MultikeyPaths* multikeyPaths = nullptr;
            member->keyData[i].index->getKeys(member->obj.value(), &keys, multikeyPaths);
            if (!keys.count(member->keyData[i].keyData)) {
                // document would no longer be at this position in the index.
                return false;
            }
        }

        member->isSuspicious = false;
    }

    member->keyData.clear();
    workingSet->transitionToRecordIdAndObj(id);
    return true;
}
    void getRecordIds(Collection* collection,
                      CollectionScanParams::Direction direction,
                      vector<RecordId>* out) {
        WorkingSet ws;

        CollectionScanParams params;
        params.direction = direction;
        params.tailable = false;

        unique_ptr<CollectionScan> scan(new CollectionScan(&_opCtx, collection, params, &ws, NULL));
        while (!scan->isEOF()) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState state = scan->work(&id);
            if (PlanStage::ADVANCED == state) {
                WorkingSetMember* member = ws.get(id);
                verify(member->hasRecordId());
                out->push_back(member->recordId);
            }
        }
    }
Beispiel #9
0
void MergeSortStage::doInvalidate(OperationContext* txn,
                                  const RecordId& dl,
                                  InvalidationType type) {
    // Go through our data and see if we're holding on to the invalidated RecordId.
    for (list<StageWithValue>::iterator valueIt = _mergingData.begin();
         valueIt != _mergingData.end();
         valueIt++) {
        WorkingSetMember* member = _ws->get(valueIt->id);
        if (member->hasRecordId() && (dl == member->recordId)) {
            // Fetch the about-to-be mutated result.
            WorkingSetCommon::fetchAndInvalidateRecordId(txn, member, _collection);
            ++_specificStats.forcedFetches;
        }
    }

    // If we see the deleted RecordId again it is not the same record as it once was so we still
    // want to return it.
    if (_dedup && INVALIDATION_DELETION == type) {
        _seen.erase(dl);
    }
}
Beispiel #10
0
PlanStage::StageState MergeSortStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (!_noResultToMerge.empty()) {
        // We have some child that we don't have a result from.  Each child must have a result
        // in order to pick the minimum result among all our children.  Work a child.
        PlanStage* child = _noResultToMerge.front();
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child->work(&id);

        if (PlanStage::ADVANCED == code) {
            WorkingSetMember* member = _ws->get(id);

            // If we're deduping...
            if (_dedup) {
                if (!member->hasRecordId()) {
                    // Can't dedup data unless there's a RecordId.  We go ahead and use its
                    // result.
                    _noResultToMerge.pop();
                } else {
                    ++_specificStats.dupsTested;
                    // ...and there's a RecordId and and we've seen the RecordId before
                    if (_seen.end() != _seen.find(member->recordId)) {
                        // ...drop it.
                        _ws->free(id);
                        ++_specificStats.dupsDropped;
                        return PlanStage::NEED_TIME;
                    } else {
                        // Otherwise, note that we've seen it.
                        _seen.insert(member->recordId);
                        // We're going to use the result from the child, so we remove it from
                        // the queue of children without a result.
                        _noResultToMerge.pop();
                    }
                }
            } else {
                // Not deduping.  We use any result we get from the child.  Remove the child
                // from the queue of things without a result.
                _noResultToMerge.pop();
            }

            // Store the result in our list.
            StageWithValue value;
            value.id = id;
            value.stage = child;
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
            member->makeObjOwnedIfNeeded();
            _mergingData.push_front(value);

            // Insert the result (indirectly) into our priority queue.
            _merging.push(_mergingData.begin());

            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // There are no more results possible from this child.  Don't bother with it
            // anymore.
            _noResultToMerge.pop();
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "merge sort stage failed to read in results from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else {
            if (PlanStage::NEED_YIELD == code) {
                *out = id;
            }

            return code;
        }
    }

    // If we're here, for each non-EOF child, we have a valid WSID.
    verify(!_merging.empty());

    // Get the 'min' WSID.  _merging is a priority queue so its top is the smallest.
    MergingRef top = _merging.top();
    _merging.pop();

    // Since we're returning the WSID that came from top->stage, we need to work(...) it again
    // to get a new result.
    _noResultToMerge.push(top->stage);

    // Save the ID that we're returning and remove the returned result from our data.
    WorkingSetID idToTest = top->id;
    _mergingData.erase(top);

    // Return the min.
    *out = idToTest;

    return PlanStage::ADVANCED;
}
Beispiel #11
0
PlanExecutor::ExecState PlanExecutor::getNextImpl(Snapshotted<BSONObj>* objOut, RecordId* dlOut) {
    if (MONGO_FAIL_POINT(planExecutorAlwaysFails)) {
        Status status(ErrorCodes::OperationFailed,
                      str::stream() << "PlanExecutor hit planExecutorAlwaysFails fail point");
        *objOut =
            Snapshotted<BSONObj>(SnapshotId(), WorkingSetCommon::buildMemberStatusObject(status));

        return PlanExecutor::FAILURE;
    }

    invariant(_currentState == kUsable);
    if (isMarkedAsKilled()) {
        if (NULL != objOut) {
            Status status(ErrorCodes::OperationFailed,
                          str::stream() << "Operation aborted because: " << *_killReason);
            *objOut = Snapshotted<BSONObj>(SnapshotId(),
                                           WorkingSetCommon::buildMemberStatusObject(status));
        }
        return PlanExecutor::DEAD;
    }

    if (!_stash.empty()) {
        invariant(objOut && !dlOut);
        *objOut = {SnapshotId(), _stash.front()};
        _stash.pop();
        return PlanExecutor::ADVANCED;
    }

    // When a stage requests a yield for document fetch, it gives us back a RecordFetcher*
    // to use to pull the record into memory. We take ownership of the RecordFetcher here,
    // deleting it after we've had a chance to do the fetch. For timing-based yields, we
    // just pass a NULL fetcher.
    unique_ptr<RecordFetcher> fetcher;

    // Incremented on every writeConflict, reset to 0 on any successful call to _root->work.
    size_t writeConflictsInARow = 0;

    for (;;) {
        // These are the conditions which can cause us to yield:
        //   1) The yield policy's timer elapsed, or
        //   2) some stage requested a yield due to a document fetch, or
        //   3) we need to yield and retry due to a WriteConflictException.
        // In all cases, the actual yielding happens here.
        if (_yieldPolicy->shouldYield()) {
            if (!_yieldPolicy->yield(fetcher.get())) {
                // A return of false from a yield should only happen if we've been killed during the
                // yield.
                invariant(isMarkedAsKilled());

                if (NULL != objOut) {
                    Status status(ErrorCodes::OperationFailed,
                                  str::stream() << "Operation aborted because: " << *_killReason);
                    *objOut = Snapshotted<BSONObj>(
                        SnapshotId(), WorkingSetCommon::buildMemberStatusObject(status));
                }
                return PlanExecutor::DEAD;
            }
        }

        // We're done using the fetcher, so it should be freed. We don't want to
        // use the same RecordFetcher twice.
        fetcher.reset();

        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState code = _root->work(&id);

        if (code != PlanStage::NEED_YIELD)
            writeConflictsInARow = 0;

        if (PlanStage::ADVANCED == code) {
            WorkingSetMember* member = _workingSet->get(id);
            bool hasRequestedData = true;

            if (NULL != objOut) {
                if (WorkingSetMember::RID_AND_IDX == member->getState()) {
                    if (1 != member->keyData.size()) {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    } else {
                        // TODO: currently snapshot ids are only associated with documents, and
                        // not with index keys.
                        *objOut = Snapshotted<BSONObj>(SnapshotId(), member->keyData[0].keyData);
                    }
                } else if (member->hasObj()) {
                    *objOut = member->obj;
                } else {
                    _workingSet->free(id);
                    hasRequestedData = false;
                }
            }

            if (NULL != dlOut) {
                if (member->hasRecordId()) {
                    *dlOut = member->recordId;
                } else {
                    _workingSet->free(id);
                    hasRequestedData = false;
                }
            }

            if (hasRequestedData) {
                _workingSet->free(id);
                return PlanExecutor::ADVANCED;
            }
            // This result didn't have the data the caller wanted, try again.
        } else if (PlanStage::NEED_YIELD == code) {
            if (id == WorkingSet::INVALID_ID) {
                if (!_yieldPolicy->canAutoYield())
                    throw WriteConflictException();
                CurOp::get(_opCtx)->debug().writeConflicts++;
                writeConflictsInARow++;
                WriteConflictException::logAndBackoff(
                    writeConflictsInARow, "plan execution", _nss.ns());

            } else {
                WorkingSetMember* member = _workingSet->get(id);
                invariant(member->hasFetcher());
                // Transfer ownership of the fetcher. Next time around the loop a yield will
                // happen.
                fetcher.reset(member->releaseFetcher());
            }

            // If we're allowed to, we will yield next time through the loop.
            if (_yieldPolicy->canAutoYield())
                _yieldPolicy->forceYield();
        } else if (PlanStage::NEED_TIME == code) {
            // Fall through to yield check at end of large conditional.
        } else if (PlanStage::IS_EOF == code) {
            if (shouldWaitForInserts()) {
                const bool locksReacquiredAfterYield = waitForInserts();
                if (locksReacquiredAfterYield) {
                    // There may be more results, try to get more data.
                    continue;
                }
                invariant(isMarkedAsKilled());
                if (objOut) {
                    Status status(ErrorCodes::OperationFailed,
                                  str::stream() << "Operation aborted because: " << *_killReason);
                    *objOut = Snapshotted<BSONObj>(
                        SnapshotId(), WorkingSetCommon::buildMemberStatusObject(status));
                }
                return PlanExecutor::DEAD;
            } else {
                return PlanExecutor::IS_EOF;
            }
        } else {
            invariant(PlanStage::DEAD == code || PlanStage::FAILURE == code);

            if (NULL != objOut) {
                BSONObj statusObj;
                WorkingSetCommon::getStatusMemberObject(*_workingSet, id, &statusObj);
                *objOut = Snapshotted<BSONObj>(SnapshotId(), statusObj);
            }

            return (PlanStage::DEAD == code) ? PlanExecutor::DEAD : PlanExecutor::FAILURE;
        }
    }
}
Beispiel #12
0
PlanStage::StageState FetchStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // If there's an obj there, there is no fetching to perform.
        if (member->hasObj()) {
            ++_specificStats.alreadyHasObj;
        } else {
            // We need a valid RecordId to fetch from and this is the only state that has one.
            verify(WorkingSetMember::RID_AND_IDX == member->getState());
            verify(member->hasRecordId());

            try {
                if (!_cursor)
                    _cursor = _collection->getCursor(getOpCtx());

                if (auto fetcher = _cursor->fetcherForId(member->recordId)) {
                    // There's something to fetch. Hand the fetcher off to the WSM, and pass up
                    // a fetch request.
                    _idRetrying = id;
                    member->setFetcher(fetcher.release());
                    *out = id;
                    return NEED_YIELD;
                }

                // The doc is already in memory, so go ahead and grab it. Now we have a RecordId
                // as well as an unowned object
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) {
                    _ws->free(id);
                    return NEED_TIME;
                }
            } catch (const WriteConflictException&) {
                // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may
                // be freed when we yield.
                member->makeObjOwnedIfNeeded();
                _idRetrying = id;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            }
        }

        return returnIfMatches(member, id, out);
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        // The stage which produces a failure is responsible for allocating a working set member
        // with error details.
        invariant(WorkingSet::INVALID_ID != id);
        *out = id;
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}
Beispiel #13
0
PlanStage::StageState AndHashStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Fast-path for one of our children being EOF immediately.  We work each child a few times.
    // If it hits EOF, the AND cannot output anything.  If it produces a result, we stash that
    // result in _lookAheadResults.
    if (_lookAheadResults.empty()) {
        // INVALID_ID means that the child didn't produce a valid result.

        // We specifically are not using .resize(size, value) here because C++11 builds don't
        // seem to resolve WorkingSet::INVALID_ID during linking.
        _lookAheadResults.resize(_children.size());
        for (size_t i = 0; i < _children.size(); ++i) {
            _lookAheadResults[i] = WorkingSet::INVALID_ID;
        }

        // Work each child some number of times until it's either EOF or produces
        // a result.  If it's EOF this whole stage will be EOF.  If it produces a
        // result we cache it for later.
        for (size_t i = 0; i < _children.size(); ++i) {
            auto& child = _children[i];
            for (size_t j = 0; j < kLookAheadWorks; ++j) {
                // Cache the result in _lookAheadResults[i].
                StageState childStatus = child->work(&_lookAheadResults[i]);

                if (PlanStage::IS_EOF == childStatus) {
                    // A child went right to EOF.  Bail out.
                    _hashingChildren = false;
                    _dataMap.clear();
                    return PlanStage::IS_EOF;
                } else if (PlanStage::ADVANCED == childStatus) {
                    // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we
                    // yield.
                    _ws->get(_lookAheadResults[i])->makeObjOwnedIfNeeded();
                    break;  // Stop looking at this child.
                } else if (PlanStage::FAILURE == childStatus || PlanStage::DEAD == childStatus) {
                    // The stage which produces a failure is responsible for allocating a working
                    // set member with error details.
                    invariant(WorkingSet::INVALID_ID != _lookAheadResults[i]);
                    *out = _lookAheadResults[i];
                    _hashingChildren = false;
                    _dataMap.clear();
                    return childStatus;
                }
                // We ignore NEED_TIME. TODO: what do we want to do if we get NEED_YIELD here?
            }
        }

        // We did a bunch of work above, return NEED_TIME to be fair.
        return PlanStage::NEED_TIME;
    }

    // An AND is either reading the first child into the hash table, probing against the hash
    // table with subsequent children, or checking the last child's results to see if they're
    // in the hash table.

    // We read the first child into our hash table.
    if (_hashingChildren) {
        // Check memory usage of previously hashed results.
        if (_memUsage > _maxMemUsage) {
            mongoutils::str::stream ss;
            ss << "hashed AND stage buffered data usage of " << _memUsage
               << " bytes exceeds internal limit of " << kDefaultMaxMemUsageBytes << " bytes";
            Status status(ErrorCodes::Overflow, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }

        if (0 == _currentChild) {
            return readFirstChild(out);
        } else if (_currentChild < _children.size() - 1) {
            return hashOtherChildren(out);
        } else {
            _hashingChildren = false;
            // We don't hash our last child.  Instead, we probe the table created from the
            // previous children, returning results in the order of the last child.
            // Fall through to below.
        }
    }

    // Returning results.  We read from the last child and return the results that are in our
    // hash map.

    // We should be EOF if we're not hashing results and the dataMap is empty.
    verify(!_dataMap.empty());

    // We probe _dataMap with the last child.
    verify(_currentChild == _children.size() - 1);

    // Get the next result for the (_children.size() - 1)-th child.
    StageState childStatus = workChild(_children.size() - 1, out);
    if (PlanStage::ADVANCED != childStatus) {
        return childStatus;
    }

    // We know that we've ADVANCED.  See if the WSM is in our table.
    WorkingSetMember* member = _ws->get(*out);

    // The child must give us a WorkingSetMember with a record id, since we intersect index keys
    // based on the record id. The planner ensures that the child stage can never produce an WSM
    // with no record id.
    invariant(member->hasRecordId());

    DataMap::iterator it = _dataMap.find(member->recordId);
    if (_dataMap.end() == it) {
        // Child's output wasn't in every previous child.  Throw it out.
        _ws->free(*out);
        return PlanStage::NEED_TIME;
    } else {
        // Child's output was in every previous child.  Merge any key data in
        // the child's output and free the child's just-outputted WSM.
        WorkingSetID hashID = it->second;
        _dataMap.erase(it);

        AndCommon::mergeFrom(_ws, hashID, *member);
        _ws->free(*out);

        *out = hashID;
        return PlanStage::ADVANCED;
    }
}
    void run() {
        OldClientWriteContext ctx(&_txn, ns());
        Database* db = ctx.db();
        Collection* coll = db->getCollection(ns());
        if (!coll) {
            WriteUnitOfWork wuow(&_txn);
            coll = db->createCollection(&_txn, ns());
            wuow.commit();
        }

        WorkingSet ws;
        // Sort by foo:1
        MergeSortStageParams msparams;
        msparams.pattern = BSON("foo" << 1);
        auto ms = make_unique<MergeSortStage>(&_txn, msparams, &ws, coll);

        IndexScanParams params;
        params.bounds.isSimpleRange = true;
        params.bounds.startKey = objWithMinKey(1);
        params.bounds.endKey = objWithMaxKey(1);
        params.bounds.endKeyInclusive = true;
        params.direction = 1;

        // Index 'a'+i has foo equal to 'i'.

        int numIndices = 20;
        for (int i = 0; i < numIndices; ++i) {
            // 'a', 'b', ...
            string index(1, 'a' + i);
            insert(BSON(index << 1 << "foo" << i));

            BSONObj indexSpec = BSON(index << 1 << "foo" << 1);
            addIndex(indexSpec);
            params.descriptor = getIndex(indexSpec, coll);
            ms->addChild(new IndexScan(&_txn, params, &ws, NULL));
        }

        set<RecordId> recordIds;
        getRecordIds(&recordIds, coll);

        set<RecordId>::iterator it = recordIds.begin();

        // Get 10 results.  Should be getting results in order of 'recordIds'.
        int count = 0;
        while (!ms->isEOF() && count < 10) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status = ms->work(&id);
            if (PlanStage::ADVANCED != status) {
                continue;
            }

            WorkingSetMember* member = ws.get(id);
            ASSERT_EQUALS(member->recordId, *it);
            BSONElement elt;
            string index(1, 'a' + count);
            ASSERT(member->getFieldDotted(index, &elt));
            ASSERT_EQUALS(1, elt.numberInt());
            ASSERT(member->getFieldDotted("foo", &elt));
            ASSERT_EQUALS(count, elt.numberInt());
            ++count;
            ++it;
        }

        // Invalidate recordIds[11].  Should force a fetch and return the deleted document.
        ms->saveState();
        ms->invalidate(&_txn, *it, INVALIDATION_DELETION);
        ms->restoreState();

        // Make sure recordIds[11] was fetched for us.
        {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status;
            do {
                status = ms->work(&id);
            } while (PlanStage::ADVANCED != status);

            WorkingSetMember* member = ws.get(id);
            ASSERT(!member->hasRecordId());
            ASSERT(member->hasObj());
            string index(1, 'a' + count);
            BSONElement elt;
            ASSERT_TRUE(member->getFieldDotted(index, &elt));
            ASSERT_EQUALS(1, elt.numberInt());
            ASSERT(member->getFieldDotted("foo", &elt));
            ASSERT_EQUALS(count, elt.numberInt());

            ++it;
            ++count;
        }

        // And get the rest.
        while (!ms->isEOF()) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState status = ms->work(&id);
            if (PlanStage::ADVANCED != status) {
                continue;
            }

            WorkingSetMember* member = ws.get(id);
            ASSERT_EQUALS(member->recordId, *it);
            BSONElement elt;
            string index(1, 'a' + count);
            ASSERT_TRUE(member->getFieldDotted(index, &elt));
            ASSERT_EQUALS(1, elt.numberInt());
            ASSERT(member->getFieldDotted("foo", &elt));
            ASSERT_EQUALS(count, elt.numberInt());
            ++count;
            ++it;
        }
    }
Beispiel #15
0
PlanStage::StageState SortStage::doWork(WorkingSetID* out) {
    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes.load());
    if (_memUsage > maxBytes) {
        str::stream ss;
        ss << "Sort operation used more than the maximum " << maxBytes
           << " bytes of RAM. Add an index, or specify a smaller limit.";
        Status status(ErrorCodes::OperationFailed, ss);
        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        return PlanStage::FAILURE;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Still reading in results to sort.
    if (!_sorted) {
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child()->work(&id);

        if (PlanStage::ADVANCED == code) {
            WorkingSetMember* member = _ws->get(id);

            SortableDataItem item;
            item.wsid = id;

            // We extract the sort key from the WSM's computed data. This must have been generated
            // by a SortKeyGeneratorStage descendent in the execution tree.
            auto sortKeyComputedData =
                static_cast<const SortKeyComputedData*>(member->getComputed(WSM_SORT_KEY));
            item.sortKey = sortKeyComputedData->getSortKey();

            if (member->hasRecordId()) {
                // The RecordId breaks ties when sorting two WSMs with the same sort key.
                item.recordId = member->recordId;
            }

            addToBuffer(item);

            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // TODO: We don't need the lock for this.  We could ask for a yield and do this work
            // unlocked.  Also, this is performing a lot of work for one call to work(...)
            sortBuffer();
            _resultIterator = _data.begin();
            _sorted = true;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code) {
            // The stage which produces a failure is responsible for allocating a working set member
            // with error details.
            invariant(WorkingSet::INVALID_ID != id);
            *out = id;
            return code;
        } else if (PlanStage::NEED_YIELD == code) {
            *out = id;
        }

        return code;
    }

    // Returning results.
    verify(_resultIterator != _data.end());
    verify(_sorted);
    *out = _resultIterator->wsid;
    _resultIterator++;

    return PlanStage::ADVANCED;
}
Beispiel #16
0
PlanStage::StageState AndHashStage::hashOtherChildren(WorkingSetID* out) {
    verify(_currentChild > 0);

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState childStatus = workChild(_currentChild, &id);

    if (PlanStage::ADVANCED == childStatus) {
        WorkingSetMember* member = _ws->get(id);

        // The child must give us a WorkingSetMember with a record id, since we intersect index keys
        // based on the record id. The planner ensures that the child stage can never produce an
        // WSM with no record id.
        invariant(member->hasRecordId());

        if (_dataMap.end() == _dataMap.find(member->recordId)) {
            // Ignore.  It's not in any previous child.
        } else {
            // We have a hit.  Copy data into the WSM we already have.
            _seenMap.insert(member->recordId);
            WorkingSetID olderMemberID = _dataMap[member->recordId];
            WorkingSetMember* olderMember = _ws->get(olderMemberID);
            size_t memUsageBefore = olderMember->getMemUsage();

            AndCommon::mergeFrom(_ws, olderMemberID, *member);

            // Update memory stats.
            _memUsage += olderMember->getMemUsage() - memUsageBefore;
        }
        _ws->free(id);
        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == childStatus) {
        // Finished with a child.
        ++_currentChild;

        // Keep elements of _dataMap that are in _seenMap.
        DataMap::iterator it = _dataMap.begin();
        while (it != _dataMap.end()) {
            if (_seenMap.end() == _seenMap.find(it->first)) {
                DataMap::iterator toErase = it;
                ++it;

                // Update memory stats.
                WorkingSetMember* member = _ws->get(toErase->second);
                _memUsage -= member->getMemUsage();

                _ws->free(toErase->second);
                _dataMap.erase(toErase);
            } else {
                ++it;
            }
        }

        _specificStats.mapAfterChild.push_back(_dataMap.size());

        _seenMap.clear();

        // _dataMap is now the intersection of the first _currentChild nodes.

        // If we have nothing to AND with after finishing any child, stop.
        if (_dataMap.empty()) {
            _hashingChildren = false;
            return PlanStage::IS_EOF;
        }

        // We've finished scanning all children.  Return results with the next call to work().
        if (_currentChild == _children.size()) {
            _hashingChildren = false;
        }

        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == childStatus || PlanStage::DEAD == childStatus) {
        // The stage which produces a failure is responsible for allocating a working set member
        // with error details.
        invariant(WorkingSet::INVALID_ID != id);
        *out = id;
        return childStatus;
    } else {
        if (PlanStage::NEED_YIELD == childStatus) {
            *out = id;
        }

        return childStatus;
    }
}
Beispiel #17
0
PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (doneUpdating()) {
        // Even if we're done updating, we may have some inserting left to do.
        if (needInsert()) {
            // TODO we may want to handle WriteConflictException here. Currently we bounce it
            // out to a higher level since if this WCEs it is likely that we raced with another
            // upsert that may have matched our query, and therefore this may need to perform an
            // update rather than an insert. Bouncing to the higher level allows restarting the
            // query in this case.
            doInsert();

            invariant(isEOF());
            if (_params.request->shouldReturnNewDocs()) {
                // Want to return the document we just inserted, create it as a WorkingSetMember
                // so that we can return it.
                BSONObj newObj = _specificStats.objInserted;
                *out = _ws->allocate();
                WorkingSetMember* member = _ws->get(*out);
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
                member->transitionToOwnedObj();
                return PlanStage::ADVANCED;
            }
        }

        // At this point either we're done updating and there was no insert to do,
        // or we're done updating and we're done inserting. Either way, we're EOF.
        invariant(isEOF());
        return PlanStage::IS_EOF;
    }

    // If we're here, then we still have to ask for results from the child and apply
    // updates to them. We should only get here if the collection exists.
    invariant(_collection);

    // It is possible that after an update was applied, a WriteConflictException
    // occurred and prevented us from returning ADVANCED with the requested version
    // of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.request->shouldReturnAnyDocs());

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        // Need to get these things from the result returned by the child.
        RecordId recordId;

        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry updating or returning
        // it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasRecordId()) {
            // We expect to be here because of an invalidation causing a force-fetch.
            ++_specificStats.nInvalidateSkips;
            return PlanStage::NEED_TIME;
        }
        recordId = member->recordId;

        // Updates can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        // We fill this with the new RecordIds of moved doc so we don't double-update.
        if (_updatedRecordIds && _updatedRecordIds->count(recordId) > 0) {
            // Found a RecordId that refers to a document we had already updated. Note that
            // we can never remove from _updatedRecordIds because updates by other clients
            // could cause us to encounter a document again later.
            return PlanStage::NEED_TIME;
        }

        bool docStillMatches;
        try {
            docStillMatches = write_stage_common::ensureStillMatches(
                _collection, getOpCtx(), _ws, id, _params.canonicalQuery);
        } catch (const WriteConflictException&) {
            // There was a problem trying to detect if the document still exists, so retry.
            memberFreer.Dismiss();
            return prepareToRetryWSM(id, out);
        }

        if (!docStillMatches) {
            // Either the document has been deleted, or it has been updated such that it no longer
            // matches the predicate.
            if (shouldRestartUpdateIfNoLongerMatches(_params)) {
                throw WriteConflictException();
            }
            return PlanStage::NEED_TIME;
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        member->makeObjOwnedIfNeeded();

        // Save state before making changes
        WorkingSetCommon::prepareForSnapshotChange(_ws);
        try {
            child()->saveState();
        } catch (const WriteConflictException&) {
            std::terminate();
        }

        // If we care about the pre-updated version of the doc, save it out here.
        BSONObj oldObj;
        if (_params.request->shouldReturnOldDocs()) {
            oldObj = member->obj.value().getOwned();
        }

        BSONObj newObj;
        try {
            // Do the update, get us the new version of the doc.
            newObj = transformAndUpdate(member->obj, recordId);
        } catch (const WriteConflictException&) {
            memberFreer.Dismiss();  // Keep this member around so we can retry updating it.
            return prepareToRetryWSM(id, out);
        }

        // Set member's obj to be the doc we want to return.
        if (_params.request->shouldReturnAnyDocs()) {
            if (_params.request->shouldReturnNewDocs()) {
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
            } else {
                invariant(_params.request->shouldReturnOldDocs());
                member->obj.setValue(oldObj);
            }
            member->recordId = RecordId();
            member->transitionToOwnedObj();
        }

        // This should be after transformAndUpdate to make sure we actually updated this doc.
        ++_specificStats.nMatched;

        // Restore state after modification

        // As restoreState may restore (recreate) cursors, make sure to restore the
        // state outside of the WritUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException&) {
            // Note we don't need to retry updating anything in this case since the update
            // already was committed. However, we still need to return the updated document
            // (if it was requested).
            if (_params.request->shouldReturnAnyDocs()) {
                // member->obj should refer to the document we want to return.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            return NEED_YIELD;
        }

        if (_params.request->shouldReturnAnyDocs()) {
            // member->obj should refer to the document we want to return.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            return PlanStage::ADVANCED;
        }

        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == status) {
        // The child is out of results, but we might not be done yet because we still might
        // have to do an insert.
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid.  If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "update stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
            return PlanStage::FAILURE;
        }
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}
Beispiel #18
0
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    if (_idRetrying != WorkingSet::INVALID_ID) {
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    } else {
        auto status = child()->work(&id);

        switch (status) {
            case PlanStage::ADVANCED:
                break;

            case PlanStage::FAILURE:
            case PlanStage::DEAD:
                // The stage which produces a failure is responsible for allocating a working set
                // member with error details.
                invariant(WorkingSet::INVALID_ID != id);
                *out = id;
                return status;

            case PlanStage::NEED_TIME:
                return status;

            case PlanStage::NEED_YIELD:
                *out = id;
                return status;

            case PlanStage::IS_EOF:
                return status;

            default:
                MONGO_UNREACHABLE;
        }
    }

    // We advanced, or are retrying, and id is set to the WSM to work on.
    WorkingSetMember* member = _ws->get(id);

    // We want to free this member when we return, unless we need to retry deleting or returning it.
    ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

    invariant(member->hasRecordId());
    RecordId recordId = member->recordId;
    // Deletes can't have projections. This means that covering analysis will always add
    // a fetch. We should always get fetched data, and never just key data.
    invariant(member->hasObj());

    // Ensure the document still exists and matches the predicate.
    bool docStillMatches;
    try {
        docStillMatches = write_stage_common::ensureStillMatches(
            _collection, getOpCtx(), _ws, id, _params.canonicalQuery);
    } catch (const WriteConflictException&) {
        // There was a problem trying to detect if the document still exists, so retry.
        memberFreer.Dismiss();
        return prepareToRetryWSM(id, out);
    }

    if (!docStillMatches) {
        // Either the document has already been deleted, or it has been updated such that it no
        // longer matches the predicate.
        if (shouldRestartDeleteIfNoLongerMatches(_params)) {
            throw WriteConflictException();
        }
        return PlanStage::NEED_TIME;
    }

    // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState() is
    // allowed to free the memory.
    if (_params.returnDeleted) {
        // Save a copy of the document that is about to get deleted, but keep it in the RID_AND_OBJ
        // state in case we need to retry deleting it.
        BSONObj deletedDoc = member->obj.value();
        member->obj.setValue(deletedDoc.getOwned());
    }

    // TODO: Do we want to buffer docs and delete them in a group rather than saving/restoring state
    // repeatedly?

    WorkingSetCommon::prepareForSnapshotChange(_ws);
    try {
        child()->saveState();
    } catch (const WriteConflictException&) {
        std::terminate();
    }

    // Do the write, unless this is an explain.
    if (!_params.isExplain) {
        try {
            WriteUnitOfWork wunit(getOpCtx());
            _collection->deleteDocument(getOpCtx(),
                                        _params.stmtId,
                                        recordId,
                                        _params.opDebug,
                                        _params.fromMigrate,
                                        false,
                                        _params.returnDeleted ? Collection::StoreDeletedDoc::On
                                                              : Collection::StoreDeletedDoc::Off);
            wunit.commit();
        } catch (const WriteConflictException&) {
            memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
            return prepareToRetryWSM(id, out);
        }
    }
    ++_specificStats.docsDeleted;

    if (_params.returnDeleted) {
        // After deleting the document, the RecordId associated with this member is invalid.
        // Remove the 'recordId' from the WorkingSetMember before returning it.
        member->recordId = RecordId();
        member->transitionToOwnedObj();
    }

    // As restoreState may restore (recreate) cursors, cursors are tied to the transaction in which
    // they are created, and a WriteUnitOfWork is a transaction, make sure to restore the state
    // outside of the WriteUnitOfWork.
    try {
        child()->restoreState();
    } catch (const WriteConflictException&) {
        // Note we don't need to retry anything in this case since the delete already was committed.
        // However, we still need to return the deleted document (if it was requested).
        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            _idReturning = id;
            // Keep this member around so that we can return it on the next work() call.
            memberFreer.Dismiss();
        }
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // member->obj should refer to the deleted document.
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        memberFreer.Dismiss();  // Keep this member around so we can return it.
        *out = id;
        return PlanStage::ADVANCED;
    }

    return PlanStage::NEED_TIME;
}
Beispiel #19
0
PlanStage::StageState SortStage::doWork(WorkingSetID* out) {
    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes);
    if (_memUsage > maxBytes) {
        mongoutils::str::stream ss;
        ss << "Sort operation used more than the maximum " << maxBytes
           << " bytes of RAM. Add an index, or specify a smaller limit.";
        Status status(ErrorCodes::OperationFailed, ss);
        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        return PlanStage::FAILURE;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Still reading in results to sort.
    if (!_sorted) {
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child()->work(&id);

        if (PlanStage::ADVANCED == code) {
            // Add it into the map for quick invalidation if it has a valid RecordId.
            // A RecordId may be invalidated at any time (during a yield).  We need to get into
            // the WorkingSet as quickly as possible to handle it.
            WorkingSetMember* member = _ws->get(id);

            // Planner must put a fetch before we get here.
            verify(member->hasObj());

            // We might be sorting something that was invalidated at some point.
            if (member->hasRecordId()) {
                _wsidByRecordId[member->recordId] = id;
            }

            SortableDataItem item;
            item.wsid = id;

            // We extract the sort key from the WSM's computed data. This must have been generated
            // by a SortKeyGeneratorStage descendent in the execution tree.
            auto sortKeyComputedData =
                static_cast<const SortKeyComputedData*>(member->getComputed(WSM_SORT_KEY));
            item.sortKey = sortKeyComputedData->getSortKey();

            if (member->hasRecordId()) {
                // The RecordId breaks ties when sorting two WSMs with the same sort key.
                item.recordId = member->recordId;
            }

            addToBuffer(item);

            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // TODO: We don't need the lock for this.  We could ask for a yield and do this work
            // unlocked.  Also, this is performing a lot of work for one call to work(...)
            sortBuffer();
            _resultIterator = _data.begin();
            _sorted = true;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "sort stage failed to read in results to sort from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else if (PlanStage::NEED_YIELD == code) {
            *out = id;
        }

        return code;
    }

    // Returning results.
    verify(_resultIterator != _data.end());
    verify(_sorted);
    *out = _resultIterator->wsid;
    _resultIterator++;

    // If we're returning something, take it out of our DL -> WSID map so that future
    // calls to invalidate don't cause us to take action for a DL we're done with.
    WorkingSetMember* member = _ws->get(*out);
    if (member->hasRecordId()) {
        _wsidByRecordId.erase(member->recordId);
    }

    return PlanStage::ADVANCED;
}
    void run() {
        // Populate the collection.
        for (int i = 0; i < 50; ++i) {
            insert(BSON("_id" << i << "foo" << i));
        }
        ASSERT_EQUALS(50U, count(BSONObj()));

        // Various variables we'll need.
        dbtests::WriteContextForTests ctx(&_opCtx, nss.ns());
        OpDebug* opDebug = &CurOp::get(_opCtx)->debug();
        Collection* coll = ctx.getCollection();
        ASSERT(coll);
        UpdateRequest request(nss);
        const CollatorInterface* collator = nullptr;
        UpdateDriver driver(new ExpressionContext(&_opCtx, collator));
        const int targetDocIndex = 10;
        const BSONObj query = BSON("foo" << BSON("$gte" << targetDocIndex));
        const auto ws = make_unique<WorkingSet>();
        const unique_ptr<CanonicalQuery> cq(canonicalize(query));

        // Get the RecordIds that would be returned by an in-order scan.
        vector<RecordId> recordIds;
        getRecordIds(coll, CollectionScanParams::FORWARD, &recordIds);

        // Populate the request.
        request.setQuery(query);
        request.setUpdates(fromjson("{$set: {x: 0}}"));
        request.setSort(BSONObj());
        request.setMulti(false);
        request.setReturnDocs(UpdateRequest::RETURN_NEW);

        const std::map<StringData, std::unique_ptr<ExpressionWithPlaceholder>> arrayFilters;

        ASSERT_DOES_NOT_THROW(driver.parse(request.getUpdates(), arrayFilters, request.isMulti()));

        // Configure a QueuedDataStage to pass the first object in the collection back in a
        // RID_AND_OBJ state.
        auto qds = make_unique<QueuedDataStage>(&_opCtx, ws.get());
        WorkingSetID id = ws->allocate();
        WorkingSetMember* member = ws->get(id);
        member->recordId = recordIds[targetDocIndex];
        const BSONObj oldDoc = BSON("_id" << targetDocIndex << "foo" << targetDocIndex);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), oldDoc);
        ws->transitionToRecordIdAndObj(id);
        qds->pushBack(id);

        // Configure the update.
        UpdateStageParams updateParams(&request, &driver, opDebug);
        updateParams.canonicalQuery = cq.get();

        auto updateStage =
            make_unique<UpdateStage>(&_opCtx, updateParams, ws.get(), coll, qds.release());

        // Should return advanced.
        id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = updateStage->work(&id);
        ASSERT_EQUALS(PlanStage::ADVANCED, state);

        // Make sure the returned value is what we expect it to be.

        // Should give us back a valid id.
        ASSERT_TRUE(WorkingSet::INVALID_ID != id);
        WorkingSetMember* resultMember = ws->get(id);
        // With an owned copy of the object, with no RecordId.
        ASSERT_TRUE(resultMember->hasOwnedObj());
        ASSERT_FALSE(resultMember->hasRecordId());
        ASSERT_EQUALS(resultMember->getState(), WorkingSetMember::OWNED_OBJ);
        ASSERT_TRUE(resultMember->obj.value().isOwned());

        // Should be the new value.
        BSONObj newDoc = BSON("_id" << targetDocIndex << "foo" << targetDocIndex << "x" << 0);
        ASSERT_BSONOBJ_EQ(resultMember->obj.value(), newDoc);

        // Should have done the update.
        vector<BSONObj> objs;
        getCollContents(coll, &objs);
        ASSERT_BSONOBJ_EQ(objs[targetDocIndex], newDoc);

        // That should be it.
        id = WorkingSet::INVALID_ID;
        ASSERT_EQUALS(PlanStage::IS_EOF, updateStage->work(&id));
    }
Beispiel #21
0
/**
 * addToBuffer() and sortBuffer() work differently based on the
 * configured limit. addToBuffer() is also responsible for
 * performing some accounting on the overall memory usage to
 * make sure we're not using too much memory.
 *
 * limit == 0:
 *     addToBuffer() - Adds item to vector.
 *     sortBuffer() - Sorts vector.
 * limit == 1:
 *     addToBuffer() - Replaces first item in vector with max of
 *                     current and new item.
 *                     Updates memory usage if item was replaced.
 *     sortBuffer() - Does nothing.
 * limit > 1:
 *     addToBuffer() - Does not update vector. Adds item to set.
 *                     If size of set exceeds limit, remove item from set
 *                     with lowest key. Updates memory usage accordingly.
 *     sortBuffer() - Copies items from set to vectors.
 */
void SortStage::addToBuffer(const SortableDataItem& item) {
    // Holds ID of working set member to be freed at end of this function.
    WorkingSetID wsidToFree = WorkingSet::INVALID_ID;

    WorkingSetMember* member = _ws->get(item.wsid);
    if (_limit == 0) {
        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
        member->makeObjOwnedIfNeeded();
        _data.push_back(item);
        _memUsage += member->getMemUsage();
    } else if (_limit == 1) {
        if (_data.empty()) {
            member->makeObjOwnedIfNeeded();
            _data.push_back(item);
            _memUsage = member->getMemUsage();
            return;
        }
        wsidToFree = item.wsid;
        const WorkingSetComparator& cmp = *_sortKeyComparator;
        // Compare new item with existing item in vector.
        if (cmp(item, _data[0])) {
            wsidToFree = _data[0].wsid;
            member->makeObjOwnedIfNeeded();
            _data[0] = item;
            _memUsage = member->getMemUsage();
        }
    } else {
        // Update data item set instead of vector
        // Limit not reached - insert and return
        vector<SortableDataItem>::size_type limit(_limit);
        if (_dataSet->size() < limit) {
            member->makeObjOwnedIfNeeded();
            _dataSet->insert(item);
            _memUsage += member->getMemUsage();
            return;
        }
        // Limit will be exceeded - compare with item with lowest key
        // If new item does not have a lower key value than last item,
        // do nothing.
        wsidToFree = item.wsid;
        SortableDataItemSet::const_iterator lastItemIt = --(_dataSet->end());
        const SortableDataItem& lastItem = *lastItemIt;
        const WorkingSetComparator& cmp = *_sortKeyComparator;
        if (cmp(item, lastItem)) {
            _memUsage -= _ws->get(lastItem.wsid)->getMemUsage();
            _memUsage += member->getMemUsage();
            wsidToFree = lastItem.wsid;
            // According to std::set iterator validity rules,
            // it does not matter which of erase()/insert() happens first.
            // Here, we choose to erase first to release potential resources
            // used by the last item and to keep the scope of the iterator to a minimum.
            _dataSet->erase(lastItemIt);
            member->makeObjOwnedIfNeeded();
            _dataSet->insert(item);
        }
    }

    // If the working set ID is valid, remove from
    // RecordId invalidation map and free from working set.
    if (wsidToFree != WorkingSet::INVALID_ID) {
        WorkingSetMember* member = _ws->get(wsidToFree);
        if (member->hasRecordId()) {
            _wsidByRecordId.erase(member->recordId);
        }
        _ws->free(wsidToFree);
    }
}
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    if (_idRetrying != WorkingSet::INVALID_ID) {
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    } else {
        auto status = child()->work(&id);

        switch (status) {
        case PlanStage::ADVANCED:
            break;

        case PlanStage::FAILURE:
        case PlanStage::DEAD:
            *out = id;

            // If a stage fails, it may create a status WSM to indicate why it failed, in which
            // case 'id' is valid.  If ID is invalid, we create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                const std::string errmsg = "delete stage failed to read in results from child";
                *out = WorkingSetCommon::allocateStatusMember(
                           _ws, Status(ErrorCodes::InternalError, errmsg));
            }
            return status;

        case PlanStage::NEED_TIME:
            return status;

        case PlanStage::NEED_YIELD:
            *out = id;
            return status;

        case PlanStage::IS_EOF:
            return status;

        default:
            MONGO_UNREACHABLE;
        }
    }

    // We advanced, or are retrying, and id is set to the WSM to work on.
    WorkingSetMember* member = _ws->get(id);

    // We want to free this member when we return, unless we need to retry it.
    ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

    if (!member->hasRecordId()) {
        // We expect to be here because of an invalidation causing a force-fetch.
        ++_specificStats.nInvalidateSkips;
        return PlanStage::NEED_TIME;
    }
    RecordId recordId = member->recordId;
    // Deletes can't have projections. This means that covering analysis will always add
    // a fetch. We should always get fetched data, and never just key data.
    invariant(member->hasObj());

    try {
        // If the snapshot changed, then we have to make sure we have the latest copy of the
        // doc and that it still matches.
        std::unique_ptr<SeekableRecordCursor> cursor;
        if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
            cursor = _collection->getCursor(getOpCtx());
            if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                // Doc is already deleted. Nothing more to do.
                return PlanStage::NEED_TIME;
            }

            // Make sure the re-fetched doc still matches the predicate.
            if (_params.canonicalQuery &&
                    !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) {
                // Doesn't match.
                return PlanStage::NEED_TIME;
            }
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        if (_params.returnDeleted) {
            // Save a copy of the document that is about to get deleted, but keep it in the
            // RID_AND_OBJ state in case we need to retry deleting it.
            BSONObj deletedDoc = member->obj.value();
            member->obj.setValue(deletedDoc.getOwned());
        }

        // TODO: Do we want to buffer docs and delete them in a group rather than
        // saving/restoring state repeatedly?

        try {
            WorkingSetCommon::prepareForSnapshotChange(_ws);
            child()->saveState();
        } catch (const WriteConflictException& wce) {
            std::terminate();
        }

        // Do the write, unless this is an explain.
        if (!_params.isExplain) {
            WriteUnitOfWork wunit(getOpCtx());
            _collection->deleteDocument(getOpCtx(), recordId, _params.fromMigrate);
            wunit.commit();
        }

        ++_specificStats.docsDeleted;
    } catch (const WriteConflictException& wce) {
        // When we're doing a findAndModify with a sort, the sort will have a limit of 1, so will
        // not produce any more results even if there is another matching document. Re-throw the WCE
        // here so that these operations get another chance to find a matching document. The
        // findAndModify command should automatically retry if it gets a WCE.
        // TODO: this is not necessary if there was no sort specified.
        if (_params.returnDeleted) {
            throw;
        }
        _idRetrying = id;
        memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // After deleting the document, the RecordId associated with this member is invalid.
        // Remove the 'recordId' from the WorkingSetMember before returning it.
        member->recordId = RecordId();
        member->transitionToOwnedObj();
    }

    //  As restoreState may restore (recreate) cursors, cursors are tied to the
    //  transaction in which they are created, and a WriteUnitOfWork is a
    //  transaction, make sure to restore the state outside of the WritUnitOfWork.
    try {
        child()->restoreState();
    } catch (const WriteConflictException& wce) {
        // Note we don't need to retry anything in this case since the delete already
        // was committed. However, we still need to return the deleted document
        // (if it was requested).
        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            _idReturning = id;
            // Keep this member around so that we can return it on the next work() call.
            memberFreer.Dismiss();
        }
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // member->obj should refer to the deleted document.
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        memberFreer.Dismiss();  // Keep this member around so we can return it.
        *out = id;
        return PlanStage::ADVANCED;
    }

    return PlanStage::NEED_TIME;
}
PlanStage::StageState FetchStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // If there's an obj there, there is no fetching to perform.
        if (member->hasObj()) {
            ++_specificStats.alreadyHasObj;
        } else {
            // We need a valid RecordId to fetch from and this is the only state that has one.
            verify(WorkingSetMember::RID_AND_IDX == member->getState());
            verify(member->hasRecordId());

            try {
                if (!_cursor)
                    _cursor = _collection->getCursor(getOpCtx());

                if (auto fetcher = _cursor->fetcherForId(member->recordId)) {
                    // There's something to fetch. Hand the fetcher off to the WSM, and pass up
                    // a fetch request.
                    _idRetrying = id;
                    member->setFetcher(fetcher.release());
                    *out = id;
                    return NEED_YIELD;
                }

                // The doc is already in memory, so go ahead and grab it. Now we have a RecordId
                // as well as an unowned object
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) {
                    _ws->free(id);
                    return NEED_TIME;
                }
            } catch (const WriteConflictException& wce) {
                // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may
                // be freed when we yield.
                member->makeObjOwnedIfNeeded();
                _idRetrying = id;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            }
        }

        return returnIfMatches(member, id, out);
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "fetch stage failed to read in results from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}