Example #1
0
PlanStage::StageState AndHashStage::readFirstChild(WorkingSetID* out) {
    verify(_currentChild == 0);

    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState childStatus = workChild(0, &id);

    if (PlanStage::ADVANCED == childStatus) {
        WorkingSetMember* member = _ws->get(id);

        // The child must give us a WorkingSetMember with a record id, since we intersect index keys
        // based on the record id. The planner ensures that the child stage can never produce an WSM
        // with no record id.
        invariant(member->hasRecordId());

        if (!_dataMap.insert(std::make_pair(member->recordId, id)).second) {
            // Didn't insert because we already had this RecordId inside the map. This should only
            // happen if we're seeing a newer copy of the same doc in a more recent snapshot.
            // Throw out the newer copy of the doc.
            _ws->free(id);
            return PlanStage::NEED_TIME;
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
        member->makeObjOwnedIfNeeded();

        // Update memory stats.
        _memUsage += member->getMemUsage();

        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == childStatus) {
        // Done reading child 0.
        _currentChild = 1;

        // If our first child was empty, don't scan any others, no possible results.
        if (_dataMap.empty()) {
            _hashingChildren = false;
            return PlanStage::IS_EOF;
        }

        _specificStats.mapAfterChild.push_back(_dataMap.size());

        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == childStatus || PlanStage::DEAD == childStatus) {
        // The stage which produces a failure is responsible for allocating a working set member
        // with error details.
        invariant(WorkingSet::INVALID_ID != id);
        *out = id;
        return childStatus;
    } else {
        if (PlanStage::NEED_YIELD == childStatus) {
            *out = id;
        }

        return childStatus;
    }
}
Example #2
0
    BSONObj getObj() const {
        if (!WorkingSetCommon::fetchIfUnfetched(_opCtx, _ws, _id, _recordCursor))
            throw DocumentDeletedException();

        WorkingSetMember* member = _ws->get(_id);

        // Make it owned since we are buffering results.
        member->makeObjOwnedIfNeeded();
        return member->obj.value();
    }
Example #3
0
Status CachedPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
    // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
    // execution work that happens here, so this is needed for the time accounting to
    // make sense.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    // If we work this many times during the trial period, then we will replan the
    // query from scratch.
    size_t maxWorksBeforeReplan =
        static_cast<size_t>(internalQueryCacheEvictionRatio * _decisionWorks);

    // The trial period ends without replanning if the cached plan produces this many results.
    size_t numResults = MultiPlanStage::getTrialPeriodNumToReturn(*_canonicalQuery);

    for (size_t i = 0; i < maxWorksBeforeReplan; ++i) {
        // Might need to yield between calls to work due to the timer elapsing.
        Status yieldStatus = tryYield(yieldPolicy);
        if (!yieldStatus.isOK()) {
            return yieldStatus;
        }

        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = child()->work(&id);

        if (PlanStage::ADVANCED == state) {
            // Save result for later.
            WorkingSetMember* member = _ws->get(id);
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
            member->makeObjOwnedIfNeeded();
            _results.push_back(id);

            if (_results.size() >= numResults) {
                // Once a plan returns enough results, stop working. Update cache with stats
                // from this run and return.
                updatePlanCache();
                return Status::OK();
            }
        } else if (PlanStage::IS_EOF == state) {
            // Cached plan hit EOF quickly enough. No need to replan. Update cache with stats
            // from this run and return.
            updatePlanCache();
            return Status::OK();
        } else if (PlanStage::NEED_YIELD == state) {
            if (id == WorkingSet::INVALID_ID) {
                if (!yieldPolicy->allowedToYield()) {
                    throw WriteConflictException();
                }
            } else {
                WorkingSetMember* member = _ws->get(id);
                invariant(member->hasFetcher());
                // Transfer ownership of the fetcher and yield.
                _fetcher.reset(member->releaseFetcher());
            }

            if (yieldPolicy->allowedToYield()) {
                yieldPolicy->forceYield();
            }

            Status yieldStatus = tryYield(yieldPolicy);
            if (!yieldStatus.isOK()) {
                return yieldStatus;
            }
        } else if (PlanStage::FAILURE == state) {
            // On failure, fall back to replanning the whole query. We neither evict the
            // existing cache entry nor cache the result of replanning.
            BSONObj statusObj;
            WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);

            LOG(1) << "Execution of cached plan failed, falling back to replan."
                   << " query: " << _canonicalQuery->toStringShort()
                   << " planSummary: " << Explain::getPlanSummary(child().get())
                   << " status: " << statusObj;

            const bool shouldCache = false;
            return replan(yieldPolicy, shouldCache);
        } else if (PlanStage::DEAD == state) {
            BSONObj statusObj;
            WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);

            LOG(1) << "Execution of cached plan failed: PlanStage died"
                   << ", query: " << _canonicalQuery->toStringShort()
                   << " planSummary: " << Explain::getPlanSummary(child().get())
                   << " status: " << statusObj;

            return WorkingSetCommon::getMemberObjectStatus(statusObj);
        } else {
            invariant(PlanStage::NEED_TIME == state);
        }
    }

    // If we're here, the trial period took more than 'maxWorksBeforeReplan' work cycles. This
    // plan is taking too long, so we replan from scratch.
    LOG(1) << "Execution of cached plan required " << maxWorksBeforeReplan
           << " works, but was originally cached with only " << _decisionWorks
           << " works. Evicting cache entry and replanning query: "
           << _canonicalQuery->toStringShort()
           << " plan summary before replan: " << Explain::getPlanSummary(child().get());

    const bool shouldCache = true;
    return replan(yieldPolicy, shouldCache);
}
Example #4
0
/**
 * addToBuffer() and sortBuffer() work differently based on the
 * configured limit. addToBuffer() is also responsible for
 * performing some accounting on the overall memory usage to
 * make sure we're not using too much memory.
 *
 * limit == 0:
 *     addToBuffer() - Adds item to vector.
 *     sortBuffer() - Sorts vector.
 * limit == 1:
 *     addToBuffer() - Replaces first item in vector with max of
 *                     current and new item.
 *                     Updates memory usage if item was replaced.
 *     sortBuffer() - Does nothing.
 * limit > 1:
 *     addToBuffer() - Does not update vector. Adds item to set.
 *                     If size of set exceeds limit, remove item from set
 *                     with lowest key. Updates memory usage accordingly.
 *     sortBuffer() - Copies items from set to vectors.
 */
void SortStage::addToBuffer(const SortableDataItem& item) {
    // Holds ID of working set member to be freed at end of this function.
    WorkingSetID wsidToFree = WorkingSet::INVALID_ID;

    WorkingSetMember* member = _ws->get(item.wsid);
    if (_limit == 0) {
        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
        member->makeObjOwnedIfNeeded();
        _data.push_back(item);
        _memUsage += member->getMemUsage();
    } else if (_limit == 1) {
        if (_data.empty()) {
            member->makeObjOwnedIfNeeded();
            _data.push_back(item);
            _memUsage = member->getMemUsage();
            return;
        }
        wsidToFree = item.wsid;
        const WorkingSetComparator& cmp = *_sortKeyComparator;
        // Compare new item with existing item in vector.
        if (cmp(item, _data[0])) {
            wsidToFree = _data[0].wsid;
            member->makeObjOwnedIfNeeded();
            _data[0] = item;
            _memUsage = member->getMemUsage();
        }
    } else {
        // Update data item set instead of vector
        // Limit not reached - insert and return
        vector<SortableDataItem>::size_type limit(_limit);
        if (_dataSet->size() < limit) {
            member->makeObjOwnedIfNeeded();
            _dataSet->insert(item);
            _memUsage += member->getMemUsage();
            return;
        }
        // Limit will be exceeded - compare with item with lowest key
        // If new item does not have a lower key value than last item,
        // do nothing.
        wsidToFree = item.wsid;
        SortableDataItemSet::const_iterator lastItemIt = --(_dataSet->end());
        const SortableDataItem& lastItem = *lastItemIt;
        const WorkingSetComparator& cmp = *_sortKeyComparator;
        if (cmp(item, lastItem)) {
            _memUsage -= _ws->get(lastItem.wsid)->getMemUsage();
            _memUsage += member->getMemUsage();
            wsidToFree = lastItem.wsid;
            // According to std::set iterator validity rules,
            // it does not matter which of erase()/insert() happens first.
            // Here, we choose to erase first to release potential resources
            // used by the last item and to keep the scope of the iterator to a minimum.
            _dataSet->erase(lastItemIt);
            member->makeObjOwnedIfNeeded();
            _dataSet->insert(item);
        }
    }

    // If the working set ID is valid, remove from
    // RecordId invalidation map and free from working set.
    if (wsidToFree != WorkingSet::INVALID_ID) {
        WorkingSetMember* member = _ws->get(wsidToFree);
        if (member->hasLoc()) {
            _wsidByDiskLoc.erase(member->loc);
        }
        _ws->free(wsidToFree);
    }
}
bool MultiPlanStage::workAllPlans(size_t numResults, PlanYieldPolicy* yieldPolicy) {
    bool doneWorking = false;

    for (size_t ix = 0; ix < _candidates.size(); ++ix) {
        CandidatePlan& candidate = _candidates[ix];
        if (candidate.failed) {
            continue;
        }

        // Might need to yield between calls to work due to the timer elapsing.
        if (!(tryYield(yieldPolicy)).isOK()) {
            return false;
        }

        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = candidate.root->work(&id);

        if (PlanStage::ADVANCED == state) {
            // Save result for later.
            WorkingSetMember* member = candidate.ws->get(id);
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we choose to
            // return the results from the 'candidate' plan.
            member->makeObjOwnedIfNeeded();
            candidate.results.push(id);

            // Once a plan returns enough results, stop working.
            if (candidate.results.size() >= numResults) {
                doneWorking = true;
            }
        } else if (PlanStage::IS_EOF == state) {
            // First plan to hit EOF wins automatically.  Stop evaluating other plans.
            // Assumes that the ranking will pick this plan.
            doneWorking = true;
        } else if (PlanStage::NEED_YIELD == state) {
            invariant(id == WorkingSet::INVALID_ID);
            if (!yieldPolicy->canAutoYield()) {
                throw WriteConflictException();
            }

            if (yieldPolicy->canAutoYield()) {
                yieldPolicy->forceYield();
            }

            if (!(tryYield(yieldPolicy)).isOK()) {
                return false;
            }
        } else if (PlanStage::NEED_TIME != state) {
            // FAILURE or DEAD.  Do we want to just tank that plan and try the rest?  We
            // probably want to fail globally as this shouldn't happen anyway.

            candidate.failed = true;
            ++_failureCount;

            // Propagate most recent seen failure to parent.
            if (PlanStage::FAILURE == state) {
                _statusMemberId = id;
            }

            if (_failureCount == _candidates.size()) {
                _failure = true;
                return false;
            }
        }
    }

    return !doneWorking;
}
Example #6
0
// Set "toReturn" when NEED_YIELD.
PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* error) {
    //
    // Try to retrieve the next covered member
    //

    if (!_nextInterval) {
        StatusWith<CoveredInterval*> intervalStatus =
            nextInterval(getOpCtx(), _workingSet, _collection);
        if (!intervalStatus.isOK()) {
            _searchState = SearchState_Finished;
            *error = intervalStatus.getStatus();
            return PlanStage::FAILURE;
        }

        if (NULL == intervalStatus.getValue()) {
            _searchState = SearchState_Finished;
            return PlanStage::IS_EOF;
        }

        // CoveredInterval and its child stage are owned by _childrenIntervals
        _childrenIntervals.push_back(intervalStatus.getValue());
        _nextInterval = _childrenIntervals.back();
        _specificStats.intervalStats.emplace_back();
        _nextIntervalStats = &_specificStats.intervalStats.back();
        _nextIntervalStats->minDistanceAllowed = _nextInterval->minDistance;
        _nextIntervalStats->maxDistanceAllowed = _nextInterval->maxDistance;
        _nextIntervalStats->inclusiveMaxDistanceAllowed = _nextInterval->inclusiveMax;
    }

    WorkingSetID nextMemberID;
    PlanStage::StageState intervalState = _nextInterval->covering->work(&nextMemberID);

    if (PlanStage::IS_EOF == intervalState) {
        _searchState = SearchState_Advancing;
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == intervalState) {
        *error = WorkingSetCommon::getMemberStatus(*_workingSet->get(nextMemberID));
        return intervalState;
    } else if (PlanStage::NEED_YIELD == intervalState) {
        *toReturn = nextMemberID;
        return intervalState;
    } else if (PlanStage::ADVANCED != intervalState) {
        return intervalState;
    }

    //
    // Try to buffer the next covered member
    //

    WorkingSetMember* nextMember = _workingSet->get(nextMemberID);

    // The child stage may not dedup so we must dedup them ourselves.
    if (_nextInterval->dedupCovering && nextMember->hasLoc()) {
        if (_seenDocuments.end() != _seenDocuments.find(nextMember->loc)) {
            _workingSet->free(nextMemberID);
            return PlanStage::NEED_TIME;
        }
    }

    ++_nextIntervalStats->numResultsBuffered;

    StatusWith<double> distanceStatus = computeDistance(nextMember);

    if (!distanceStatus.isOK()) {
        _searchState = SearchState_Finished;
        *error = distanceStatus.getStatus();
        return PlanStage::FAILURE;
    }

    // If the member's distance is in the current distance interval, add it to our buffered
    // results.
    double memberDistance = distanceStatus.getValue();

    // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
    nextMember->makeObjOwnedIfNeeded();
    _resultBuffer.push(SearchResult(nextMemberID, memberDistance));

    // Store the member's RecordId, if available, for quick invalidation
    if (nextMember->hasLoc()) {
        _seenDocuments.insert(std::make_pair(nextMember->loc, nextMemberID));
    }

    return PlanStage::NEED_TIME;
}
Example #7
0
PlanStage::StageState DeleteStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasLoc()) {
            // We expect to be here because of an invalidation causing a force-fetch, and
            // doc-locking storage engines do not issue invalidations.
            ++_specificStats.nInvalidateSkips;
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
        RecordId rloc = member->loc;
        // Deletes can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        try {
            // If the snapshot changed, then we have to make sure we have the latest copy of the
            // doc and that it still matches.
            std::unique_ptr<RecordCursor> cursor;
            if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
                cursor = _collection->getCursor(getOpCtx());
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                    // Doc is already deleted. Nothing more to do.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }

                // Make sure the re-fetched doc still matches the predicate.
                if (_params.canonicalQuery &&
                    !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) {
                    // Doesn't match.
                    ++_commonStats.needTime;
                    return PlanStage::NEED_TIME;
                }
            }

            // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
            // is allowed to free the memory.
            if (_params.returnDeleted) {
                member->makeObjOwnedIfNeeded();
            }

            // TODO: Do we want to buffer docs and delete them in a group rather than
            // saving/restoring state repeatedly?

            try {
                if (supportsDocLocking()) {
                    // Doc-locking engines require this before saveState() since they don't use
                    // invalidations.
                    WorkingSetCommon::prepareForSnapshotChange(_ws);
                }
                child()->saveState();
            } catch (const WriteConflictException& wce) {
                std::terminate();
            }

            if (_params.returnDeleted) {
                // Save a copy of the document that is about to get deleted.
                BSONObj deletedDoc = member->obj.value();
                member->obj.setValue(deletedDoc.getOwned());
                member->loc = RecordId();
                member->transitionToOwnedObj();
            }

            // Do the write, unless this is an explain.
            if (!_params.isExplain) {
                WriteUnitOfWork wunit(getOpCtx());
                _collection->deleteDocument(getOpCtx(), rloc);
                wunit.commit();
            }

            ++_specificStats.docsDeleted;
        } catch (const WriteConflictException& wce) {
            // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may be
            // freed when we yield.
            member->makeObjOwnedIfNeeded();
            _idRetrying = id;
            memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        //  As restoreState may restore (recreate) cursors, cursors are tied to the
        //  transaction in which they are created, and a WriteUnitOfWork is a
        //  transaction, make sure to restore the state outside of the WritUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException& wce) {
            // Note we don't need to retry anything in this case since the delete already
            // was committed. However, we still need to return the deleted document
            // (if it was requested).
            if (_params.returnDeleted) {
                // member->obj should refer to the deleted document.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            _commonStats.needYield++;
            return NEED_YIELD;
        }

        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid.  If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "delete stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return status;
    } else if (PlanStage::NEED_TIME == status) {
        ++_commonStats.needTime;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
        ++_commonStats.needYield;
    }

    return status;
}
Example #8
0
PlanStage::StageState TextOrStage::addTerm(WorkingSetID wsid, WorkingSetID* out) {
    WorkingSetMember* wsm = _ws->get(wsid);
    invariant(wsm->getState() == WorkingSetMember::RID_AND_IDX);
    invariant(1 == wsm->keyData.size());
    const IndexKeyDatum newKeyData = wsm->keyData.back();  // copy to keep it around.
    TextRecordData* textRecordData = &_scores[wsm->recordId];

    if (textRecordData->score < 0) {
        // We have already rejected this document for not matching the filter.
        invariant(WorkingSet::INVALID_ID == textRecordData->wsid);
        _ws->free(wsid);
        return NEED_TIME;
    }

    if (WorkingSet::INVALID_ID == textRecordData->wsid) {
        // We haven't seen this RecordId before.
        invariant(textRecordData->score == 0);
        bool shouldKeep = true;
        if (_filter) {
            // We have not seen this document before and need to apply a filter.
            bool wasDeleted = false;
            try {
                TextMatchableDocument tdoc(getOpCtx(),
                                           newKeyData.indexKeyPattern,
                                           newKeyData.keyData,
                                           _ws,
                                           wsid,
                                           _recordCursor);
                shouldKeep = _filter->matches(&tdoc);
            } catch (const WriteConflictException& wce) {
                // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may
                // be freed when we yield.
                wsm->makeObjOwnedIfNeeded();
                _idRetrying = wsid;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            } catch (const TextMatchableDocument::DocumentDeletedException&) {
                // We attempted to fetch the document but decided it should be excluded from the
                // result set.
                shouldKeep = false;
                wasDeleted = true;
            }

            if (wasDeleted || wsm->hasObj()) {
                ++_specificStats.fetches;
            }
        }

        if (shouldKeep && !wsm->hasObj()) {
            // Our parent expects RID_AND_OBJ members, so we fetch the document here if we haven't
            // already.
            try {
                shouldKeep = WorkingSetCommon::fetch(getOpCtx(), _ws, wsid, _recordCursor);
                ++_specificStats.fetches;
            } catch (const WriteConflictException& wce) {
                wsm->makeObjOwnedIfNeeded();
                _idRetrying = wsid;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            }
        }

        if (!shouldKeep) {
            _ws->free(wsid);
            textRecordData->score = -1;
            return NEED_TIME;
        }

        textRecordData->wsid = wsid;

        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
        wsm->makeObjOwnedIfNeeded();
    } else {
        // We already have a working set member for this RecordId. Free the new WSM and retrieve the
        // old one. Note that since we don't keep all index keys, we could get a score that doesn't
        // match the document, but this has always been a problem.
        // TODO something to improve the situation.
        invariant(wsid != textRecordData->wsid);
        _ws->free(wsid);
        wsm = _ws->get(textRecordData->wsid);
    }

    // Locate score within possibly compound key: {prefix,term,score,suffix}.
    BSONObjIterator keyIt(newKeyData.keyData);
    for (unsigned i = 0; i < _ftsSpec.numExtraBefore(); i++) {
        keyIt.next();
    }

    keyIt.next();  // Skip past 'term'.

    BSONElement scoreElement = keyIt.next();
    double documentTermScore = scoreElement.number();

    // Aggregate relevance score, term keys.
    textRecordData->score += documentTermScore;
    return NEED_TIME;
}
Example #9
0
PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (doneUpdating()) {
        // Even if we're done updating, we may have some inserting left to do.
        if (needInsert()) {
            // TODO we may want to handle WriteConflictException here. Currently we bounce it
            // out to a higher level since if this WCEs it is likely that we raced with another
            // upsert that may have matched our query, and therefore this may need to perform an
            // update rather than an insert. Bouncing to the higher level allows restarting the
            // query in this case.
            doInsert();

            invariant(isEOF());
            if (_params.request->shouldReturnNewDocs()) {
                // Want to return the document we just inserted, create it as a WorkingSetMember
                // so that we can return it.
                BSONObj newObj = _specificStats.objInserted;
                *out = _ws->allocate();
                WorkingSetMember* member = _ws->get(*out);
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
                member->transitionToOwnedObj();
                return PlanStage::ADVANCED;
            }
        }

        // At this point either we're done updating and there was no insert to do,
        // or we're done updating and we're done inserting. Either way, we're EOF.
        invariant(isEOF());
        return PlanStage::IS_EOF;
    }

    // If we're here, then we still have to ask for results from the child and apply
    // updates to them. We should only get here if the collection exists.
    invariant(_collection);

    // It is possible that after an update was applied, a WriteConflictException
    // occurred and prevented us from returning ADVANCED with the requested version
    // of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.request->shouldReturnAnyDocs());

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        // Need to get these things from the result returned by the child.
        RecordId recordId;

        WorkingSetMember* member = _ws->get(id);

        // We want to free this member when we return, unless we need to retry updating or returning
        // it.
        ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

        if (!member->hasRecordId()) {
            // We expect to be here because of an invalidation causing a force-fetch.
            ++_specificStats.nInvalidateSkips;
            return PlanStage::NEED_TIME;
        }
        recordId = member->recordId;

        // Updates can't have projections. This means that covering analysis will always add
        // a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        // We fill this with the new RecordIds of moved doc so we don't double-update.
        if (_updatedRecordIds && _updatedRecordIds->count(recordId) > 0) {
            // Found a RecordId that refers to a document we had already updated. Note that
            // we can never remove from _updatedRecordIds because updates by other clients
            // could cause us to encounter a document again later.
            return PlanStage::NEED_TIME;
        }

        bool docStillMatches;
        try {
            docStillMatches = write_stage_common::ensureStillMatches(
                _collection, getOpCtx(), _ws, id, _params.canonicalQuery);
        } catch (const WriteConflictException&) {
            // There was a problem trying to detect if the document still exists, so retry.
            memberFreer.Dismiss();
            return prepareToRetryWSM(id, out);
        }

        if (!docStillMatches) {
            // Either the document has been deleted, or it has been updated such that it no longer
            // matches the predicate.
            if (shouldRestartUpdateIfNoLongerMatches(_params)) {
                throw WriteConflictException();
            }
            return PlanStage::NEED_TIME;
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        member->makeObjOwnedIfNeeded();

        // Save state before making changes
        WorkingSetCommon::prepareForSnapshotChange(_ws);
        try {
            child()->saveState();
        } catch (const WriteConflictException&) {
            std::terminate();
        }

        // If we care about the pre-updated version of the doc, save it out here.
        BSONObj oldObj;
        if (_params.request->shouldReturnOldDocs()) {
            oldObj = member->obj.value().getOwned();
        }

        BSONObj newObj;
        try {
            // Do the update, get us the new version of the doc.
            newObj = transformAndUpdate(member->obj, recordId);
        } catch (const WriteConflictException&) {
            memberFreer.Dismiss();  // Keep this member around so we can retry updating it.
            return prepareToRetryWSM(id, out);
        }

        // Set member's obj to be the doc we want to return.
        if (_params.request->shouldReturnAnyDocs()) {
            if (_params.request->shouldReturnNewDocs()) {
                member->obj = Snapshotted<BSONObj>(getOpCtx()->recoveryUnit()->getSnapshotId(),
                                                   newObj.getOwned());
            } else {
                invariant(_params.request->shouldReturnOldDocs());
                member->obj.setValue(oldObj);
            }
            member->recordId = RecordId();
            member->transitionToOwnedObj();
        }

        // This should be after transformAndUpdate to make sure we actually updated this doc.
        ++_specificStats.nMatched;

        // Restore state after modification

        // As restoreState may restore (recreate) cursors, make sure to restore the
        // state outside of the WritUnitOfWork.
        try {
            child()->restoreState();
        } catch (const WriteConflictException&) {
            // Note we don't need to retry updating anything in this case since the update
            // already was committed. However, we still need to return the updated document
            // (if it was requested).
            if (_params.request->shouldReturnAnyDocs()) {
                // member->obj should refer to the document we want to return.
                invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

                _idReturning = id;
                // Keep this member around so that we can return it on the next work() call.
                memberFreer.Dismiss();
            }
            *out = WorkingSet::INVALID_ID;
            return NEED_YIELD;
        }

        if (_params.request->shouldReturnAnyDocs()) {
            // member->obj should refer to the document we want to return.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            memberFreer.Dismiss();  // Keep this member around so we can return it.
            *out = id;
            return PlanStage::ADVANCED;
        }

        return PlanStage::NEED_TIME;
    } else if (PlanStage::IS_EOF == status) {
        // The child is out of results, but we might not be done yet because we still might
        // have to do an insert.
        return PlanStage::NEED_TIME;
    } else if (PlanStage::FAILURE == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which case
        // 'id' is valid.  If ID is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "update stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
            return PlanStage::FAILURE;
        }
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}
Example #10
0
PlanStage::StageState FetchStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // If there's an obj there, there is no fetching to perform.
        if (member->hasObj()) {
            ++_specificStats.alreadyHasObj;
        } else {
            // We need a valid loc to fetch from and this is the only state that has one.
            verify(WorkingSetMember::LOC_AND_IDX == member->getState());
            verify(member->hasLoc());

            try {
                if (!_cursor)
                    _cursor = _collection->getCursor(getOpCtx());

                if (auto fetcher = _cursor->fetcherForId(member->loc)) {
                    // There's something to fetch. Hand the fetcher off to the WSM, and pass up
                    // a fetch request.
                    _idRetrying = id;
                    member->setFetcher(fetcher.release());
                    *out = id;
                    return NEED_YIELD;
                }

                // The doc is already in memory, so go ahead and grab it. Now we have a RecordId
                // as well as an unowned object
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) {
                    _ws->free(id);
                    return NEED_TIME;
                }
            } catch (const WriteConflictException& wce) {
                // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may
                // be freed when we yield.
                member->makeObjOwnedIfNeeded();
                _idRetrying = id;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            }
        }

        return returnIfMatches(member, id, out);
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it
        // failed, in which case 'id' is valid.  If ID is invalid, we
        // create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            mongoutils::str::stream ss;
            ss << "fetch stage failed to read in results from child";
            Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        }
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}
Example #11
0
PlanStage::StageState MergeSortStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (!_noResultToMerge.empty()) {
        // We have some child that we don't have a result from.  Each child must have a result
        // in order to pick the minimum result among all our children.  Work a child.
        PlanStage* child = _noResultToMerge.front();
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child->work(&id);

        if (PlanStage::ADVANCED == code) {
            WorkingSetMember* member = _ws->get(id);

            // If we're deduping...
            if (_dedup) {
                if (!member->hasRecordId()) {
                    // Can't dedup data unless there's a RecordId.  We go ahead and use its
                    // result.
                    _noResultToMerge.pop();
                } else {
                    ++_specificStats.dupsTested;
                    // ...and there's a RecordId and and we've seen the RecordId before
                    if (_seen.end() != _seen.find(member->recordId)) {
                        // ...drop it.
                        _ws->free(id);
                        ++_specificStats.dupsDropped;
                        return PlanStage::NEED_TIME;
                    } else {
                        // Otherwise, note that we've seen it.
                        _seen.insert(member->recordId);
                        // We're going to use the result from the child, so we remove it from
                        // the queue of children without a result.
                        _noResultToMerge.pop();
                    }
                }
            } else {
                // Not deduping.  We use any result we get from the child.  Remove the child
                // from the queue of things without a result.
                _noResultToMerge.pop();
            }

            // Store the result in our list.
            StageWithValue value;
            value.id = id;
            value.stage = child;
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
            member->makeObjOwnedIfNeeded();
            _mergingData.push_front(value);

            // Insert the result (indirectly) into our priority queue.
            _merging.push(_mergingData.begin());

            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // There are no more results possible from this child.  Don't bother with it
            // anymore.
            _noResultToMerge.pop();
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If ID is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "merge sort stage failed to read in results from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else {
            if (PlanStage::NEED_YIELD == code) {
                *out = id;
            }

            return code;
        }
    }

    // If we're here, for each non-EOF child, we have a valid WSID.
    verify(!_merging.empty());

    // Get the 'min' WSID.  _merging is a priority queue so its top is the smallest.
    MergingRef top = _merging.top();
    _merging.pop();

    // Since we're returning the WSID that came from top->stage, we need to work(...) it again
    // to get a new result.
    _noResultToMerge.push(top->stage);

    // Save the ID that we're returning and remove the returned result from our data.
    WorkingSetID idToTest = top->id;
    _mergingData.erase(top);

    // Return the min.
    *out = idToTest;

    return PlanStage::ADVANCED;
}
Example #12
0
Status CachedPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
    // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
    // execution work that happens here, so this is needed for the time accounting to
    // make sense.
    ScopedTimer timer(getClock(), &_commonStats.executionTimeMillis);

    // During plan selection, the list of indices we are using to plan must remain stable, so the
    // query will die during yield recovery if any index has been dropped. However, once plan
    // selection completes successfully, we no longer need all indices to stick around. The selected
    // plan should safely die on yield recovery if it is using the dropped index.
    //
    // Dismiss the requirement that no indices can be dropped when this method returns.
    ON_BLOCK_EXIT([this] { releaseAllIndicesRequirement(); });

    // If we work this many times during the trial period, then we will replan the
    // query from scratch.
    size_t maxWorksBeforeReplan =
        static_cast<size_t>(internalQueryCacheEvictionRatio * _decisionWorks);

    // The trial period ends without replanning if the cached plan produces this many results.
    size_t numResults = MultiPlanStage::getTrialPeriodNumToReturn(*_canonicalQuery);

    for (size_t i = 0; i < maxWorksBeforeReplan; ++i) {
        // Might need to yield between calls to work due to the timer elapsing.
        Status yieldStatus = tryYield(yieldPolicy);
        if (!yieldStatus.isOK()) {
            return yieldStatus;
        }

        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = child()->work(&id);

        if (PlanStage::ADVANCED == state) {
            // Save result for later.
            WorkingSetMember* member = _ws->get(id);
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
            member->makeObjOwnedIfNeeded();
            _results.push(id);

            if (_results.size() >= numResults) {
                // Once a plan returns enough results, stop working. Update cache with stats
                // from this run and return.
                updatePlanCache();
                return Status::OK();
            }
        } else if (PlanStage::IS_EOF == state) {
            // Cached plan hit EOF quickly enough. No need to replan. Update cache with stats
            // from this run and return.
            updatePlanCache();
            return Status::OK();
        } else if (PlanStage::NEED_YIELD == state) {
            invariant(id == WorkingSet::INVALID_ID);
            if (!yieldPolicy->canAutoYield()) {
                throw WriteConflictException();
            }

            if (yieldPolicy->canAutoYield()) {
                yieldPolicy->forceYield();
            }

            Status yieldStatus = tryYield(yieldPolicy);
            if (!yieldStatus.isOK()) {
                return yieldStatus;
            }
        } else if (PlanStage::FAILURE == state) {
            // On failure, fall back to replanning the whole query. We neither evict the
            // existing cache entry nor cache the result of replanning.
            BSONObj statusObj;
            WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);

            LOG(1) << "Execution of cached plan failed, falling back to replan."
                   << " query: " << redact(_canonicalQuery->toStringShort())
                   << " planSummary: " << Explain::getPlanSummary(child().get())
                   << " status: " << redact(statusObj);

            const bool shouldCache = false;
            return replan(yieldPolicy, shouldCache);
        } else {
            invariant(PlanStage::NEED_TIME == state);
        }
    }

    // If we're here, the trial period took more than 'maxWorksBeforeReplan' work cycles. This
    // plan is taking too long, so we replan from scratch.
    LOG(1) << "Execution of cached plan required " << maxWorksBeforeReplan
           << " works, but was originally cached with only " << _decisionWorks
           << " works. Evicting cache entry and replanning query: "
           << redact(_canonicalQuery->toStringShort())
           << " plan summary before replan: " << Explain::getPlanSummary(child().get());

    const bool shouldCache = true;
    return replan(yieldPolicy, shouldCache);
}
Example #13
0
PlanStage::StageState FetchStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    StageState status;
    if (_idRetrying == WorkingSet::INVALID_ID) {
        status = child()->work(&id);
    } else {
        status = ADVANCED;
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    }

    if (PlanStage::ADVANCED == status) {
        WorkingSetMember* member = _ws->get(id);

        // If there's an obj there, there is no fetching to perform.
        if (member->hasObj()) {
            ++_specificStats.alreadyHasObj;
        } else {
            // We need a valid RecordId to fetch from and this is the only state that has one.
            verify(WorkingSetMember::RID_AND_IDX == member->getState());
            verify(member->hasRecordId());

            try {
                if (!_cursor)
                    _cursor = _collection->getCursor(getOpCtx());

                if (auto fetcher = _cursor->fetcherForId(member->recordId)) {
                    // There's something to fetch. Hand the fetcher off to the WSM, and pass up
                    // a fetch request.
                    _idRetrying = id;
                    member->setFetcher(fetcher.release());
                    *out = id;
                    return NEED_YIELD;
                }

                // The doc is already in memory, so go ahead and grab it. Now we have a RecordId
                // as well as an unowned object
                if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) {
                    _ws->free(id);
                    return NEED_TIME;
                }
            } catch (const WriteConflictException&) {
                // Ensure that the BSONObj underlying the WorkingSetMember is owned because it may
                // be freed when we yield.
                member->makeObjOwnedIfNeeded();
                _idRetrying = id;
                *out = WorkingSet::INVALID_ID;
                return NEED_YIELD;
            }
        }

        return returnIfMatches(member, id, out);
    } else if (PlanStage::FAILURE == status || PlanStage::DEAD == status) {
        // The stage which produces a failure is responsible for allocating a working set member
        // with error details.
        invariant(WorkingSet::INVALID_ID != id);
        *out = id;
        return status;
    } else if (PlanStage::NEED_YIELD == status) {
        *out = id;
    }

    return status;
}