Example No. 1
// static
bool WorkingSetCommon::fetch(OperationContext* txn,
                             WorkingSet* workingSet,
                             WorkingSetID id,
                             unowned_ptr<SeekableRecordCursor> cursor) {
    WorkingSetMember* member = workingSet->get(id);

    // The RecordFetcher should already have been transferred out of the WSM and used.
    invariant(!member->hasFetcher());

    // We should have a RecordId but need to retrieve the obj. Get the obj now and reset all WSM
    // state appropriately.
    invariant(member->hasRecordId());

    member->obj.reset();
    auto record = cursor->seekExact(member->recordId);
    if (!record) {
        return false;
    }

    member->obj = {txn->recoveryUnit()->getSnapshotId(), record->data.releaseToBson()};

    if (member->isSuspicious) {
        // Make sure that all of the keyData is still valid for this copy of the document.
        // This ensures both that index-provided filters and sort orders still hold.
        // TODO provide a way for the query planner to opt out of this checking if it is
        // unneeded due to the structure of the plan.
        invariant(!member->keyData.empty());
        for (size_t i = 0; i < member->keyData.size(); i++) {
            BSONObjSet keys;
            // There's no need to compute the prefixes of the indexed fields that cause the index to
            // be multikey when ensuring the keyData is still valid.
            MultikeyPaths* multikeyPaths = nullptr;
            member->keyData[i].index->getKeys(member->obj.value(), &keys, multikeyPaths);
            if (!keys.count(member->keyData[i].keyData)) {
                // The document would no longer be at this position in the index.
                return false;
            }
        }

        member->isSuspicious = false;
    }

    member->keyData.clear();
    workingSet->transitionToRecordIdAndObj(id);
    return true;
}
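
Example No. 1 above re-fetches a document by its RecordId after a yield and, when the
member is marked suspicious, recomputes the index keys for the fresh copy to confirm the
document still belongs at the index position that produced it. A minimal, self-contained
sketch of that re-validation idea follows; Doc, KeyEntry, extractKeys, and keysStillValid
are hypothetical stand-ins for illustration, not MongoDB's API:

#include <map>
#include <set>
#include <string>
#include <vector>

// Hypothetical simplified model: a document is a field->value map, and an
// "index" on a field generates one key per document, namely that field's value.
using Doc = std::map<std::string, std::string>;

struct KeyEntry {
    std::string indexedField;  // Which field the key came from.
    std::string keyData;       // The key the index reported before the yield.
};

// Recompute the keys the index would generate for the fresh copy of the
// document, standing in for the getKeys() call in the example above.
std::set<std::string> extractKeys(const Doc& doc, const std::string& field) {
    std::set<std::string> keys;
    auto it = doc.find(field);
    if (it != doc.end()) {
        keys.insert(it->second);
    }
    return keys;
}

// Returns true only if every previously seen key is still produced by the
// fresh copy, mirroring the isSuspicious loop in WorkingSetCommon::fetch().
bool keysStillValid(const Doc& freshDoc, const std::vector<KeyEntry>& keyData) {
    for (const KeyEntry& entry : keyData) {
        if (!extractKeys(freshDoc, entry.indexedField).count(entry.keyData)) {
            return false;  // The document moved in the index; discard this result.
        }
    }
    return true;
}

If keysStillValid() returns false, the caller drops the member, just as fetch() returns
false so the stage above it can skip the invalidated result.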
Example No. 2
Status CachedPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
    // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
    // execution work that happens here, so this is needed for the time accounting to
    // make sense.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    // If we work this many times during the trial period, then we will replan the
    // query from scratch.
    size_t maxWorksBeforeReplan =
        static_cast<size_t>(internalQueryCacheEvictionRatio * _decisionWorks);

    // The trial period ends without replanning if the cached plan produces this many results.
    size_t numResults = MultiPlanStage::getTrialPeriodNumToReturn(*_canonicalQuery);

    for (size_t i = 0; i < maxWorksBeforeReplan; ++i) {
        // Might need to yield between calls to work due to the timer elapsing.
        Status yieldStatus = tryYield(yieldPolicy);
        if (!yieldStatus.isOK()) {
            return yieldStatus;
        }

        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = child()->work(&id);

        if (PlanStage::ADVANCED == state) {
            // Save result for later.
            WorkingSetMember* member = _ws->get(id);
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
            member->makeObjOwnedIfNeeded();
            _results.push_back(id);

            if (_results.size() >= numResults) {
                // Once a plan returns enough results, stop working. Update cache with stats
                // from this run and return.
                updatePlanCache();
                return Status::OK();
            }
        } else if (PlanStage::IS_EOF == state) {
            // Cached plan hit EOF quickly enough. No need to replan. Update cache with stats
            // from this run and return.
            updatePlanCache();
            return Status::OK();
        } else if (PlanStage::NEED_YIELD == state) {
            if (id == WorkingSet::INVALID_ID) {
                if (!yieldPolicy->allowedToYield()) {
                    throw WriteConflictException();
                }
            } else {
                WorkingSetMember* member = _ws->get(id);
                invariant(member->hasFetcher());
                // Transfer ownership of the fetcher and yield.
                _fetcher.reset(member->releaseFetcher());
            }

            if (yieldPolicy->allowedToYield()) {
                yieldPolicy->forceYield();
            }

            Status yieldStatus = tryYield(yieldPolicy);
            if (!yieldStatus.isOK()) {
                return yieldStatus;
            }
        } else if (PlanStage::FAILURE == state) {
            // On failure, fall back to replanning the whole query. We neither evict the
            // existing cache entry nor cache the result of replanning.
            BSONObj statusObj;
            WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);

            LOG(1) << "Execution of cached plan failed, falling back to replan."
                   << " query: " << _canonicalQuery->toStringShort()
                   << " planSummary: " << Explain::getPlanSummary(child().get())
                   << " status: " << statusObj;

            const bool shouldCache = false;
            return replan(yieldPolicy, shouldCache);
        } else if (PlanStage::DEAD == state) {
            BSONObj statusObj;
            WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);

            LOG(1) << "Execution of cached plan failed: PlanStage died"
                   << ", query: " << _canonicalQuery->toStringShort()
                   << " planSummary: " << Explain::getPlanSummary(child().get())
                   << " status: " << statusObj;

            return WorkingSetCommon::getMemberObjectStatus(statusObj);
        } else {
            invariant(PlanStage::NEED_TIME == state);
        }
    }

    // If we're here, the trial period took more than 'maxWorksBeforeReplan' work cycles. This
    // plan is taking too long, so we replan from scratch.
    LOG(1) << "Execution of cached plan required " << maxWorksBeforeReplan
           << " works, but was originally cached with only " << _decisionWorks
           << " works. Evicting cache entry and replanning query: "
           << _canonicalQuery->toStringShort()
           << " plan summary before replan: " << Explain::getPlanSummary(child().get());

    const bool shouldCache = true;
    return replan(yieldPolicy, shouldCache);
}
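
The trial-period budget in Example No. 2 is the product of a server-wide eviction ratio
and the work count recorded when the plan was first cached: with a ratio of 10.0, a plan
that originally needed 100 work() calls gets a 1,000-call trial before it is evicted. A
small, self-contained sketch of that decision, with kEvictionRatio as a hypothetical
stand-in for the internalQueryCacheEvictionRatio server parameter:

#include <cstddef>

// Hypothetical constant standing in for internalQueryCacheEvictionRatio.
constexpr double kEvictionRatio = 10.0;

// The cached plan may consume up to kEvictionRatio times the works it
// originally needed, exactly as maxWorksBeforeReplan is computed above.
size_t maxWorksBeforeReplan(size_t decisionWorks) {
    return static_cast<size_t>(kEvictionRatio * decisionWorks);
}

// Simplified trial outcome: keep the cached plan, or evict it and replan.
enum class TrialOutcome { kKeepPlan, kEvictAndReplan };

TrialOutcome evaluateTrial(size_t worksUsed, size_t decisionWorks) {
    return worksUsed >= maxWorksBeforeReplan(decisionWorks) ? TrialOutcome::kEvictAndReplan
                                                            : TrialOutcome::kKeepPlan;
}

Note that pickBestPlan() ends the trial early and keeps the plan the moment it produces
enough results or hits EOF; only exhausting the whole budget triggers eviction.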
Example No. 3
    bool MultiPlanStage::workAllPlans(size_t numResults, PlanYieldPolicy* yieldPolicy) {
        bool doneWorking = false;

        for (size_t ix = 0; ix < _candidates.size(); ++ix) {
            CandidatePlan& candidate = _candidates[ix];
            if (candidate.failed) { continue; }

            // Might need to yield between calls to work due to the timer elapsing.
            if (!(tryYield(yieldPolicy)).isOK()) {
                return false;
            }

            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState state = candidate.root->work(&id);

            if (PlanStage::ADVANCED == state) {
                // Save result for later.
                candidate.results.push_back(id);

                // Once a plan returns enough results, stop working.
                if (candidate.results.size() >= numResults) {
                    doneWorking = true;
                }
            }
            else if (PlanStage::IS_EOF == state) {
                // First plan to hit EOF wins automatically.  Stop evaluating other plans.
                // Assumes that the ranking will pick this plan.
                doneWorking = true;
            }
            else if (PlanStage::NEED_YIELD == state) {
                if (id == WorkingSet::INVALID_ID) {
                    if (!yieldPolicy->allowedToYield())
                        throw WriteConflictException();
                }
                else {
                    WorkingSetMember* member = candidate.ws->get(id);
                    invariant(member->hasFetcher());
                    // Transfer ownership of the fetcher and yield.
                    _fetcher.reset(member->releaseFetcher());
                }

                if (yieldPolicy->allowedToYield()) {
                    yieldPolicy->forceYield();
                }

                if (!(tryYield(yieldPolicy)).isOK()) {
                    return false;
                }
            }
            else if (PlanStage::NEED_TIME != state) {
                // FAILURE or DEAD.  Do we want to just tank that plan and try the rest?  We
                // probably want to fail globally as this shouldn't happen anyway.

                candidate.failed = true;
                ++_failureCount;

                // Propagate most recent seen failure to parent.
                if (PlanStage::FAILURE == state) {
                    _statusMemberId = id;
                }

                if (_failureCount == _candidates.size()) {
                    _failure = true;
                    return false;
                }
            }
        }

        return !doneWorking;
    }
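
Example No. 3 races candidate plans in round-robin: each pass gives every live plan
exactly one work() call, so a slow plan cannot starve the others, and the first plan to
reach EOF or the result target ends the trial. A minimal, self-contained sketch of that
loop; Candidate and workOnce are hypothetical stand-ins for CandidatePlan and
root->work():

#include <cstddef>
#include <functional>
#include <vector>

struct Candidate {
    size_t results = 0;   // Results buffered so far.
    bool eof = false;     // Plan finished its entire result set.
    bool failed = false;  // Plan errored; skip it on later passes.
};

// One round-robin pass over all candidates. 'workOnce' performs a single unit
// of work (it may set eof or failed) and returns true when it produced a
// result. Returns false once some plan has numResults results or has hit EOF,
// i.e. the trial is over.
bool workAllCandidates(std::vector<Candidate>& candidates,
                       size_t numResults,
                       const std::function<bool(Candidate&)>& workOnce) {
    bool doneWorking = false;
    for (Candidate& candidate : candidates) {
        if (candidate.failed || candidate.eof) {
            continue;
        }
        if (workOnce(candidate)) {
            ++candidate.results;
        }
        if (candidate.eof || candidate.results >= numResults) {
            doneWorking = true;  // First plan to finish wins the trial.
        }
    }
    return !doneWorking;
}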
Example No. 4
PlanExecutor::ExecState PlanExecutor::getNextImpl(Snapshotted<BSONObj>* objOut, RecordId* dlOut) {
    if (MONGO_FAIL_POINT(planExecutorAlwaysFails)) {
        Status status(ErrorCodes::OperationFailed,
                      str::stream() << "PlanExecutor hit planExecutorAlwaysFails fail point");
        *objOut =
            Snapshotted<BSONObj>(SnapshotId(), WorkingSetCommon::buildMemberStatusObject(status));

        return PlanExecutor::FAILURE;
    }

    invariant(_currentState == kUsable);
    if (isMarkedAsKilled()) {
        if (NULL != objOut) {
            Status status(ErrorCodes::OperationFailed,
                          str::stream() << "Operation aborted because: " << *_killReason);
            *objOut = Snapshotted<BSONObj>(SnapshotId(),
                                           WorkingSetCommon::buildMemberStatusObject(status));
        }
        return PlanExecutor::DEAD;
    }

    if (!_stash.empty()) {
        invariant(objOut && !dlOut);
        *objOut = {SnapshotId(), _stash.front()};
        _stash.pop();
        return PlanExecutor::ADVANCED;
    }

    // When a stage requests a yield for document fetch, it gives us back a RecordFetcher*
    // to use to pull the record into memory. We take ownership of the RecordFetcher here,
    // deleting it after we've had a chance to do the fetch. For timing-based yields, we
    // just pass a NULL fetcher.
    unique_ptr<RecordFetcher> fetcher;

    // Incremented on every writeConflict, reset to 0 on any successful call to _root->work.
    size_t writeConflictsInARow = 0;

    for (;;) {
        // These are the conditions which can cause us to yield:
        //   1) The yield policy's timer elapsed, or
        //   2) some stage requested a yield due to a document fetch, or
        //   3) we need to yield and retry due to a WriteConflictException.
        // In all cases, the actual yielding happens here.
        if (_yieldPolicy->shouldYield()) {
            if (!_yieldPolicy->yield(fetcher.get())) {
                // A return of false from a yield should only happen if we've been killed during the
                // yield.
                invariant(isMarkedAsKilled());

                if (NULL != objOut) {
                    Status status(ErrorCodes::OperationFailed,
                                  str::stream() << "Operation aborted because: " << *_killReason);
                    *objOut = Snapshotted<BSONObj>(
                        SnapshotId(), WorkingSetCommon::buildMemberStatusObject(status));
                }
                return PlanExecutor::DEAD;
            }
        }

        // We're done using the fetcher, so it should be freed. We don't want to
        // use the same RecordFetcher twice.
        fetcher.reset();

        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState code = _root->work(&id);

        if (code != PlanStage::NEED_YIELD)
            writeConflictsInARow = 0;

        if (PlanStage::ADVANCED == code) {
            WorkingSetMember* member = _workingSet->get(id);
            bool hasRequestedData = true;

            if (NULL != objOut) {
                if (WorkingSetMember::RID_AND_IDX == member->getState()) {
                    if (1 != member->keyData.size()) {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    } else {
                        // TODO: currently snapshot ids are only associated with documents, and
                        // not with index keys.
                        *objOut = Snapshotted<BSONObj>(SnapshotId(), member->keyData[0].keyData);
                    }
                } else if (member->hasObj()) {
                    *objOut = member->obj;
                } else {
                    _workingSet->free(id);
                    hasRequestedData = false;
                }
            }

            if (NULL != dlOut) {
                if (member->hasRecordId()) {
                    *dlOut = member->recordId;
                } else {
                    _workingSet->free(id);
                    hasRequestedData = false;
                }
            }

            if (hasRequestedData) {
                _workingSet->free(id);
                return PlanExecutor::ADVANCED;
            }
            // This result didn't have the data the caller wanted, try again.
        } else if (PlanStage::NEED_YIELD == code) {
            if (id == WorkingSet::INVALID_ID) {
                if (!_yieldPolicy->canAutoYield())
                    throw WriteConflictException();
                CurOp::get(_opCtx)->debug().writeConflicts++;
                writeConflictsInARow++;
                WriteConflictException::logAndBackoff(
                    writeConflictsInARow, "plan execution", _nss.ns());

            } else {
                WorkingSetMember* member = _workingSet->get(id);
                invariant(member->hasFetcher());
                // Transfer ownership of the fetcher. Next time around the loop a yield will
                // happen.
                fetcher.reset(member->releaseFetcher());
            }

            // If we're allowed to, we will yield next time through the loop.
            if (_yieldPolicy->canAutoYield())
                _yieldPolicy->forceYield();
        } else if (PlanStage::NEED_TIME == code) {
            // Fall through to yield check at end of large conditional.
        } else if (PlanStage::IS_EOF == code) {
            if (shouldWaitForInserts()) {
                const bool locksReacquiredAfterYield = waitForInserts();
                if (locksReacquiredAfterYield) {
                    // There may be more results, try to get more data.
                    continue;
                }
                invariant(isMarkedAsKilled());
                if (objOut) {
                    Status status(ErrorCodes::OperationFailed,
                                  str::stream() << "Operation aborted because: " << *_killReason);
                    *objOut = Snapshotted<BSONObj>(
                        SnapshotId(), WorkingSetCommon::buildMemberStatusObject(status));
                }
                return PlanExecutor::DEAD;
            } else {
                return PlanExecutor::IS_EOF;
            }
        } else {
            invariant(PlanStage::DEAD == code || PlanStage::FAILURE == code);

            if (NULL != objOut) {
                BSONObj statusObj;
                WorkingSetCommon::getStatusMemberObject(*_workingSet, id, &statusObj);
                *objOut = Snapshotted<BSONObj>(SnapshotId(), statusObj);
            }

            return (PlanStage::DEAD == code) ? PlanExecutor::DEAD : PlanExecutor::FAILURE;
        }
    }
}
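
Example No. 4 is the core driver loop: work() the root stage, translate each StageState
into an ExecState for the caller, and keep looping on NEED_TIME and NEED_YIELD until the
plan advances, finishes, or dies. From the caller's side, the whole machine reduces to
"call getNext() until it stops returning ADVANCED". A self-contained mock of that
contract (MockExecutor and its getNext() are illustrative, not MongoDB's headers):

#include <iostream>
#include <queue>
#include <string>

// Simplified ExecState: ADVANCED yields a row, IS_EOF ends the stream,
// DEAD/FAILURE abort with an error object, as in getNextImpl() above.
enum class ExecState { ADVANCED, IS_EOF, DEAD, FAILURE };

struct MockExecutor {
    std::queue<std::string> rows;

    ExecState getNext(std::string* out) {
        if (rows.empty()) {
            return ExecState::IS_EOF;
        }
        *out = rows.front();
        rows.pop();
        return ExecState::ADVANCED;
    }
};

int main() {
    MockExecutor exec;
    exec.rows.push("doc1");
    exec.rows.push("doc2");

    std::string row;
    ExecState state;
    // The canonical drain loop: consume results until something other than
    // ADVANCED comes back, then inspect the terminal state.
    while ((state = exec.getNext(&row)) == ExecState::ADVANCED) {
        std::cout << row << '\n';
    }
    return (state == ExecState::DEAD || state == ExecState::FAILURE) ? 1 : 0;
}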
Example No. 5
    PlanExecutor::ExecState PlanExecutor::getNext(BSONObj* objOut, DiskLoc* dlOut) {
        if (_killed) { return PlanExecutor::DEAD; }

        // When a stage requests a yield for document fetch, it gives us back a RecordFetcher*
        // to use to pull the record into memory. We take ownership of the RecordFetcher here,
        // deleting it after we've had a chance to do the fetch. For timing-based yields, we
        // just pass a NULL fetcher.
        boost::scoped_ptr<RecordFetcher> fetcher;

        for (;;) {
            // There are two conditions which cause us to yield if we have a YIELD_AUTO
            // policy:
            //   1) The yield policy's timer elapsed, or
            //   2) some stage requested a yield due to a document fetch (NEED_FETCH).
            // In both cases, the actual yielding happens here.
            if (NULL != _yieldPolicy.get() && (_yieldPolicy->shouldYield()
                                               || NULL != fetcher.get())) {
                // Here's where we yield.
                _yieldPolicy->yield(fetcher.get());

                if (_killed) {
                    return PlanExecutor::DEAD;
                }
            }

            // We're done using the fetcher, so it should be freed. We don't want to
            // use the same RecordFetcher twice.
            fetcher.reset();

            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState code = _root->work(&id);

            if (PlanStage::ADVANCED == code) {
                // Fast count.
                if (WorkingSet::INVALID_ID == id) {
                    invariant(NULL == objOut);
                    invariant(NULL == dlOut);
                    return PlanExecutor::ADVANCED;
                }

                WorkingSetMember* member = _workingSet->get(id);
                bool hasRequestedData = true;

                if (NULL != objOut) {
                    if (WorkingSetMember::LOC_AND_IDX == member->state) {
                        if (1 != member->keyData.size()) {
                            _workingSet->free(id);
                            hasRequestedData = false;
                        }
                        else {
                            *objOut = member->keyData[0].keyData;
                        }
                    }
                    else if (member->hasObj()) {
                        *objOut = member->obj;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (NULL != dlOut) {
                    if (member->hasLoc()) {
                        *dlOut = member->loc;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (hasRequestedData) {
                    _workingSet->free(id);
                    return PlanExecutor::ADVANCED;
                }
                // This result didn't have the data the caller wanted, try again.
            }
            else if (PlanStage::NEED_FETCH == code) {
                // Yielding on a NEED_FETCH is handled above, so there's not much to do here.
                // Just verify that the NEED_FETCH gave us back a WSM that is actually fetchable.
                WorkingSetMember* member = _workingSet->get(id);
                invariant(member->hasFetcher());
                // Transfer ownership of the fetcher. Next time around the loop a yield will happen.
                fetcher.reset(member->releaseFetcher());
            }
            else if (PlanStage::NEED_TIME == code) {
                // Fall through to yield check at end of large conditional.
            }
            else if (PlanStage::IS_EOF == code) {
                return PlanExecutor::IS_EOF;
            }
            else if (PlanStage::DEAD == code) {
                return PlanExecutor::DEAD;
            }
            else {
                verify(PlanStage::FAILURE == code);
                if (NULL != objOut) {
                    WorkingSetCommon::getStatusMemberObject(*_workingSet, id, objOut);
                }
                return PlanExecutor::EXEC_ERROR;
            }
        }
    }
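
Example No. 5 is an older revision of the same loop: NEED_FETCH (later renamed
NEED_YIELD) hands the executor a RecordFetcher, the executor takes ownership, uses it
during the yield, then frees it so the same fetcher is never reused. A minimal,
self-contained sketch of that ownership hand-off with std::unique_ptr; Fetcher and
Member are hypothetical stand-ins for RecordFetcher and WorkingSetMember:

#include <memory>

struct Fetcher {
    void fetch() { /* Pull the record into memory while yielded. */ }
};

struct Member {
    std::unique_ptr<Fetcher> fetcher;

    bool hasFetcher() const { return fetcher != nullptr; }

    // Transfer ownership to the caller, leaving the member with no fetcher,
    // mirroring member->releaseFetcher() in the example above.
    Fetcher* releaseFetcher() { return fetcher.release(); }
};

void yieldAndFetch(Member& member) {
    std::unique_ptr<Fetcher> fetcher;
    if (member.hasFetcher()) {
        fetcher.reset(member.releaseFetcher());  // The executor now owns it.
    }
    if (fetcher) {
        fetcher->fetch();  // Do the fetch during the yield.
    }
    fetcher.reset();  // Never use the same fetcher twice.
}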
Example No. 6
    PlanExecutor::ExecState PlanExecutor::getNextSnapshotted(Snapshotted<BSONObj>* objOut,
                                                             RecordId* dlOut) {
        if (_killed) { return PlanExecutor::DEAD; }

        // When a stage requests a yield for document fetch, it gives us back a RecordFetcher*
        // to use to pull the record into memory. We take ownership of the RecordFetcher here,
        // deleting it after we've had a chance to do the fetch. For timing-based yields, we
        // just pass a NULL fetcher.
        boost::scoped_ptr<RecordFetcher> fetcher;

        // Incremented on every writeConflict, reset to 0 on any successful call to _root->work.
        size_t writeConflictsInARow = 0;

        for (;;) {
            // These are the conditions which can cause us to yield:
            //   1) The yield policy's timer elapsed, or
            //   2) some stage requested a yield due to a document fetch, or
            //   3) we need to yield and retry due to a WriteConflictException.
            // In all cases, the actual yielding happens here.
            if (_yieldPolicy->shouldYield()) {
                _yieldPolicy->yield(fetcher.get());

                if (_killed) {
                    return PlanExecutor::DEAD;
                }
            }

            // We're done using the fetcher, so it should be freed. We don't want to
            // use the same RecordFetcher twice.
            fetcher.reset();

            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState code = _root->work(&id);

            if (code != PlanStage::NEED_YIELD)
                writeConflictsInARow = 0;

            if (PlanStage::ADVANCED == code) {
                // Fast count.
                if (WorkingSet::INVALID_ID == id) {
                    invariant(NULL == objOut);
                    invariant(NULL == dlOut);
                    return PlanExecutor::ADVANCED;
                }

                WorkingSetMember* member = _workingSet->get(id);
                bool hasRequestedData = true;

                if (NULL != objOut) {
                    if (WorkingSetMember::LOC_AND_IDX == member->state) {
                        if (1 != member->keyData.size()) {
                            _workingSet->free(id);
                            hasRequestedData = false;
                        }
                        else {
                            // TODO: currently snapshot ids are only associated with documents, and
                            // not with index keys.
                            *objOut = Snapshotted<BSONObj>(SnapshotId(),
                                                           member->keyData[0].keyData);
                        }
                    }
                    else if (member->hasObj()) {
                        *objOut = member->obj;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (NULL != dlOut) {
                    if (member->hasLoc()) {
                        *dlOut = member->loc;
                    }
                    else {
                        _workingSet->free(id);
                        hasRequestedData = false;
                    }
                }

                if (hasRequestedData) {
                    _workingSet->free(id);
                    return PlanExecutor::ADVANCED;
                }
                // This result didn't have the data the caller wanted, try again.
            }
            else if (PlanStage::NEED_YIELD == code) {
                if (id == WorkingSet::INVALID_ID) {
                    if (!_yieldPolicy->allowedToYield()) throw WriteConflictException();
                    _opCtx->getCurOp()->debug().writeConflicts++;
                    writeConflictsInARow++;
                    WriteConflictException::logAndBackoff(writeConflictsInARow,
                                                          "plan execution",
                                                          _collection->ns().ns());

                }
                else {
                    WorkingSetMember* member = _workingSet->get(id);
                    invariant(member->hasFetcher());
                    // Transfer ownership of the fetcher. Next time around the loop a yield will
                    // happen.
                    fetcher.reset(member->releaseFetcher());
                }

                // If we're allowed to, we will yield next time through the loop.
                if (_yieldPolicy->allowedToYield()) _yieldPolicy->forceYield();
            }
            else if (PlanStage::NEED_TIME == code) {
                // Fall through to yield check at end of large conditional.
            }
            else if (PlanStage::IS_EOF == code) {
                return PlanExecutor::IS_EOF;
            }
            else if (PlanStage::DEAD == code) {
                return PlanExecutor::DEAD;
            }
            else {
                verify(PlanStage::FAILURE == code);
                if (NULL != objOut) {
                    BSONObj statusObj;
                    WorkingSetCommon::getStatusMemberObject(*_workingSet, id, &statusObj);
                    *objOut = Snapshotted<BSONObj>(SnapshotId(), statusObj);
                }
                return PlanExecutor::FAILURE;
            }
        }
    }
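
Example No. 6 also shows the write-conflict retry accounting: writeConflictsInARow grows
on each consecutive NEED_YIELD caused by a WriteConflictException and resets to zero on
any other work() result, so only an unbroken streak of conflicts escalates the backoff.
A simplified, self-contained model of logAndBackoff(); the quadratic sleep curve here is
illustrative, not MongoDB's actual schedule:

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <thread>

// Sleep a little longer after each consecutive write conflict, capped so a
// long streak cannot stall the operation indefinitely.
void backoffAfterWriteConflict(size_t conflictsInARow) {
    const size_t capped = std::min<size_t>(conflictsInARow, 10);
    std::this_thread::sleep_for(std::chrono::milliseconds(capped * capped));
}

The caller increments its counter before each retry and zeroes it as soon as work()
returns anything other than NEED_YIELD, matching the writeConflictsInARow handling in
the example.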