Example #1
    bool MultiPlanRunner::workAllPlans() {
        for (size_t i = 0; i < _candidates.size(); ++i) {
            CandidatePlan& candidate = _candidates[i];

            WorkingSetID id;
            PlanStage::StageState state = candidate.root->work(&id);
            if (PlanStage::ADVANCED == state) {
                // Save result for later.
                candidate.results.push(id);
            }
            else if (PlanStage::NEED_TIME == state) {
                // Nothing to do here.
            }
            else if (PlanStage::NEED_FETCH == state) {
                // XXX: We can yield to do this.  We have to deal with synchronization issues with
                // regards to the working set and invalidation.  What if another thread invalidates
                // the thing we're fetching?  The loc could vanish between hasLoc() and the actual
                // fetch...

                // id has a loc and refers to an obj we need to fetch.
                WorkingSetMember* member = candidate.ws->get(id);

                // This must be true for somebody to request a fetch and can only change when an
                // invalidation happens, which is when we give up a lock.  Don't give up the
                // lock between receiving the NEED_FETCH and actually fetching(?).
                verify(member->hasLoc());

                // Actually bring record into memory.
                Record* record = member->loc.rec();
                record->touch();

                // Record should be in memory now.  Log if it's not.
                if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
                    OCCASIONALLY {
                        warning() << "Record wasn't in memory immediately after fetch: "
                            << member->loc.toString() << endl;
                    }
                }

                // Note that we're not freeing id.  Fetch semantics say that we shouldn't.
            }
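
All of these examples implement or drive the same pull-based contract: a caller invokes work() repeatedly and branches on the returned StageState. Below is a minimal, self-contained sketch of that driver loop; TinyStage and the three-value enum are simplified stand-ins for illustration, not the real PlanStage interface.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Simplified stand-ins for PlanStage::StageState and a stage (illustrative only).
    enum class StageState { ADVANCED, NEED_TIME, IS_EOF };

    struct TinyStage {
        std::vector<int> docs{1, 2, 3};
        std::size_t pos = 0;
        bool needTime = true;  // Pretend we need one extra call before each result.

        StageState work(int* out) {
            if (pos >= docs.size()) return StageState::IS_EOF;
            if (needTime) {
                needTime = false;
                return StageState::NEED_TIME;  // Did work, produced nothing yet.
            }
            *out = docs[pos++];
            needTime = true;
            return StageState::ADVANCED;
        }
    };

    int main() {
        TinyStage stage;
        int result = 0;
        // The driver loop: consume ADVANCED results, loop again on NEED_TIME,
        // and stop at IS_EOF -- the same shape as workAllPlans() above.
        while (true) {
            StageState state = stage.work(&result);
            if (state == StageState::ADVANCED) {
                std::cout << result << '\n';
            } else if (state == StageState::IS_EOF) {
                break;
            }
            // NEED_TIME: fall through and call work() again.
        }
        return 0;
    }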
Example #2
    Status SortStageKeyGenerator::getSortKey(const WorkingSetMember& member,
                                             BSONObj* objOut) const {
        BSONObj btreeKeyToUse;

        Status btreeStatus = getBtreeKey(member.obj, &btreeKeyToUse);
        if (!btreeStatus.isOK()) {
            return btreeStatus;
        }

        if (!_sortHasMeta) {
            *objOut = btreeKeyToUse;
            return Status::OK();
        }

        BSONObjBuilder mergedKeyBob;

        // Merge metadata into the key.
        BSONObjIterator it(_rawSortSpec);
        BSONObjIterator btreeIt(btreeKeyToUse);
        while (it.more()) {
            BSONElement elt = it.next();
            if (elt.isNumber()) {
                // Merge btree key elt.
                mergedKeyBob.append(btreeIt.next());
            }
            else if (LiteParsedQuery::isTextScoreMeta(elt)) {
                // Add text score metadata
                double score = 0.0;
                if (member.hasComputed(WSM_COMPUTED_TEXT_SCORE)) {
                    const TextScoreComputedData* scoreData
                        = static_cast<const TextScoreComputedData*>(
                                member.getComputed(WSM_COMPUTED_TEXT_SCORE));
                    score = scoreData->getScore();
                }
                mergedKeyBob.append("$metaTextScore", score);
            }
        }

        *objOut = mergedKeyBob.obj();
        return Status::OK();
    }
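
The merge walks the raw sort spec and consumes one element from the index-provided key for each index-backed (numeric) field, splicing computed metadata in for $meta fields. A self-contained sketch of that parallel walk, with plain strings standing in for BSON and all names invented for illustration:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Illustrative stand-in: each sort-spec field is either index-backed or meta.
    struct SpecField {
        std::string name;
        bool isMeta;  // true for fields like {$meta: "textScore"}.
    };

    // Merge an index key (one value per index-backed field, in spec order) with
    // a computed score for meta fields, mirroring the two-iterator loop above.
    std::vector<std::string> mergeSortKey(const std::vector<SpecField>& spec,
                                          const std::vector<std::string>& indexKey,
                                          const std::string& textScore) {
        std::vector<std::string> merged;
        std::size_t indexIt = 0;  // Plays the role of btreeIt.
        for (const SpecField& field : spec) {
            if (!field.isMeta) {
                merged.push_back(indexKey[indexIt++]);  // Consume next key part.
            } else {
                merged.push_back(textScore);  // Splice in computed metadata.
            }
        }
        return merged;
    }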
Example #3
void WorkingSetCommon::prepareForSnapshotChange(WorkingSet* workingSet) {
    if (!supportsDocLocking()) {
        // Non doc-locking storage engines use invalidations, so we don't need to examine the
        // buffered working set ids. But we do need to clear the set of ids in order to keep our
        // memory utilization in check.
        workingSet->getAndClearYieldSensitiveIds();
        return;
    }

    for (auto id : workingSet->getAndClearYieldSensitiveIds()) {
        if (workingSet->isFree(id)) {
            continue;
        }

        // We may see the same member twice, so anything we do here should be idempotent.
        WorkingSetMember* member = workingSet->get(id);
        if (member->getState() == WorkingSetMember::RID_AND_IDX) {
            member->isSuspicious = true;
        }
    }
}
Example #4
    void getRecordIds(Collection* collection,
                      CollectionScanParams::Direction direction,
                      vector<RecordId>* out) {
        WorkingSet ws;

        CollectionScanParams params;
        params.collection = collection;
        params.direction = direction;
        params.tailable = false;

        unique_ptr<CollectionScan> scan(new CollectionScan(&_txn, params, &ws, NULL));
        while (!scan->isEOF()) {
            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState state = scan->work(&id);
            if (PlanStage::ADVANCED == state) {
                WorkingSetMember* member = ws.get(id);
                verify(member->hasRecordId());
                out->push_back(member->recordId);
            }
        }
    }
Example #5
    void MergeSortStage::invalidate(const DiskLoc& dl, InvalidationType type) {
        ++_commonStats.invalidates;
        for (size_t i = 0; i < _children.size(); ++i) {
            _children[i]->invalidate(dl, type);
        }

        // Go through our data and see if we're holding on to the invalidated loc.
        for (list<StageWithValue>::iterator valueIt = _mergingData.begin();
             valueIt != _mergingData.end();
             valueIt++) {
            WorkingSetMember* member = _ws->get(valueIt->id);
            if (member->hasLoc() && (dl == member->loc)) {
                // Force a fetch and flag.  We could possibly merge this result back in later.
                WorkingSetCommon::fetchAndInvalidateLoc(member, _collection);
                _ws->flagForReview(valueIt->id);
                ++_specificStats.forcedFetches;
            }
        }

        // If we see dl again, it is not the same record it once was, so we still
        // want to return it.
        if (_dedup) { _seen.erase(dl); }
    }
Example #6
void MergeSortStage::doInvalidate(OperationContext* txn,
                                  const RecordId& dl,
                                  InvalidationType type) {
    // Go through our data and see if we're holding on to the invalidated RecordId.
    for (list<StageWithValue>::iterator valueIt = _mergingData.begin();
         valueIt != _mergingData.end();
         valueIt++) {
        WorkingSetMember* member = _ws->get(valueIt->id);
        if (member->hasRecordId() && (dl == member->recordId)) {
            // Fetch the about-to-be mutated result.
            WorkingSetCommon::fetchAndInvalidateRecordId(txn, member, _collection);
            ++_specificStats.forcedFetches;
        }
    }

    // If we see the deleted RecordId again it is not the same record as it once was so we still
    // want to return it.
    if (_dedup && INVALIDATION_DELETION == type) {
        _seen.erase(dl);
    }
}
Example #7
    void MultiPlanRunner::invalidate(const DiskLoc& dl, InvalidationType type) {
        if (_failure || _killed) { return; }

        if (NULL != _bestPlan) {
            _bestPlan->invalidate(dl, type);
            for (list<WorkingSetID>::iterator it = _alreadyProduced.begin();
                 it != _alreadyProduced.end();) {
                WorkingSetMember* member = _bestPlan->getWorkingSet()->get(*it);
                if (member->hasLoc() && member->loc == dl) {
                    list<WorkingSetID>::iterator next = it;
                    next++;
                    WorkingSetCommon::fetchAndInvalidateLoc(member);
                    _bestPlan->getWorkingSet()->flagForReview(*it);
                    _alreadyProduced.erase(it);
                    it = next;
                }
                else {
                    it++;
                }
            }
            if (NULL != _backupPlan) {
                _backupPlan->invalidate(dl, type);
                for (list<WorkingSetID>::iterator it = _backupAlreadyProduced.begin();
                        it != _backupAlreadyProduced.end();) {
                    WorkingSetMember* member = _backupPlan->getWorkingSet()->get(*it);
                    if (member->hasLoc() && member->loc == dl) {
                        list<WorkingSetID>::iterator next = it;
                        next++;
                        WorkingSetCommon::fetchAndInvalidateLoc(member);
                        _backupPlan->getWorkingSet()->flagForReview(*it);
                        _backupAlreadyProduced.erase(it);
                        it = next;
                    }
                    else {
                        it++;
                    }
                }
            }
        }
        else {
            for (size_t i = 0; i < _candidates.size(); ++i) {
                _candidates[i].root->invalidate(dl, type);
                for (list<WorkingSetID>::iterator it = _candidates[i].results.begin();
                     it != _candidates[i].results.end();) {
                    WorkingSetMember* member = _candidates[i].ws->get(*it);
                    if (member->hasLoc() && member->loc == dl) {
                        list<WorkingSetID>::iterator next = it;
                        next++;
                        WorkingSetCommon::fetchAndInvalidateLoc(member);
                        _candidates[i].ws->flagForReview(*it);
                        _candidates[i].results.erase(it);
                        it = next;
                    }
                    else {
                        it++;
                    }
                }
            }
        }
    }
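
Each of the three loops above deletes from a std::list while iterating by pre-saving a successor iterator. A short sketch of the equivalent, more idiomatic form: std::list::erase() already returns the iterator following the erased element (plain ints stand in for the WorkingSetIDs whose members point at the invalidated loc).

    #include <list>

    // Remove every buffered element matching 'invalidated' while iterating.
    void removeInvalidated(std::list<int>& alreadyProduced, int invalidated) {
        for (auto it = alreadyProduced.begin(); it != alreadyProduced.end();) {
            if (*it == invalidated) {
                it = alreadyProduced.erase(it);  // erase() yields the next iterator.
            } else {
                ++it;
            }
        }
    }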
Example #8
PlanStage::StageState MultiIteratorStage::work(WorkingSetID* out) {
    if (_collection == NULL) {
        Status status(ErrorCodes::InternalError, "MultiIteratorStage died on null collection");
        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        return PlanStage::DEAD;
    }

    boost::optional<Record> record;
    try {
        while (!_iterators.empty()) {
            if (auto fetcher = _iterators.back()->fetcherForNext()) {
                // Pass the RecordFetcher off up.
                WorkingSetMember* member = _ws->get(_wsidForFetch);
                member->setFetcher(fetcher.release());
                *out = _wsidForFetch;
                return NEED_YIELD;
            }

            record = _iterators.back()->next();
            if (record)
                break;
            _iterators.pop_back();
        }
    } catch (const WriteConflictException& wce) {
        // If _advance throws a WCE we shouldn't have moved.
        invariant(!_iterators.empty());
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (!record)
        return IS_EOF;

    *out = _ws->allocate();
    WorkingSetMember* member = _ws->get(*out);
    member->loc = record->id;
    member->obj = {_txn->recoveryUnit()->getSnapshotId(), record->data.releaseToBson()};
    member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
    return PlanStage::ADVANCED;
}
Example #9
    void run() {
        // Various variables we'll need.
        OldClientWriteContext ctx(&_txn, nss.ns());
        Collection* coll = ctx.getCollection();
        const BSONObj query = BSONObj();
        const auto ws = make_unique<WorkingSet>();
        const unique_ptr<CanonicalQuery> cq(canonicalize(query));

        // Configure a QueuedDataStage to pass an OWNED_OBJ to the delete stage.
        auto qds = make_unique<QueuedDataStage>(&_txn, ws.get());
        {
            WorkingSetID id = ws->allocate();
            WorkingSetMember* member = ws->get(id);
            member->obj = Snapshotted<BSONObj>(SnapshotId(), fromjson("{x: 1}"));
            member->transitionToOwnedObj();
            qds->pushBack(id);
        }

        // Configure the delete.
        DeleteStageParams deleteParams;
        deleteParams.isMulti = false;
        deleteParams.canonicalQuery = cq.get();

        const auto deleteStage =
            make_unique<DeleteStage>(&_txn, deleteParams, ws.get(), coll, qds.release());
        const DeleteStats* stats = static_cast<const DeleteStats*>(deleteStage->getSpecificStats());

        // Call work, passing the set up member to the delete stage.
        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = deleteStage->work(&id);

        // Should return NEED_TIME, not deleting anything.
        ASSERT_EQUALS(PlanStage::NEED_TIME, state);
        ASSERT_EQUALS(stats->docsDeleted, 0U);

        id = WorkingSet::INVALID_ID;
        state = deleteStage->work(&id);
        ASSERT_EQUALS(PlanStage::IS_EOF, state);
    }
Example #10
PlanStage::StageState SortKeyGeneratorStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    // Adds the amount of time taken by work() to executionTimeMillis.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (!_sortKeyGen) {
        _sortKeyGen = stdx::make_unique<SortKeyGenerator>(_collection, _sortSpec, _query);
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    auto stageState = child()->work(out);
    if (stageState == PlanStage::ADVANCED) {
        WorkingSetMember* member = _ws->get(*out);

        BSONObj sortKey;
        Status sortKeyStatus = _sortKeyGen->getSortKey(*member, &sortKey);
        if (!sortKeyStatus.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, sortKeyStatus);
            return PlanStage::FAILURE;
        }

        // Add the sort key to the WSM as computed data.
        member->addComputed(new SortKeyComputedData(sortKey));

        return PlanStage::ADVANCED;
    }

    if (stageState == PlanStage::IS_EOF) {
        _commonStats.isEOF = true;
    } else if (stageState == PlanStage::NEED_TIME) {
        ++_commonStats.needTime;
    } else if (stageState == PlanStage::NEED_YIELD) {
        ++_commonStats.needYield;
    }

    return stageState;
}
Example #11
File: near.cpp Project: 3rf/mongo
    void NearStage::invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) {
        ++_stats->common.invalidates;
        for (size_t i = 0; i < _childrenIntervals.size(); i++) {
            _childrenIntervals[i]->covering->invalidate(txn, dl, type);
        }

        // If a result is in _resultBuffer and has a RecordId it will be in _nextIntervalSeen as
        // well. It's safe to return the result w/o the RecordId, so just fetch the result.
        unordered_map<RecordId, WorkingSetID, RecordId::Hasher>::iterator seenIt =
            _nextIntervalSeen.find(dl);

        if (seenIt != _nextIntervalSeen.end()) {

            WorkingSetMember* member = _workingSet->get(seenIt->second);
            verify(member->hasLoc());
            WorkingSetCommon::fetchAndInvalidateLoc(txn, member, _collection);
            verify(!member->hasLoc());

            // Don't keep it around in the seen map since there's no valid RecordId anymore
            _nextIntervalSeen.erase(seenIt);
        }
    }
Example #12
    void NearStage::invalidate(const DiskLoc& dl, InvalidationType type) {
        ++_stats->common.invalidates;
        if (_nextInterval) {
            _nextInterval->covering->invalidate(dl, type);
        }

        // If a result is in _resultBuffer and has a DiskLoc it will be in _nextIntervalSeen as
        // well. It's safe to return the result w/o the DiskLoc, so just fetch the result.
        unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher>::iterator seenIt =
            _nextIntervalSeen.find(dl);

        if (seenIt != _nextIntervalSeen.end()) {

            WorkingSetMember* member = _workingSet->get(seenIt->second);
            verify(member->hasLoc());
            WorkingSetCommon::fetchAndInvalidateLoc(member, _collection);
            verify(!member->hasLoc());

            // Don't keep it around in the seen map since there's no valid DiskLoc anymore
            _nextIntervalSeen.erase(seenIt);
        }
    }
Example #13
PlanStage::StageState TextOrStage::returnResults(WorkingSetID* out) {
    if (_scoreIterator == _scores.end()) {
        _internalState = State::kDone;
        return PlanStage::IS_EOF;
    }

    // Retrieve the record that contains the text score.
    TextRecordData textRecordData = _scoreIterator->second;
    ++_scoreIterator;

    // Ignore non-matched documents.
    if (textRecordData.score < 0) {
        invariant(textRecordData.wsid == WorkingSet::INVALID_ID);
        return PlanStage::NEED_TIME;
    }

    WorkingSetMember* wsm = _ws->get(textRecordData.wsid);

    // Populate the working set member with the text score and return it.
    wsm->addComputed(new TextScoreComputedData(textRecordData.score));
    *out = textRecordData.wsid;
    return PlanStage::ADVANCED;
}
Example #14
PlanStage::StageState PipelineProxyStage::doWork(WorkingSetID* out) {
    if (!out) {
        return PlanStage::FAILURE;
    }

    if (!_stash.empty()) {
        *out = _ws->allocate();
        WorkingSetMember* member = _ws->get(*out);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), _stash.back());
        _stash.pop_back();
        member->transitionToOwnedObj();
        return PlanStage::ADVANCED;
    }

    if (boost::optional<BSONObj> next = getNextBson()) {
        *out = _ws->allocate();
        WorkingSetMember* member = _ws->get(*out);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), *next);
        member->transitionToOwnedObj();
        return PlanStage::ADVANCED;
    }

    return PlanStage::IS_EOF;
}
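
doWork() drains a stash of buffered documents before asking the pipeline for a new one. A minimal sketch of that stash-first pattern; the class and its pull() source are invented stand-ins, not the real PipelineProxyStage API:

    #include <optional>
    #include <string>
    #include <utility>
    #include <vector>

    class ProxySource {
    public:
        void stash(std::string doc) { _stash.push_back(std::move(doc)); }

        std::optional<std::string> next() {
            if (!_stash.empty()) {
                std::string doc = std::move(_stash.back());  // LIFO, as in doWork().
                _stash.pop_back();
                return doc;
            }
            return pull();  // Otherwise ask the underlying producer.
        }

    private:
        std::optional<std::string> pull() { return std::nullopt; }  // EOF stand-in.
        std::vector<std::string> _stash;
    };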
Example #15
    bool run(OperationContext* txn,
             const string& dbname,
             BSONObj& cmdObj,
             int,
             string& errmsg,
             BSONObjBuilder& result) {
        BSONElement first = cmdObj.firstElement();
        uassert(28528,
                str::stream() << "Argument to listIndexes must be of type String, not "
                              << typeName(first.type()),
                first.type() == String);
        StringData collectionName = first.valueStringData();
        uassert(28529,
                str::stream() << "Argument to listIndexes must be a collection name, "
                              << "not the empty string",
                !collectionName.empty());
        const NamespaceString ns(dbname, collectionName);

        const long long defaultBatchSize = std::numeric_limits<long long>::max();
        long long batchSize;
        Status parseCursorStatus = parseCommandCursorOptions(cmdObj, defaultBatchSize, &batchSize);
        if (!parseCursorStatus.isOK()) {
            return appendCommandStatus(result, parseCursorStatus);
        }

        AutoGetCollectionForRead autoColl(txn, ns);
        if (!autoColl.getDb()) {
            return appendCommandStatus(result,
                                       Status(ErrorCodes::NamespaceNotFound, "no database"));
        }

        const Collection* collection = autoColl.getCollection();
        if (!collection) {
            return appendCommandStatus(result,
                                       Status(ErrorCodes::NamespaceNotFound, "no collection"));
        }

        const CollectionCatalogEntry* cce = collection->getCatalogEntry();
        invariant(cce);

        vector<string> indexNames;
        MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
            indexNames.clear();
            cce->getAllIndexes(txn, &indexNames);
        }
        MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

        std::unique_ptr<WorkingSet> ws(new WorkingSet());
        std::unique_ptr<QueuedDataStage> root(new QueuedDataStage(ws.get()));

        for (size_t i = 0; i < indexNames.size(); i++) {
            BSONObj indexSpec;
            MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
                indexSpec = cce->getIndexSpec(txn, indexNames[i]);
            }
            MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "listIndexes", ns.ns());

            WorkingSetID id = ws->allocate();
            WorkingSetMember* member = ws->get(id);
            member->keyData.clear();
            member->loc = RecordId();
            member->obj = Snapshotted<BSONObj>(SnapshotId(), indexSpec.getOwned());
            member->transitionToOwnedObj();
            root->pushBack(id);
        }

        std::string cursorNamespace = str::stream() << dbname << ".$cmd." << name << "."
                                                    << ns.coll();
        dassert(NamespaceString(cursorNamespace).isValid());
        dassert(NamespaceString(cursorNamespace).isListIndexesCursorNS());
        dassert(ns == NamespaceString(cursorNamespace).getTargetNSForListIndexes());

        auto statusWithPlanExecutor = PlanExecutor::make(
            txn, std::move(ws), std::move(root), cursorNamespace, PlanExecutor::YIELD_MANUAL);
        if (!statusWithPlanExecutor.isOK()) {
            return appendCommandStatus(result, statusWithPlanExecutor.getStatus());
        }
        std::unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue());

        BSONArrayBuilder firstBatch;

        const int byteLimit = MaxBytesToReturnToClientAtOnce;
        for (long long objCount = 0; objCount < batchSize && firstBatch.len() < byteLimit;
             objCount++) {
            BSONObj next;
            PlanExecutor::ExecState state = exec->getNext(&next, NULL);
            if (state == PlanExecutor::IS_EOF) {
                break;
            }
            invariant(state == PlanExecutor::ADVANCED);
            firstBatch.append(next);
        }

        CursorId cursorId = 0LL;
        if (!exec->isEOF()) {
            exec->saveState();
            ClientCursor* cursor = new ClientCursor(
                CursorManager::getGlobalCursorManager(), exec.release(), cursorNamespace);
            cursorId = cursor->cursorid();
        }

        appendCursorResponseObject(cursorId, cursorNamespace, firstBatch.arr(), &result);

        return true;
    }
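
The first-batch loop near the end is bounded twice: by the requested batchSize and by a byte budget, so a single reply cannot grow without limit. A standalone sketch of that double bound, with strings standing in for BSON documents and all names invented:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Fill a first batch up to 'batchSize' documents or 'byteLimit' bytes,
    // whichever comes first, mirroring the loop above.
    std::vector<std::string> buildFirstBatch(const std::vector<std::string>& docs,
                                             long long batchSize,
                                             std::size_t byteLimit) {
        std::vector<std::string> batch;
        std::size_t bytes = 0;
        for (long long n = 0; n < batchSize && bytes < byteLimit &&
                              n < static_cast<long long>(docs.size());
             ++n) {
            bytes += docs[n].size();
            batch.push_back(docs[n]);
        }
        return batch;
    }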
Example #16
        void run() {
            Client::WriteContext ctx(&_txn, ns());
            Database* db = ctx.ctx().db();
            Collection* coll = db->getCollection(&_txn, ns());
            if (!coll) {
                coll = db->createCollection(&_txn, ns());
            }

            WorkingSet ws;
            // Sort by foo:1
            MergeSortStageParams msparams;
            msparams.pattern = BSON("foo" << 1);
            auto_ptr<MergeSortStage> ms(new MergeSortStage(msparams, &ws, coll));

            IndexScanParams params;
            params.bounds.isSimpleRange = true;
            params.bounds.startKey = objWithMinKey(1);
            params.bounds.endKey = objWithMaxKey(1);
            params.bounds.endKeyInclusive = true;
            params.direction = 1;

            // Index 'a'+i has foo equal to 'i'.

            int numIndices = 20;
            for (int i = 0; i < numIndices; ++i) {
                // 'a', 'b', ...
                string index(1, 'a' + i);
                insert(BSON(index << 1 << "foo" << i));

                BSONObj indexSpec = BSON(index << 1 << "foo" << 1);
                addIndex(indexSpec);
                params.descriptor = getIndex(indexSpec, coll);
                ms->addChild(new IndexScan(&_txn, params, &ws, NULL));
            }

            set<DiskLoc> locs;
            getLocs(&locs, coll);

            set<DiskLoc>::iterator it = locs.begin();
            ctx.commit();

            // Get 10 results.  Should be getting results in order of 'locs'.
            int count = 0;
            while (!ms->isEOF() && count < 10) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                PlanStage::StageState status = ms->work(&id);
                if (PlanStage::ADVANCED != status) { continue; }

                WorkingSetMember* member = ws.get(id);
                ASSERT_EQUALS(member->loc, *it);
                BSONElement elt;
                string index(1, 'a' + count);
                ASSERT(member->getFieldDotted(index, &elt));
                ASSERT_EQUALS(1, elt.numberInt());
                ASSERT(member->getFieldDotted("foo", &elt));
                ASSERT_EQUALS(count, elt.numberInt());
                ++count;
                ++it;
            }

            // Invalidate locs[11].  Should force a fetch.  We don't get it back.
            ms->prepareToYield();
            ms->invalidate(*it, INVALIDATION_DELETION);
            ms->recoverFromYield(&_txn);

            // Make sure locs[11] was fetched for us.
            {
            // TODO: If we have "return upon invalidation" ever triggerable, do the following test.
            /*
                WorkingSetID id = WorkingSet::INVALID_ID;
                PlanStage::StageState status;
                do {
                    status = ms->work(&id);
                } while (PlanStage::ADVANCED != status);

                WorkingSetMember* member = ws.get(id);
                ASSERT(!member->hasLoc());
                ASSERT(member->hasObj());
                string index(1, 'a' + count);
                BSONElement elt;
                ASSERT_TRUE(member->getFieldDotted(index, &elt));
                ASSERT_EQUALS(1, elt.numberInt());
                ASSERT(member->getFieldDotted("foo", &elt));
                ASSERT_EQUALS(count, elt.numberInt());
            */

                ++it;
                ++count;
            }

            // And get the rest.
            while (!ms->isEOF()) {
                WorkingSetID id = WorkingSet::INVALID_ID;
                PlanStage::StageState status = ms->work(&id);
                if (PlanStage::ADVANCED != status) { continue; }

                WorkingSetMember* member = ws.get(id);
                ASSERT_EQUALS(member->loc, *it);
                BSONElement elt;
                string index(1, 'a' + count);
                ASSERT_TRUE(member->getFieldDotted(index, &elt));
                ASSERT_EQUALS(1, elt.numberInt());
                ASSERT(member->getFieldDotted("foo", &elt));
                ASSERT_EQUALS(count, elt.numberInt());
                ++count;
                ++it;
            }
        }
Example #17
    void run() {
        // Populate the collection.
        for (int i = 0; i < 50; ++i) {
            insert(BSON("_id" << i << "foo" << i));
        }
        ASSERT_EQUALS(50U, count(BSONObj()));

        // Various variables we'll need.
        dbtests::WriteContextForTests ctx(&_opCtx, nss.ns());
        OpDebug* opDebug = &CurOp::get(_opCtx)->debug();
        Collection* coll = ctx.getCollection();
        ASSERT(coll);
        UpdateRequest request(nss);
        const CollatorInterface* collator = nullptr;
        UpdateDriver driver(new ExpressionContext(&_opCtx, collator));
        const int targetDocIndex = 10;
        const BSONObj query = BSON("foo" << BSON("$gte" << targetDocIndex));
        const auto ws = make_unique<WorkingSet>();
        const unique_ptr<CanonicalQuery> cq(canonicalize(query));

        // Get the RecordIds that would be returned by an in-order scan.
        vector<RecordId> recordIds;
        getRecordIds(coll, CollectionScanParams::FORWARD, &recordIds);

        // Populate the request.
        request.setQuery(query);
        request.setUpdates(fromjson("{$set: {x: 0}}"));
        request.setSort(BSONObj());
        request.setMulti(false);
        request.setReturnDocs(UpdateRequest::RETURN_NEW);

        const std::map<StringData, std::unique_ptr<ExpressionWithPlaceholder>> arrayFilters;

        ASSERT_DOES_NOT_THROW(driver.parse(request.getUpdates(), arrayFilters, request.isMulti()));

        // Configure a QueuedDataStage to pass the first object in the collection back in a
        // RID_AND_OBJ state.
        auto qds = make_unique<QueuedDataStage>(&_opCtx, ws.get());
        WorkingSetID id = ws->allocate();
        WorkingSetMember* member = ws->get(id);
        member->recordId = recordIds[targetDocIndex];
        const BSONObj oldDoc = BSON("_id" << targetDocIndex << "foo" << targetDocIndex);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), oldDoc);
        ws->transitionToRecordIdAndObj(id);
        qds->pushBack(id);

        // Configure the update.
        UpdateStageParams updateParams(&request, &driver, opDebug);
        updateParams.canonicalQuery = cq.get();

        auto updateStage =
            make_unique<UpdateStage>(&_opCtx, updateParams, ws.get(), coll, qds.release());

        // Should return advanced.
        id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = updateStage->work(&id);
        ASSERT_EQUALS(PlanStage::ADVANCED, state);

        // Make sure the returned value is what we expect it to be.

        // Should give us back a valid id.
        ASSERT_TRUE(WorkingSet::INVALID_ID != id);
        WorkingSetMember* resultMember = ws->get(id);
        // With an owned copy of the object, with no RecordId.
        ASSERT_TRUE(resultMember->hasOwnedObj());
        ASSERT_FALSE(resultMember->hasRecordId());
        ASSERT_EQUALS(resultMember->getState(), WorkingSetMember::OWNED_OBJ);
        ASSERT_TRUE(resultMember->obj.value().isOwned());

        // Should be the new value.
        BSONObj newDoc = BSON("_id" << targetDocIndex << "foo" << targetDocIndex << "x" << 0);
        ASSERT_BSONOBJ_EQ(resultMember->obj.value(), newDoc);

        // Should have done the update.
        vector<BSONObj> objs;
        getCollContents(coll, &objs);
        ASSERT_BSONOBJ_EQ(objs[targetDocIndex], newDoc);

        // That should be it.
        id = WorkingSet::INVALID_ID;
        ASSERT_EQUALS(PlanStage::IS_EOF, updateStage->work(&id));
    }
Example #18
    PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) {
        PlanStage::StageState state = _child->work(out);

        // All done reading from _child.
        if (PlanStage::IS_EOF == state) {
            _child.reset();
            _keyGeoFilter.reset();

            // Adjust the annulus size depending on how many results we got.
            if (_results.empty()) {
                _radiusIncrement *= 2;
            } else if (_results.size() < 300) {
                _radiusIncrement *= 2;
            } else if (_results.size() > 600) {
                _radiusIncrement /= 2;
            }

            // Make a new ixscan next time.
            return PlanStage::NEED_TIME;
        }

        // Nothing to do unless we advance.
        if (PlanStage::ADVANCED != state) { return state; }

        WorkingSetMember* member = _ws->get(*out);
        // Must have an object in order to get geometry out of it.
        verify(member->hasObj());

        // The scans we use don't dedup so we must dedup them ourselves.  We only put locs in
        // here if we know for sure whether or not we'll return them in this annulus.
        if (member->hasLoc()) {
            if (_seenInScan.end() != _seenInScan.find(member->loc)) {
                return PlanStage::NEED_TIME;
            }
        }

        // Get all the fields with that name from the document.
        BSONElementSet geom;
        member->obj.getFieldsDotted(_params.nearQuery.field, geom, false);
        if (geom.empty()) {
            return PlanStage::NEED_TIME;
        }

        // Some value that any distance we can calculate will be less than.
        double minDistance = numeric_limits<double>::max();
        BSONObj minDistanceObj;
        for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) {
            if (!git->isABSONObj()) {
                mongoutils::str::stream ss;
                ss << "s2near stage read invalid geometry element " << *git << " from child";
                Status status(ErrorCodes::InternalError, ss);
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
                return PlanStage::FAILURE;
            }
            BSONObj obj = git->Obj();

            double distToObj;
            if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) {
                if (distToObj < minDistance) {
                    minDistance = distToObj;
                    minDistanceObj = obj;
                }
            }
            else {
                warning() << "unknown geometry: " << obj.toString();
            }
        }

        // If we're here we'll either include the doc in this annulus or reject it.  It's safe to
        // ignore it if it pops up again in this annulus.
        if (member->hasLoc()) {
            _seenInScan.insert(member->loc);
        }

        // If the distance to the doc satisfies our distance criteria, add it to our buffered
        // results.
        if (minDistance >= _innerRadius &&
            (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) {
            _results.push(Result(*out, minDistance));
            if (_params.addDistMeta) {
                // FLAT implies the output distances are in radians.  Convert to meters.
                if (FLAT == _params.nearQuery.centroid.crs) {
                    member->addComputed(new GeoDistanceComputedData(minDistance
                                                                    / kRadiusOfEarthInMeters));
                }
                else {
                    member->addComputed(new GeoDistanceComputedData(minDistance));
                }
            }
            if (_params.addPointMeta) {
                member->addComputed(new GeoNearPointComputedData(minDistanceObj));
            }
            if (member->hasLoc()) {
                _invalidationMap[member->loc] = *out;
            }
        }

        return PlanStage::NEED_TIME;
    }
Example #19
PlanStage::StageState CollectionScan::doWork(WorkingSetID* out) {
    if (_isDead) {
        Status status(
            ErrorCodes::CappedPositionLost,
            str::stream()
                << "CollectionScan died due to position in capped collection being deleted. "
                << "Last seen record id: "
                << _lastSeenId);
        *out = WorkingSetCommon::allocateStatusMember(_workingSet, status);
        return PlanStage::DEAD;
    }

    if ((0 != _params.maxScan) && (_specificStats.docsTested >= _params.maxScan)) {
        _commonStats.isEOF = true;
    }

    if (_commonStats.isEOF) {
        return PlanStage::IS_EOF;
    }

    boost::optional<Record> record;
    const bool needToMakeCursor = !_cursor;
    try {
        if (needToMakeCursor) {
            const bool forward = _params.direction == CollectionScanParams::FORWARD;
            _cursor = _params.collection->getCursor(getOpCtx(), forward);

            if (!_lastSeenId.isNull()) {
                invariant(_params.tailable);
                // Seek to where we were last time. If it no longer exists, mark us as dead
                // since we want to signal an error rather than silently dropping data from the
                // stream. This is related to the _lastSeenId handling in invalidate. Note that
                // we want to return the record *after* this one since we have already returned
                // this one. This is only possible in the tailing case because that is the only
                // time we'd need to create a cursor after already getting a record out of it.
                if (!_cursor->seekExact(_lastSeenId)) {
                    _isDead = true;
                    Status status(ErrorCodes::CappedPositionLost,
                                  str::stream() << "CollectionScan died due to failure to restore "
                                                << "tailable cursor position. "
                                                << "Last seen record id: "
                                                << _lastSeenId);
                    *out = WorkingSetCommon::allocateStatusMember(_workingSet, status);
                    return PlanStage::DEAD;
                }
            }

            return PlanStage::NEED_TIME;
        }

        if (_lastSeenId.isNull() && !_params.start.isNull()) {
            record = _cursor->seekExact(_params.start);
        } else {
            // See if the record we're about to access is in memory. If not, pass a fetch
            // request up.
            if (auto fetcher = _cursor->fetcherForNext()) {
                // Pass the RecordFetcher up.
                WorkingSetMember* member = _workingSet->get(_wsidForFetch);
                member->setFetcher(fetcher.release());
                *out = _wsidForFetch;
                return PlanStage::NEED_YIELD;
            }

            record = _cursor->next();
        }
    } catch (const WriteConflictException& wce) {
        // Leave us in a state to try again next time.
        if (needToMakeCursor)
            _cursor.reset();
        *out = WorkingSet::INVALID_ID;
        return PlanStage::NEED_YIELD;
    }

    if (!record) {
        // We just hit EOF. If we are tailable and have already returned data, leave us in a
        // state to pick up where we left off on the next call to work(). Otherwise EOF is
        // permanent.
        if (_params.tailable && !_lastSeenId.isNull()) {
            _cursor.reset();
        } else {
            _commonStats.isEOF = true;
        }

        return PlanStage::IS_EOF;
    }

    _lastSeenId = record->id;

    WorkingSetID id = _workingSet->allocate();
    WorkingSetMember* member = _workingSet->get(id);
    member->recordId = record->id;
    member->obj = {getOpCtx()->recoveryUnit()->getSnapshotId(), record->data.releaseToBson()};
    _workingSet->transitionToRecordIdAndObj(id);

    return returnIfMatches(member, id, out);
}
Example #20
/**
 * addToBuffer() and sortBuffer() work differently based on the
 * configured limit. addToBuffer() is also responsible for
 * performing some accounting on the overall memory usage to
 * make sure we're not using too much memory.
 *
 * limit == 0:
 *     addToBuffer() - Adds item to vector.
 *     sortBuffer() - Sorts vector.
 * limit == 1:
 *     addToBuffer() - Replaces first item in vector with max of
 *                     current and new item.
 *                     Updates memory usage if item was replaced.
 *     sortBuffer() - Does nothing.
 * limit > 1:
 *     addToBuffer() - Does not update vector. Adds item to set.
 *                     If size of set exceeds limit, remove item from set
 *                     with lowest key. Updates memory usage accordingly.
 *     sortBuffer() - Copies items from set to vectors.
 */
void SortStage::addToBuffer(const SortableDataItem& item) {
    // Holds ID of working set member to be freed at end of this function.
    WorkingSetID wsidToFree = WorkingSet::INVALID_ID;

    WorkingSetMember* member = _ws->get(item.wsid);
    if (_limit == 0) {
        // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
        member->makeObjOwnedIfNeeded();
        _data.push_back(item);
        _memUsage += member->getMemUsage();
    } else if (_limit == 1) {
        if (_data.empty()) {
            member->makeObjOwnedIfNeeded();
            _data.push_back(item);
            _memUsage = member->getMemUsage();
            return;
        }
        wsidToFree = item.wsid;
        const WorkingSetComparator& cmp = *_sortKeyComparator;
        // Compare new item with existing item in vector.
        if (cmp(item, _data[0])) {
            wsidToFree = _data[0].wsid;
            member->makeObjOwnedIfNeeded();
            _data[0] = item;
            _memUsage = member->getMemUsage();
        }
    } else {
        // Update data item set instead of vector
        // Limit not reached - insert and return
        vector<SortableDataItem>::size_type limit(_limit);
        if (_dataSet->size() < limit) {
            member->makeObjOwnedIfNeeded();
            _dataSet->insert(item);
            _memUsage += member->getMemUsage();
            return;
        }
        // Limit will be exceeded - compare with item with lowest key
        // If new item does not have a lower key value than last item,
        // do nothing.
        wsidToFree = item.wsid;
        SortableDataItemSet::const_iterator lastItemIt = --(_dataSet->end());
        const SortableDataItem& lastItem = *lastItemIt;
        const WorkingSetComparator& cmp = *_sortKeyComparator;
        if (cmp(item, lastItem)) {
            _memUsage -= _ws->get(lastItem.wsid)->getMemUsage();
            _memUsage += member->getMemUsage();
            wsidToFree = lastItem.wsid;
            // According to std::set iterator validity rules,
            // it does not matter which of erase()/insert() happens first.
            // Here, we choose to erase first to release potential resources
            // used by the last item and to keep the scope of the iterator to a minimum.
            _dataSet->erase(lastItemIt);
            member->makeObjOwnedIfNeeded();
            _dataSet->insert(item);
        }
    }

    // If the working set ID is valid, remove from
    // RecordId invalidation map and free from working set.
    if (wsidToFree != WorkingSet::INVALID_ID) {
        WorkingSetMember* member = _ws->get(wsidToFree);
        if (member->hasLoc()) {
            _wsidByDiskLoc.erase(member->loc);
        }
        _ws->free(wsidToFree);
    }
}
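
For the limit > 1 branch, the buffer is a bounded ordered set: insert freely until the limit is reached, then compare each new item against the current maximum and displace it only if the new item sorts lower. A standalone sketch of that top-k technique with plain ints in place of SortableDataItems:

    #include <cstddef>
    #include <iterator>
    #include <set>

    // Keep the 'limit' smallest values seen, mirroring the bounded-set branch.
    void addBounded(std::multiset<int>& topK, std::size_t limit, int item) {
        if (topK.size() < limit) {
            topK.insert(item);  // Limit not reached: insert and return.
            return;
        }
        auto lastIt = std::prev(topK.end());  // Current item with the highest key.
        if (item < *lastIt) {
            // Erase first, then insert, as the comment in addToBuffer() explains.
            topK.erase(lastIt);
            topK.insert(item);
        }
    }

Example #21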
PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
    if (isEOF()) {
        return PlanStage::IS_EOF;
    }
    invariant(_collection);  // If isEOF() returns false, we must have a collection.

    // It is possible that after a delete was executed, a WriteConflictException occurred
    // and prevented us from returning ADVANCED with the old version of the document.
    if (_idReturning != WorkingSet::INVALID_ID) {
        // We should only get here if we were trying to return something before.
        invariant(_params.returnDeleted);

        WorkingSetMember* member = _ws->get(_idReturning);
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        *out = _idReturning;
        _idReturning = WorkingSet::INVALID_ID;
        return PlanStage::ADVANCED;
    }

    // Either retry the last WSM we worked on or get a new one from our child.
    WorkingSetID id;
    if (_idRetrying != WorkingSet::INVALID_ID) {
        id = _idRetrying;
        _idRetrying = WorkingSet::INVALID_ID;
    } else {
        auto status = child()->work(&id);

        switch (status) {
        case PlanStage::ADVANCED:
            break;

        case PlanStage::FAILURE:
        case PlanStage::DEAD:
            *out = id;

            // If a stage fails, it may create a status WSM to indicate why it failed, in which
            // case 'id' is valid.  If ID is invalid, we create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                const std::string errmsg = "delete stage failed to read in results from child";
                *out = WorkingSetCommon::allocateStatusMember(
                           _ws, Status(ErrorCodes::InternalError, errmsg));
            }
            return status;

        case PlanStage::NEED_TIME:
            return status;

        case PlanStage::NEED_YIELD:
            *out = id;
            return status;

        case PlanStage::IS_EOF:
            return status;

        default:
            MONGO_UNREACHABLE;
        }
    }

    // We advanced, or are retrying, and id is set to the WSM to work on.
    WorkingSetMember* member = _ws->get(id);

    // We want to free this member when we return, unless we need to retry it.
    ScopeGuard memberFreer = MakeGuard(&WorkingSet::free, _ws, id);

    if (!member->hasRecordId()) {
        // We expect to be here because of an invalidation causing a force-fetch.
        ++_specificStats.nInvalidateSkips;
        return PlanStage::NEED_TIME;
    }
    RecordId recordId = member->recordId;
    // Deletes can't have projections. This means that covering analysis will always add
    // a fetch. We should always get fetched data, and never just key data.
    invariant(member->hasObj());

    try {
        // If the snapshot changed, then we have to make sure we have the latest copy of the
        // doc and that it still matches.
        std::unique_ptr<SeekableRecordCursor> cursor;
        if (getOpCtx()->recoveryUnit()->getSnapshotId() != member->obj.snapshotId()) {
            cursor = _collection->getCursor(getOpCtx());
            if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, cursor)) {
                // Doc is already deleted. Nothing more to do.
                return PlanStage::NEED_TIME;
            }

            // Make sure the re-fetched doc still matches the predicate.
            if (_params.canonicalQuery &&
                    !_params.canonicalQuery->root()->matchesBSON(member->obj.value(), NULL)) {
                // Doesn't match.
                return PlanStage::NEED_TIME;
            }
        }

        // Ensure that the BSONObj underlying the WorkingSetMember is owned because saveState()
        // is allowed to free the memory.
        if (_params.returnDeleted) {
            // Save a copy of the document that is about to get deleted, but keep it in the
            // RID_AND_OBJ state in case we need to retry deleting it.
            BSONObj deletedDoc = member->obj.value();
            member->obj.setValue(deletedDoc.getOwned());
        }

        // TODO: Do we want to buffer docs and delete them in a group rather than
        // saving/restoring state repeatedly?

        try {
            WorkingSetCommon::prepareForSnapshotChange(_ws);
            child()->saveState();
        } catch (const WriteConflictException& wce) {
            std::terminate();
        }

        // Do the write, unless this is an explain.
        if (!_params.isExplain) {
            WriteUnitOfWork wunit(getOpCtx());
            _collection->deleteDocument(getOpCtx(), recordId, _params.fromMigrate);
            wunit.commit();
        }

        ++_specificStats.docsDeleted;
    } catch (const WriteConflictException& wce) {
        // When we're doing a findAndModify with a sort, the sort will have a limit of 1, so will
        // not produce any more results even if there is another matching document. Re-throw the WCE
        // here so that these operations get another chance to find a matching document. The
        // findAndModify command should automatically retry if it gets a WCE.
        // TODO: this is not necessary if there was no sort specified.
        if (_params.returnDeleted) {
            throw;
        }
        _idRetrying = id;
        memberFreer.Dismiss();  // Keep this member around so we can retry deleting it.
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // After deleting the document, the RecordId associated with this member is invalid.
        // Remove the 'recordId' from the WorkingSetMember before returning it.
        member->recordId = RecordId();
        member->transitionToOwnedObj();
    }

    //  As restoreState() may restore (recreate) cursors, and cursors are tied to
    //  the transaction in which they are created (a WriteUnitOfWork is a
    //  transaction), make sure to restore the state outside of the WriteUnitOfWork.
    try {
        child()->restoreState();
    } catch (const WriteConflictException& wce) {
        // Note we don't need to retry anything in this case since the delete already
        // was committed. However, we still need to return the deleted document
        // (if it was requested).
        if (_params.returnDeleted) {
            // member->obj should refer to the deleted document.
            invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

            _idReturning = id;
            // Keep this member around so that we can return it on the next work() call.
            memberFreer.Dismiss();
        }
        *out = WorkingSet::INVALID_ID;
        return NEED_YIELD;
    }

    if (_params.returnDeleted) {
        // member->obj should refer to the deleted document.
        invariant(member->getState() == WorkingSetMember::OWNED_OBJ);

        memberFreer.Dismiss();  // Keep this member around so we can return it.
        *out = id;
        return PlanStage::ADVANCED;
    }

    return PlanStage::NEED_TIME;
}
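
The memberFreer guard above frees the working-set member on every exit path unless explicitly dismissed. A minimal sketch of that scope-guard idiom; this is an illustrative stand-in, not the actual MongoDB ScopeGuard/MakeGuard implementation:

    #include <functional>
    #include <utility>

    class ScopeGuard {
    public:
        explicit ScopeGuard(std::function<void()> onExit) : _onExit(std::move(onExit)) {}
        ~ScopeGuard() {
            if (_active) _onExit();  // Run the cleanup unless dismissed.
        }
        void dismiss() { _active = false; }  // Keep the resource alive.
        ScopeGuard(const ScopeGuard&) = delete;
        ScopeGuard& operator=(const ScopeGuard&) = delete;

    private:
        std::function<void()> _onExit;
        bool _active = true;
    };

    // Usage mirroring doWork(): free 'id' on return unless we decide to keep it.
    //     ScopeGuard memberFreer([&] { ws->free(id); });
    //     ...
    //     memberFreer.dismiss();  // Keep the member around to return it later.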
Example #22
    PlanStage::StageState TwoDNear::work(WorkingSetID* out) {
        ++_commonStats.works;

        // Adds the amount of time taken by work() to executionTimeMillis.
        ScopedTimer timer(&_commonStats.executionTimeMillis);

        if (!_initted) {
            _initted = true;

            if ( !_params.collection )
                return PlanStage::IS_EOF;

            IndexCatalog* indexCatalog = _params.collection->getIndexCatalog();

            IndexDescriptor* desc = indexCatalog->findIndexByKeyPattern(_params.indexKeyPattern);
            if ( desc == NULL )
                return PlanStage::IS_EOF;
            TwoDAccessMethod* am = static_cast<TwoDAccessMethod*>( indexCatalog->getIndex( desc ) );

            auto_ptr<twod_exec::GeoSearch> search;
            search.reset(new twod_exec::GeoSearch(_params.collection,
                                           am,
                                           _params.nearQuery.centroid.oldPoint,
                                           _params.numWanted, 
                                           _params.filter,
                                           _params.nearQuery.maxDistance,
                                           _params.nearQuery.isNearSphere ? twod_exec::GEO_SPHERE
                                                                          : twod_exec::GEO_PLANE));

            // This is where all the work is done.  :(
            search->exec();
            _specificStats.objectsLoaded = search->_objectsLoaded;
            _specificStats.nscanned = search->_lookedAt;

            for (twod_exec::GeoHopper::Holder::iterator it = search->_points.begin();
                 it != search->_points.end(); it++) {

                WorkingSetID id = _workingSet->allocate();
                WorkingSetMember* member = _workingSet->get(id);
                member->loc = it->_loc;
                member->obj = _params.collection->docFor(member->loc);
                member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
                if (_params.addDistMeta) {
                    member->addComputed(new GeoDistanceComputedData(it->_distance));
                }
                if (_params.addPointMeta) {
                    member->addComputed(new GeoNearPointComputedData(it->_pt));
                }
                _results.push(Result(id, it->_distance));
                _invalidationMap.insert(pair<DiskLoc, WorkingSetID>(it->_loc, id));
            }
        }

        if (isEOF()) { return PlanStage::IS_EOF; }

        Result result = _results.top();
        _results.pop();
        *out = result.id;

        // Remove from invalidation map.
        WorkingSetMember* member = _workingSet->get(*out);

        // The WSM may have been mutated or deleted so it may not have a loc.
        if (member->hasLoc()) {
            typedef multimap<DiskLoc, WorkingSetID>::iterator MMIT;
            pair<MMIT, MMIT> range = _invalidationMap.equal_range(member->loc);
            for (MMIT it = range.first; it != range.second; ++it) {
                if (it->second == *out) {
                    _invalidationMap.erase(it);
                    break;
                }
            }
        }

        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }
Example #23
File: or.cpp Project: Jiangew/mongo
PlanStage::StageState OrStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    if (0 == _specificStats.matchTested.size()) {
        _specificStats.matchTested = vector<uint64_t>(_children.size(), 0);
    }

    WorkingSetID id;
    StageState childStatus = _children[_currentChild]->work(&id);

    if (PlanStage::ADVANCED == childStatus) {
        WorkingSetMember* member = _ws->get(id);
        verify(member->hasLoc());

        // If we're deduping...
        if (_dedup) {
            ++_specificStats.dupsTested;

            // ...and we've seen the DiskLoc before
            if (_seen.end() != _seen.find(member->loc)) {
                // ...drop it.
                ++_specificStats.dupsDropped;
                _ws->free(id);
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else {
                // Otherwise, note that we've seen it.
                _seen.insert(member->loc);
            }
        }

        if (Filter::passes(member, _filter)) {
            if (NULL != _filter) {
                ++_specificStats.matchTested[_currentChild];
            }
            // Match!  return it.
            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }
        else {
            // Does not match, try again.
            _ws->free(id);
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
    }
    else if (PlanStage::IS_EOF == childStatus) {
        // Done with _currentChild, move to the next one.
        ++_currentChild;

        // Maybe we're out of children.
        if (isEOF()) {
            return PlanStage::IS_EOF;
        }
        else {
            ++_commonStats.needTime;
            return PlanStage::NEED_TIME;
        }
    }
    else {
        if (PlanStage::NEED_FETCH == childStatus) {
            *out = id;
            ++_commonStats.needFetch;
        }
        else if (PlanStage::NEED_TIME == childStatus) {
            ++_commonStats.needTime;
        }

        // NEED_TIME, ERROR, NEED_YIELD, pass them up.
        return childStatus;
    }
}
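
The OR stage unions its children's streams and uses a seen-set to drop duplicates, which is exactly the _seen logic above. A self-contained sketch with ints standing in for RecordIds:

    #include <unordered_set>
    #include <vector>

    // Union several child result streams, dropping ids we have already returned.
    std::vector<int> dedupUnion(const std::vector<std::vector<int>>& children) {
        std::unordered_set<int> seen;
        std::vector<int> out;
        for (const auto& child : children) {
            for (int recordId : child) {
                if (seen.insert(recordId).second) {  // .second: newly seen?
                    out.push_back(recordId);
                }
            }
        }
        return out;
    }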
Example #24
// static
Status WorkingSetCommon::getMemberStatus(const WorkingSetMember& member) {
    invariant(member.hasObj());
    return getMemberObjectStatus(member.obj.value());
}
Example #25
    void run() {
        Lock::DBLock lk(&_opCtx, nsToDatabaseSubstring(ns()), MODE_X);
        OldClientContext ctx(&_opCtx, ns());
        Database* db = ctx.db();
        Collection* coll = db->getCollection(&_opCtx, ns());
        if (!coll) {
            WriteUnitOfWork wuow(&_opCtx);
            coll = db->createCollection(&_opCtx, ns());
            wuow.commit();
        }

        WorkingSet ws;

        // Add an object to the DB.
        insert(BSON("foo" << 5));
        set<RecordId> recordIds;
        getRecordIds(&recordIds, coll);
        ASSERT_EQUALS(size_t(1), recordIds.size());

        // Create a mock stage that returns the WSM.
        auto mockStage = make_unique<QueuedDataStage>(&_opCtx, &ws);

        // Mock data.
        {
            WorkingSetID id = ws.allocate();
            WorkingSetMember* mockMember = ws.get(id);
            mockMember->recordId = *recordIds.begin();
            ws.transitionToRecordIdAndIdx(id);

            // State is RecordId and index, shouldn't be able to get the foo data inside.
            BSONElement elt;
            ASSERT_FALSE(mockMember->getFieldDotted("foo", &elt));
            mockStage->pushBack(id);
        }

        // Make the filter.
        BSONObj filterObj = BSON("foo" << 6);
        const CollatorInterface* collator = nullptr;
        const boost::intrusive_ptr<ExpressionContext> expCtx(
            new ExpressionContext(&_opCtx, collator));
        StatusWithMatchExpression statusWithMatcher =
            MatchExpressionParser::parse(filterObj, expCtx);
        verify(statusWithMatcher.isOK());
        unique_ptr<MatchExpression> filterExpr = std::move(statusWithMatcher.getValue());

        // Matcher requires that foo==6 but we only have data with foo==5.
        unique_ptr<FetchStage> fetchStage(
            new FetchStage(&_opCtx, &ws, mockStage.release(), filterExpr.get(), coll));

        // First call should return a fetch request as it's not in memory.
        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState state;

        // Normally we'd return the object but we have a filter that prevents it.
        state = fetchStage->work(&id);
        ASSERT_EQUALS(PlanStage::NEED_TIME, state);

        // No more data to fetch, so, EOF.
        state = fetchStage->work(&id);
        ASSERT_EQUALS(PlanStage::IS_EOF, state);
    }
Example #26
    bool MultiPlanStage::workAllPlans(size_t numResults, PlanYieldPolicy* yieldPolicy) {
        bool doneWorking = false;

        for (size_t ix = 0; ix < _candidates.size(); ++ix) {
            CandidatePlan& candidate = _candidates[ix];
            if (candidate.failed) { continue; }

            // Might need to yield between calls to work due to the timer elapsing.
            if (!tryYield(yieldPolicy).isOK()) {
                return false;
            }

            WorkingSetID id = WorkingSet::INVALID_ID;
            PlanStage::StageState state = candidate.root->work(&id);

            if (PlanStage::ADVANCED == state) {
                // Save result for later.
                candidate.results.push_back(id);

                // Once a plan returns enough results, stop working.
                if (candidate.results.size() >= numResults) {
                    doneWorking = true;
                }
            }
            else if (PlanStage::IS_EOF == state) {
                // First plan to hit EOF wins automatically.  Stop evaluating other plans.
                // Assumes that the ranking will pick this plan.
                doneWorking = true;
            }
            else if (PlanStage::NEED_YIELD == state) {
                if (id == WorkingSet::INVALID_ID) {
                    if (!yieldPolicy->allowedToYield())
                        throw WriteConflictException();
                }
                else {
                    WorkingSetMember* member = candidate.ws->get(id);
                    invariant(member->hasFetcher());
                    // Transfer ownership of the fetcher and yield.
                    _fetcher.reset(member->releaseFetcher());
                }

                if (yieldPolicy->allowedToYield()) {
                    yieldPolicy->forceYield();
                }

                if (!tryYield(yieldPolicy).isOK()) {
                    return false;
                }
            }
            else if (PlanStage::NEED_TIME != state) {
            // FAILURE or DEAD.  Mark this candidate as failed and keep working the
            // rest; if every candidate fails, fail the whole multi-plan run below.

                candidate.failed = true;
                ++_failureCount;

                // Propagate most recent seen failure to parent.
                if (PlanStage::FAILURE == state) {
                    _statusMemberId = id;
                }

                if (_failureCount == _candidates.size()) {
                    _failure = true;
                    return false;
                }
            }
        }

        return !doneWorking;
    }
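
workAllPlans() runs one round of a round-robin trial: every live candidate gets one work() call per round, and the trial stops as soon as any plan reaches the result quota or hits EOF. Below is a minimal sketch of that round structure with candidates modeled as simple generators; the names and types are illustrative, not MongoDB's.

// One trial round per call; returns true while the trial should continue.
#include <cstddef>
#include <iostream>
#include <vector>

struct Candidate {
    std::size_t produced = 0;
    std::size_t total = 0;   // how many results this plan can yield
    bool failed = false;
    std::vector<int> results;
    bool isEOF() const { return produced >= total; }
};

bool workAllPlans(std::vector<Candidate>& candidates, std::size_t numResults) {
    bool doneWorking = false;
    for (Candidate& c : candidates) {
        if (c.failed) continue;
        if (c.isEOF()) {
            doneWorking = true;  // first plan to reach EOF wins the trial
            continue;
        }
        c.results.push_back(static_cast<int>(c.produced++));  // "ADVANCED"
        if (c.results.size() >= numResults) doneWorking = true;
    }
    return !doneWorking;
}

int main() {
    std::vector<Candidate> plans(2);
    plans[0].total = 2;   // a fast plan that exhausts quickly
    plans[1].total = 10;  // a slower, richer plan
    std::size_t rounds = 0;
    while (workAllPlans(plans, 5)) ++rounds;   // ends when plans[0] hits EOF
    std::cout << rounds << " full rounds\n";
}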
Example No. 27
Status CachedPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
    // Adds the amount of time taken by pickBestPlan() to executionTimeMillis. There's lots of
    // execution work that happens here, so this is needed for the time accounting to
    // make sense.
    ScopedTimer timer(&_commonStats.executionTimeMillis);

    // If we work this many times during the trial period, then we will replan the
    // query from scratch.
    size_t maxWorksBeforeReplan =
        static_cast<size_t>(internalQueryCacheEvictionRatio * _decisionWorks);

    // The trial period ends without replanning if the cached plan produces this many results.
    size_t numResults = MultiPlanStage::getTrialPeriodNumToReturn(*_canonicalQuery);

    for (size_t i = 0; i < maxWorksBeforeReplan; ++i) {
        // Might need to yield between calls to work due to the timer elapsing.
        Status yieldStatus = tryYield(yieldPolicy);
        if (!yieldStatus.isOK()) {
            return yieldStatus;
        }

        WorkingSetID id = WorkingSet::INVALID_ID;
        PlanStage::StageState state = child()->work(&id);

        if (PlanStage::ADVANCED == state) {
            // Save result for later.
            WorkingSetMember* member = _ws->get(id);
            // Ensure that the BSONObj underlying the WorkingSetMember is owned in case we yield.
            member->makeObjOwnedIfNeeded();
            _results.push_back(id);

            if (_results.size() >= numResults) {
                // Once a plan returns enough results, stop working. Update cache with stats
                // from this run and return.
                updatePlanCache();
                return Status::OK();
            }
        } else if (PlanStage::IS_EOF == state) {
            // Cached plan hit EOF quickly enough. No need to replan. Update cache with stats
            // from this run and return.
            updatePlanCache();
            return Status::OK();
        } else if (PlanStage::NEED_YIELD == state) {
            if (id == WorkingSet::INVALID_ID) {
                if (!yieldPolicy->allowedToYield()) {
                    throw WriteConflictException();
                }
            } else {
                WorkingSetMember* member = _ws->get(id);
                invariant(member->hasFetcher());
                // Transfer ownership of the fetcher and yield.
                _fetcher.reset(member->releaseFetcher());
            }

            if (yieldPolicy->allowedToYield()) {
                yieldPolicy->forceYield();
            }

            Status yieldStatus = tryYield(yieldPolicy);
            if (!yieldStatus.isOK()) {
                return yieldStatus;
            }
        } else if (PlanStage::FAILURE == state) {
            // On failure, fall back to replanning the whole query. We neither evict the
            // existing cache entry nor cache the result of replanning.
            BSONObj statusObj;
            WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);

            LOG(1) << "Execution of cached plan failed, falling back to replan."
                   << " query: " << _canonicalQuery->toStringShort()
                   << " planSummary: " << Explain::getPlanSummary(child().get())
                   << " status: " << statusObj;

            const bool shouldCache = false;
            return replan(yieldPolicy, shouldCache);
        } else if (PlanStage::DEAD == state) {
            BSONObj statusObj;
            WorkingSetCommon::getStatusMemberObject(*_ws, id, &statusObj);

            LOG(1) << "Execution of cached plan failed: PlanStage died"
                   << ", query: " << _canonicalQuery->toStringShort()
                   << " planSummary: " << Explain::getPlanSummary(child().get())
                   << " status: " << statusObj;

            return WorkingSetCommon::getMemberObjectStatus(statusObj);
        } else {
            invariant(PlanStage::NEED_TIME == state);
        }
    }

    // If we're here, the trial period took more than 'maxWorksBeforeReplan' work cycles. This
    // plan is taking too long, so we replan from scratch.
    LOG(1) << "Execution of cached plan required " << maxWorksBeforeReplan
           << " works, but was originally cached with only " << _decisionWorks
           << " works. Evicting cache entry and replanning query: "
           << _canonicalQuery->toStringShort()
           << " plan summary before replan: " << Explain::getPlanSummary(child().get());

    const bool shouldCache = true;
    return replan(yieldPolicy, shouldCache);
}
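
The trial budget above is the product of the cached decision's recorded works and an eviction ratio: the cached plan gets evictionRatio * decisionWorks calls to work() to either produce enough results or reach EOF; otherwise the cache entry is evicted and the query replanned. A toy illustration of that budget arithmetic follows; the ratio and the trial cost are assumed values, not read from server parameters or a real run.

#include <cstddef>
#include <iostream>

int main() {
    const double evictionRatio = 10.0;     // assumed; a server parameter in MongoDB
    const std::size_t decisionWorks = 25;  // works recorded when the plan was cached
    const std::size_t maxWorksBeforeReplan =
        static_cast<std::size_t>(evictionRatio * decisionWorks);  // 250 work() calls

    // A trial run that burns through the whole budget without producing
    // enough results (or reaching EOF) triggers eviction and a replan.
    const std::size_t worksSpentInTrial = 300;  // hypothetical
    const bool replan = worksSpentInTrial >= maxWorksBeforeReplan;
    std::cout << "budget=" << maxWorksBeforeReplan
              << " spent=" << worksSpentInTrial
              << " replan=" << std::boolalpha << replan << '\n';
}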
Example No. 28
    PlanStage::StageState IndexScan::work(WorkingSetID* out) {
        ++_commonStats.works;

        if (NULL == _indexCursor.get()) {
            // First call to work().  Perform possibly heavy init.
            initIndexScan();
            checkEnd();
        }
        else if (_yieldMovedCursor) {
            _yieldMovedCursor = false;
            // Note that we're not calling next() here.  We got the next thing when we recovered
            // from yielding.
        }

        if (isEOF()) { return PlanStage::IS_EOF; }

        // Grab the next (key, value) from the index.
        BSONObj keyObj = _indexCursor->getKey();
        DiskLoc loc = _indexCursor->getValue();

        // Move to the next result.
        // The underlying IndexCursor points at the *next* thing we want to return.  We do this so
        // that if we're scanning an index looking for docs to delete we don't continually clobber
        // the thing we're pointing at.
        _indexCursor->next();
        checkEnd();

        if (_shouldDedup) {
            ++_specificStats.dupsTested;
            if (_returned.end() != _returned.find(loc)) {
                ++_specificStats.dupsDropped;
                ++_commonStats.needTime;
                return PlanStage::NEED_TIME;
            }
            else {
                _returned.insert(loc);
            }
        }

        if (Filter::passes(keyObj, _keyPattern, _filter)) {
            if (NULL != _filter) {
                ++_specificStats.matchTested;
            }

            // We must make a copy of the on-disk data since it can mutate during the execution of
            // this query.
            BSONObj ownedKeyObj = keyObj.getOwned();

            // Fill out the WSM.
            WorkingSetID id = _workingSet->allocate();
            WorkingSetMember* member = _workingSet->get(id);
            member->loc = loc;
            member->keyData.push_back(IndexKeyDatum(_keyPattern, ownedKeyObj));
            member->state = WorkingSetMember::LOC_AND_IDX;

            if (_params.addKeyMetadata) {
                BSONObjBuilder bob;
                bob.appendKeys(_keyPattern, ownedKeyObj);
                member->addComputed(new IndexKeyComputedData(bob.obj()));
            }

            *out = id;
            ++_commonStats.advanced;
            return PlanStage::ADVANCED;
        }

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }
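
The _shouldDedup branch exists because a multikey index can emit the same record location once per matching key, so the scan keeps a set of locations it has already returned. A self-contained sketch of that dedup step, with ints standing in for record locations:

#include <cassert>
#include <set>
#include <vector>

int main() {
    // Locations as produced by a scan over a multikey index; record 7
    // appears twice because two of its keys matched the bounds.
    std::vector<int> scanned{3, 7, 7, 9};
    std::set<int> returned;
    std::vector<int> output;

    for (int loc : scanned) {
        if (returned.count(loc)) continue;  // dup dropped: the "NEED_TIME" path
        returned.insert(loc);
        output.push_back(loc);              // the "ADVANCED" path
    }
    assert((output == std::vector<int>{3, 7, 9}));
}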
Example No. 29
PlanStage::StageState GroupStage::work(WorkingSetID* out) {
    ++_commonStats.works;

    ScopedTimer timer(&_commonStats.executionTimeMillis);

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // On the first call to work(), call initGroupScripting().
    if (_groupState == GroupState_Initializing) {
        Status status = initGroupScripting();
        if (!status.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }
        _groupState = GroupState_ReadingFromChild;
        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    }

    // Otherwise, read from our child.
    invariant(_groupState == GroupState_ReadingFromChild);
    WorkingSetID id = WorkingSet::INVALID_ID;
    StageState state = child()->work(&id);

    if (PlanStage::NEED_TIME == state) {
        ++_commonStats.needTime;
        return state;
    } else if (PlanStage::NEED_YIELD == state) {
        ++_commonStats.needYield;
        *out = id;
        return state;
    } else if (PlanStage::FAILURE == state) {
        *out = id;
        // If a stage fails, it may create a status WSM to indicate why it failed, in which
    // case 'id' is valid.  If 'id' is invalid, we create our own error message.
        if (WorkingSet::INVALID_ID == id) {
            const std::string errmsg = "group stage failed to read in results from child";
            *out = WorkingSetCommon::allocateStatusMember(
                _ws, Status(ErrorCodes::InternalError, errmsg));
        }
        return state;
    } else if (PlanStage::DEAD == state) {
        return state;
    } else if (PlanStage::ADVANCED == state) {
        WorkingSetMember* member = _ws->get(id);
        // Group queries can't have projections. This means that covering analysis will always
        // add a fetch. We should always get fetched data, and never just key data.
        invariant(member->hasObj());

        Status status = processObject(member->obj.value());
        if (!status.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            return PlanStage::FAILURE;
        }

        _ws->free(id);

        ++_commonStats.needTime;
        return PlanStage::NEED_TIME;
    } else {
        // We're done reading from our child.
        invariant(PlanStage::IS_EOF == state);

        auto results = finalizeResults();
        if (!results.isOK()) {
            *out = WorkingSetCommon::allocateStatusMember(_ws, results.getStatus());
            return PlanStage::FAILURE;
        }

        // Transition to state "done."  Future calls to work() will return IS_EOF.
        _groupState = GroupState_Done;

        *out = _ws->allocate();
        WorkingSetMember* member = _ws->get(*out);
        member->obj = Snapshotted<BSONObj>(SnapshotId(), results.getValue());
        member->transitionToOwnedObj();

        ++_commonStats.advanced;
        return PlanStage::ADVANCED;
    }
}
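
GroupStage is a small state machine: it initializes scripting, folds each child document into the group state one work() call at a time, and emits a single owned result at EOF. Below is a minimal model of that accumulate-then-emit shape; GroupModel and its members are illustrative stand-ins, not MongoDB's types.

// Consume input to EOF while folding into group state, then emit once.
#include <iostream>
#include <optional>
#include <vector>

struct GroupModel {
    std::vector<int> input;   // stands in for the child stage
    std::size_t pos = 0;
    long long sum = 0;        // stands in for the script-driven group state
    bool done = false;

    // Returns a value only on the final, emitting call.
    std::optional<long long> work() {
        if (done) return std::nullopt;   // IS_EOF
        if (pos < input.size()) {
            sum += input[pos++];         // processObject(); NEED_TIME
            return std::nullopt;
        }
        done = true;                     // finalizeResults(); ADVANCED
        return sum;
    }
};

int main() {
    GroupModel group{{1, 2, 3}};
    std::optional<long long> result;
    while (!group.done) result = group.work();
    std::cout << *result << '\n';  // prints 6
}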
Example No. 30
PlanStage::StageState SortStage::doWork(WorkingSetID* out) {
    const size_t maxBytes = static_cast<size_t>(internalQueryExecMaxBlockingSortBytes);
    if (_memUsage > maxBytes) {
        mongoutils::str::stream ss;
        ss << "Sort operation used more than the maximum " << maxBytes
           << " bytes of RAM. Add an index, or specify a smaller limit.";
        Status status(ErrorCodes::OperationFailed, ss);
        *out = WorkingSetCommon::allocateStatusMember(_ws, status);
        return PlanStage::FAILURE;
    }

    if (isEOF()) {
        return PlanStage::IS_EOF;
    }

    // Still reading in results to sort.
    if (!_sorted) {
        WorkingSetID id = WorkingSet::INVALID_ID;
        StageState code = child()->work(&id);

        if (PlanStage::ADVANCED == code) {
            // Add it into the map for quick invalidation if it has a valid RecordId.
            // A RecordId may be invalidated at any time (during a yield).  We need to get into
            // the WorkingSet as quickly as possible to handle it.
            WorkingSetMember* member = _ws->get(id);

            // Planner must put a fetch before we get here.
            verify(member->hasObj());

            // We might be sorting something that was invalidated at some point.
            if (member->hasLoc()) {
                _wsidByDiskLoc[member->loc] = id;
            }

            SortableDataItem item;
            item.wsid = id;

            // We extract the sort key from the WSM's computed data. This must have been generated
            // by a SortKeyGeneratorStage descendent in the execution tree.
            auto sortKeyComputedData =
                static_cast<const SortKeyComputedData*>(member->getComputed(WSM_SORT_KEY));
            item.sortKey = sortKeyComputedData->getSortKey();

            if (member->hasLoc()) {
                // The RecordId breaks ties when sorting two WSMs with the same sort key.
                item.loc = member->loc;
            }

            addToBuffer(item);

            return PlanStage::NEED_TIME;
        } else if (PlanStage::IS_EOF == code) {
            // TODO: We don't need the lock for this.  We could ask for a yield and do this work
            // unlocked.  Also, this is performing a lot of work for one call to work(...).
            sortBuffer();
            _resultIterator = _data.begin();
            _sorted = true;
            return PlanStage::NEED_TIME;
        } else if (PlanStage::FAILURE == code || PlanStage::DEAD == code) {
            *out = id;
            // If a stage fails, it may create a status WSM to indicate why it
            // failed, in which case 'id' is valid.  If 'id' is invalid, we
            // create our own error message.
            if (WorkingSet::INVALID_ID == id) {
                mongoutils::str::stream ss;
                ss << "sort stage failed to read in results to sort from child";
                Status status(ErrorCodes::InternalError, ss);
                *out = WorkingSetCommon::allocateStatusMember(_ws, status);
            }
            return code;
        } else if (PlanStage::NEED_YIELD == code) {
            *out = id;
        }

        return code;
    }

    // Returning results.
    verify(_resultIterator != _data.end());
    verify(_sorted);
    *out = _resultIterator->wsid;
    _resultIterator++;

    // If we're returning something, take it out of our DiskLoc -> WSID map so that
    // future calls to invalidate don't cause us to take action for a loc we're done with.
    WorkingSetMember* member = _ws->get(*out);
    if (member->hasLoc()) {
        _wsidByDiskLoc.erase(member->loc);
    }

    return PlanStage::ADVANCED;
}
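
SortStage is a blocking sort: it buffers every child result under a byte budget, sorts once at EOF, then streams results out, using the record location to break ties between equal sort keys. The sketch below models that behavior under stated assumptions; Item and the byte accounting are simplified stand-ins for the real WorkingSetMember bookkeeping.

// Buffer, enforce the memory cap, sort with a (key, recordId) comparator.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <tuple>
#include <vector>

struct Item {
    int sortKey;
    long recordId;  // breaks ties between equal sort keys
};

std::vector<Item> blockingSort(std::vector<Item> input, std::size_t maxBytes) {
    std::size_t memUsage = input.size() * sizeof(Item);
    if (memUsage > maxBytes)
        throw std::runtime_error(
            "Sort exceeded the memory limit; add an index or lower the limit.");
    std::sort(input.begin(), input.end(), [](const Item& a, const Item& b) {
        return std::tie(a.sortKey, a.recordId) < std::tie(b.sortKey, b.recordId);
    });
    return input;
}

int main() {
    auto sorted = blockingSort({{2, 11}, {1, 42}, {2, 7}}, 1024);
    // Order: (1,42), (2,7), (2,11) -- equal keys ordered by recordId.
    std::cout << sorted.front().recordId << '\n';  // prints 42
}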